##// END OF EJS Templates
revlog: finer computation of "issnapshot"...
marmoute -
r50371:5846bc8a default
parent child Browse files
Show More
@@ -1,3300 +1,3315 b''
1 /*
1 /*
2 parsers.c - efficient content parsing
2 parsers.c - efficient content parsing
3
3
4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8 */
8 */
9
9
10 #define PY_SSIZE_T_CLEAN
10 #define PY_SSIZE_T_CLEAN
11 #include <Python.h>
11 #include <Python.h>
12 #include <assert.h>
12 #include <assert.h>
13 #include <ctype.h>
13 #include <ctype.h>
14 #include <limits.h>
14 #include <limits.h>
15 #include <stddef.h>
15 #include <stddef.h>
16 #include <stdlib.h>
16 #include <stdlib.h>
17 #include <string.h>
17 #include <string.h>
18 #include <structmember.h>
18 #include <structmember.h>
19
19
20 #include "bitmanipulation.h"
20 #include "bitmanipulation.h"
21 #include "charencode.h"
21 #include "charencode.h"
22 #include "compat.h"
22 #include "compat.h"
23 #include "revlog.h"
23 #include "revlog.h"
24 #include "util.h"
24 #include "util.h"
25
25
/* Forward declaration: full definition below (struct indexObjectStruct). */
typedef struct indexObjectStruct indexObject;

/* One node of the base-16 trie: one child slot per nibble (hex digit). */
typedef struct {
	int children[16];
} nodetreenode;
31
31
/*
 * Function table for the revlog C API. abi_version identifies the
 * table layout (NOTE(review): consumers of this table are not visible
 * in this chunk — confirm versioning policy at the export site).
 */
typedef struct {
	int abi_version;
	Py_ssize_t (*index_length)(const indexObject *);
	const char *(*index_node)(indexObject *, Py_ssize_t);
	int (*fast_rank)(indexObject *, Py_ssize_t);
	int (*index_parents)(PyObject *, int, int *);
} Revlog_CAPI;
39
39
/*
 * A base-16 trie for fast node->rev mapping.
 *
 * Each slot in a nodetreenode holds:
 * Positive value is index of the next node in the trie
 * Negative value is a leaf: -(rev + 2)
 * Zero is empty
 */
typedef struct {
	indexObject *index;  /* index whose revisions the leaves refer to */
	nodetreenode *nodes; /* flat array of trie nodes */
	Py_ssize_t nodelen;  /* node (hash) length in bytes */
	size_t length;       /* # nodes in use */
	size_t capacity;     /* # nodes allocated */
	int depth;           /* maximum depth of tree */
	int splits;          /* # splits performed */
} nodetree;
56
56
/* Python-level wrapper exposing a standalone nodetree as an object. */
typedef struct {
	PyObject_HEAD /* ; */
	nodetree nt;
} nodetreeObject;
61
61
/*
 * This class has two behaviors.
 *
 * When used in a list-like way (with integer keys), we decode an
 * entry in a RevlogNG index file on demand. We have limited support for
 * integer-keyed insert and delete, only at elements right before the
 * end.
 *
 * With string keys, we lazily perform a reverse mapping from node to
 * rev, using a base-16 trie.
 */
struct indexObjectStruct {
	PyObject_HEAD
	/* Type-specific fields go here. */
	PyObject *data;     /* raw bytes of index */
	Py_ssize_t nodelen; /* digest size of the hash, 20 for SHA-1 */
	PyObject *nullentry;    /* fast path for references to null */
	Py_buffer buf;          /* buffer of data */
	const char **offsets;   /* populated on demand */
	Py_ssize_t length;      /* current on-disk number of elements */
	unsigned new_length;    /* number of added elements */
	unsigned added_length;  /* space reserved for added elements */
	char *added;            /* populated on demand */
	PyObject *headrevs;     /* cache, invalidated on changes */
	PyObject *filteredrevs; /* filtered revs set */
	nodetree nt;            /* base-16 trie */
	int ntinitialized;      /* 0 or 1 */
	int ntrev;              /* last rev scanned */
	int ntlookups;          /* # lookups */
	int ntmisses;           /* # lookups that miss the cache */
	int inlined;            /* nonzero if data is interleaved with index
	                           entries (see index_deref) */
	long entry_size;        /* size of index headers. Differs in v1 v.s. v2
	                           format */
	long rust_ext_compat;   /* compatibility with being used in rust
	                           extensions */
	long format_version;    /* format version selector (format_*) */
};
99
99
100 static Py_ssize_t index_length(const indexObject *self)
100 static Py_ssize_t index_length(const indexObject *self)
101 {
101 {
102 return self->length + self->new_length;
102 return self->length + self->new_length;
103 }
103 }
104
104
/* All-zero node id; 32 bytes covers the widest supported digest. */
static const char nullid[32] = {0};
static const Py_ssize_t nullrev = -1;

static Py_ssize_t inline_scan(indexObject *self, const char **offsets);

static int index_find_node(indexObject *self, const char *node);

/* Py_BuildValue format for a full index-entry tuple. On platforms where
 * `long` is 32-bit, use 'K' (unsigned long long) for the 64-bit fields;
 * otherwise 'k' (unsigned long) suffices. */
#if LONG_MAX == 0x7fffffffL
static const char *const tuple_format = "Kiiiiiiy#KiBBi";
#else
static const char *const tuple_format = "kiiiiiiy#kiBBi";
#endif

/* A RevlogNG v1 index entry is 64 bytes long. */
static const long v1_entry_size = 64;

/* A Revlogv2 index entry is 96 bytes long. */
static const long v2_entry_size = 96;

/* A Changelogv2 index entry is 96 bytes long. */
static const long cl2_entry_size = 96;

/* Internal format version.
 * Must match their counterparts in revlogutils/constants.py */
static const long format_v1 = 1;       /* constants.py: REVLOGV1 */
static const long format_v2 = 0xDEAD;  /* constants.py: REVLOGV2 */
static const long format_cl2 = 0xD34D; /* constants.py: CHANGELOGV2 */

/* Byte offsets of each field within a v1 index entry. */
static const long entry_v1_offset_high = 0;
static const long entry_v1_offset_offset_flags = 4;
static const long entry_v1_offset_comp_len = 8;
static const long entry_v1_offset_uncomp_len = 12;
static const long entry_v1_offset_base_rev = 16;
static const long entry_v1_offset_link_rev = 20;
static const long entry_v1_offset_parent_1 = 24;
static const long entry_v1_offset_parent_2 = 28;
static const long entry_v1_offset_node_id = 32;

/* Byte offsets of each field within a v2 index entry. */
static const long entry_v2_offset_high = 0;
static const long entry_v2_offset_offset_flags = 4;
static const long entry_v2_offset_comp_len = 8;
static const long entry_v2_offset_uncomp_len = 12;
static const long entry_v2_offset_base_rev = 16;
static const long entry_v2_offset_link_rev = 20;
static const long entry_v2_offset_parent_1 = 24;
static const long entry_v2_offset_parent_2 = 28;
static const long entry_v2_offset_node_id = 32;
static const long entry_v2_offset_sidedata_offset = 64;
static const long entry_v2_offset_sidedata_comp_len = 72;
static const long entry_v2_offset_all_comp_mode = 76;
/* next free offset: 77 */

/* Byte offsets of each field within a changelog-v2 index entry
 * (note: no base_rev/link_rev fields in this format). */
static const long entry_cl2_offset_high = 0;
static const long entry_cl2_offset_offset_flags = 4;
static const long entry_cl2_offset_comp_len = 8;
static const long entry_cl2_offset_uncomp_len = 12;
static const long entry_cl2_offset_parent_1 = 16;
static const long entry_cl2_offset_parent_2 = 20;
static const long entry_cl2_offset_node_id = 24;
static const long entry_cl2_offset_sidedata_offset = 56;
static const long entry_cl2_offset_sidedata_comp_len = 64;
static const long entry_cl2_offset_all_comp_mode = 68;
static const long entry_cl2_offset_rank = 69;
/* next free offset: 73 */

static const char comp_mode_inline = 2;
static const int rank_unknown = -1;
172
172
/*
 * Set mercurial.error.RevlogError as the pending Python exception.
 *
 * The mercurial.error module is imported on every call; if any lookup
 * step fails, the exception raised by that step is left pending
 * instead (a SystemError when RevlogError itself is missing).
 */
static void raise_revlog_error(void)
{
	PyObject *mod = NULL, *dict = NULL, *errclass = NULL;

	mod = PyImport_ImportModule("mercurial.error");
	if (mod == NULL) {
		goto cleanup;
	}

	/* PyModule_GetDict returns a borrowed reference; take our own so
	 * the unconditional Py_XDECREF in cleanup stays balanced. */
	dict = PyModule_GetDict(mod);
	if (dict == NULL) {
		goto cleanup;
	}
	Py_INCREF(dict);

	/* borrowed reference, used immediately — no INCREF needed */
	errclass = PyDict_GetItemString(dict, "RevlogError");
	if (errclass == NULL) {
		PyErr_SetString(PyExc_SystemError,
		                "could not find RevlogError");
		goto cleanup;
	}

	/* value of exception is ignored by callers */
	PyErr_SetString(errclass, "RevlogError");

cleanup:
	Py_XDECREF(dict);
	Py_XDECREF(mod);
}
202
202
203 /*
203 /*
204 * Return a pointer to the beginning of a RevlogNG record.
204 * Return a pointer to the beginning of a RevlogNG record.
205 */
205 */
206 static const char *index_deref(indexObject *self, Py_ssize_t pos)
206 static const char *index_deref(indexObject *self, Py_ssize_t pos)
207 {
207 {
208 if (pos >= self->length)
208 if (pos >= self->length)
209 return self->added + (pos - self->length) * self->entry_size;
209 return self->added + (pos - self->length) * self->entry_size;
210
210
211 if (self->inlined && pos > 0) {
211 if (self->inlined && pos > 0) {
212 if (self->offsets == NULL) {
212 if (self->offsets == NULL) {
213 Py_ssize_t ret;
213 Py_ssize_t ret;
214 self->offsets =
214 self->offsets =
215 PyMem_Malloc(self->length * sizeof(*self->offsets));
215 PyMem_Malloc(self->length * sizeof(*self->offsets));
216 if (self->offsets == NULL)
216 if (self->offsets == NULL)
217 return (const char *)PyErr_NoMemory();
217 return (const char *)PyErr_NoMemory();
218 ret = inline_scan(self, self->offsets);
218 ret = inline_scan(self, self->offsets);
219 if (ret == -1) {
219 if (ret == -1) {
220 return NULL;
220 return NULL;
221 };
221 };
222 }
222 }
223 return self->offsets[pos];
223 return self->offsets[pos];
224 }
224 }
225
225
226 return (const char *)(self->buf.buf) + pos * self->entry_size;
226 return (const char *)(self->buf.buf) + pos * self->entry_size;
227 }
227 }
228
228
229 /*
229 /*
230 * Get parents of the given rev.
230 * Get parents of the given rev.
231 *
231 *
232 * The specified rev must be valid and must not be nullrev. A returned
232 * The specified rev must be valid and must not be nullrev. A returned
233 * parent revision may be nullrev, but is guaranteed to be in valid range.
233 * parent revision may be nullrev, but is guaranteed to be in valid range.
234 */
234 */
235 static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
235 static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
236 int maxrev)
236 int maxrev)
237 {
237 {
238 const char *data = index_deref(self, rev);
238 const char *data = index_deref(self, rev);
239
239
240 if (self->format_version == format_v1) {
240 if (self->format_version == format_v1) {
241 ps[0] = getbe32(data + entry_v1_offset_parent_1);
241 ps[0] = getbe32(data + entry_v1_offset_parent_1);
242 ps[1] = getbe32(data + entry_v1_offset_parent_2);
242 ps[1] = getbe32(data + entry_v1_offset_parent_2);
243 } else if (self->format_version == format_v2) {
243 } else if (self->format_version == format_v2) {
244 ps[0] = getbe32(data + entry_v2_offset_parent_1);
244 ps[0] = getbe32(data + entry_v2_offset_parent_1);
245 ps[1] = getbe32(data + entry_v2_offset_parent_2);
245 ps[1] = getbe32(data + entry_v2_offset_parent_2);
246 } else if (self->format_version == format_cl2) {
246 } else if (self->format_version == format_cl2) {
247 ps[0] = getbe32(data + entry_cl2_offset_parent_1);
247 ps[0] = getbe32(data + entry_cl2_offset_parent_1);
248 ps[1] = getbe32(data + entry_cl2_offset_parent_2);
248 ps[1] = getbe32(data + entry_cl2_offset_parent_2);
249 } else {
249 } else {
250 raise_revlog_error();
250 raise_revlog_error();
251 return -1;
251 return -1;
252 }
252 }
253
253
254 /* If index file is corrupted, ps[] may point to invalid revisions. So
254 /* If index file is corrupted, ps[] may point to invalid revisions. So
255 * there is a risk of buffer overflow to trust them unconditionally. */
255 * there is a risk of buffer overflow to trust them unconditionally. */
256 if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
256 if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
257 PyErr_SetString(PyExc_ValueError, "parent out of range");
257 PyErr_SetString(PyExc_ValueError, "parent out of range");
258 return -1;
258 return -1;
259 }
259 }
260 return 0;
260 return 0;
261 }
261 }
262
262
263 /*
263 /*
264 * Get parents of the given rev.
264 * Get parents of the given rev.
265 *
265 *
266 * If the specified rev is out of range, IndexError will be raised. If the
266 * If the specified rev is out of range, IndexError will be raised. If the
267 * revlog entry is corrupted, ValueError may be raised.
267 * revlog entry is corrupted, ValueError may be raised.
268 *
268 *
269 * Returns 0 on success or -1 on failure.
269 * Returns 0 on success or -1 on failure.
270 */
270 */
271 static int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
271 static int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
272 {
272 {
273 int tiprev;
273 int tiprev;
274 if (!op || !HgRevlogIndex_Check(op) || !ps) {
274 if (!op || !HgRevlogIndex_Check(op) || !ps) {
275 PyErr_BadInternalCall();
275 PyErr_BadInternalCall();
276 return -1;
276 return -1;
277 }
277 }
278 tiprev = (int)index_length((indexObject *)op) - 1;
278 tiprev = (int)index_length((indexObject *)op) - 1;
279 if (rev < -1 || rev > tiprev) {
279 if (rev < -1 || rev > tiprev) {
280 PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
280 PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
281 return -1;
281 return -1;
282 } else if (rev == -1) {
282 } else if (rev == -1) {
283 ps[0] = ps[1] = -1;
283 ps[0] = ps[1] = -1;
284 return 0;
284 return 0;
285 } else {
285 } else {
286 return index_get_parents((indexObject *)op, rev, ps, tiprev);
286 return index_get_parents((indexObject *)op, rev, ps, tiprev);
287 }
287 }
288 }
288 }
289
289
/*
 * Return the start offset, in the data file, of revision `rev`.
 *
 * The on-disk field packs a 48-bit offset and 16 flag bits into one
 * 64-bit big-endian value (split across two 32-bit words); the flags
 * are shifted out before returning. Returns 0 for nullrev, or -1 with
 * RevlogError set for an unknown format version.
 */
static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
{
	const char *data;
	uint64_t offset;

	if (rev == nullrev)
		return 0;

	data = index_deref(self, rev);

	if (self->format_version == format_v1) {
		offset = getbe32(data + entry_v1_offset_offset_flags);
		if (rev == 0) {
			/* mask out version number for the first entry */
			offset &= 0xFFFF;
		} else {
			uint32_t offset_high =
			    getbe32(data + entry_v1_offset_high);
			offset |= ((uint64_t)offset_high) << 32;
		}
	} else if (self->format_version == format_v2) {
		offset = getbe32(data + entry_v2_offset_offset_flags);
		if (rev == 0) {
			/* mask out version number for the first entry */
			offset &= 0xFFFF;
		} else {
			uint32_t offset_high =
			    getbe32(data + entry_v2_offset_high);
			offset |= ((uint64_t)offset_high) << 32;
		}
	} else if (self->format_version == format_cl2) {
		/* no version masking for rev 0 in this format (version
		 * lives in the docket, per index_pack_header) */
		uint32_t offset_high = getbe32(data + entry_cl2_offset_high);
		offset = getbe32(data + entry_cl2_offset_offset_flags);
		offset |= ((uint64_t)offset_high) << 32;
	} else {
		raise_revlog_error();
		return -1;
	}

	/* drop the 16 flag bits, leaving the byte offset */
	return (int64_t)(offset >> 16);
}
331
331
332 static inline int index_get_length(indexObject *self, Py_ssize_t rev)
332 static inline int index_get_length(indexObject *self, Py_ssize_t rev)
333 {
333 {
334 const char *data;
334 const char *data;
335 int tmp;
335 int tmp;
336
336
337 if (rev == nullrev)
337 if (rev == nullrev)
338 return 0;
338 return 0;
339
339
340 data = index_deref(self, rev);
340 data = index_deref(self, rev);
341
341
342 if (self->format_version == format_v1) {
342 if (self->format_version == format_v1) {
343 tmp = (int)getbe32(data + entry_v1_offset_comp_len);
343 tmp = (int)getbe32(data + entry_v1_offset_comp_len);
344 } else if (self->format_version == format_v2) {
344 } else if (self->format_version == format_v2) {
345 tmp = (int)getbe32(data + entry_v2_offset_comp_len);
345 tmp = (int)getbe32(data + entry_v2_offset_comp_len);
346 } else if (self->format_version == format_cl2) {
346 } else if (self->format_version == format_cl2) {
347 tmp = (int)getbe32(data + entry_cl2_offset_comp_len);
347 tmp = (int)getbe32(data + entry_cl2_offset_comp_len);
348 } else {
348 } else {
349 raise_revlog_error();
349 raise_revlog_error();
350 return -1;
350 return -1;
351 }
351 }
352 if (tmp < 0) {
352 if (tmp < 0) {
353 PyErr_Format(PyExc_OverflowError,
353 PyErr_Format(PyExc_OverflowError,
354 "revlog entry size out of bound (%d)", tmp);
354 "revlog entry size out of bound (%d)", tmp);
355 return -1;
355 return -1;
356 }
356 }
357 return tmp;
357 return tmp;
358 }
358 }
359
359
/*
 * RevlogNG format (all in big endian, data may be inlined):
 * 6 bytes: offset
 * 2 bytes: flags
 * 4 bytes: compressed length
 * 4 bytes: uncompressed length
 * 4 bytes: base revision
 * 4 bytes: link revision
 * 4 bytes: parent 1 revision
 * 4 bytes: parent 2 revision
 * 32 bytes: nodeid (only 20 bytes used with SHA-1)
 *
 * Decode entry `pos` into a Python tuple (see tuple_format). Fields a
 * format does not store are filled with neutral values so the tuple
 * shape is identical across formats. Returns a new reference, or NULL
 * with an exception set.
 */
static PyObject *index_get(indexObject *self, Py_ssize_t pos)
{
	uint64_t offset_flags, sidedata_offset;
	int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
	    sidedata_comp_len, rank = rank_unknown;
	char data_comp_mode, sidedata_comp_mode;
	const char *c_node_id;
	const char *data;
	Py_ssize_t length = index_length(self);

	/* nullrev has a single shared, preallocated entry */
	if (pos == nullrev) {
		Py_INCREF(self->nullentry);
		return self->nullentry;
	}

	if (pos < 0 || pos >= length) {
		PyErr_SetString(PyExc_IndexError, "revlog index out of range");
		return NULL;
	}

	data = index_deref(self, pos);
	if (data == NULL)
		return NULL;

	if (self->format_version == format_v1) {
		offset_flags = getbe32(data + entry_v1_offset_offset_flags);
		/*
		 * The first entry on-disk needs the version number masked out,
		 * but this doesn't apply if entries are added to an empty
		 * index.
		 */
		if (self->length && pos == 0)
			offset_flags &= 0xFFFF;
		else {
			uint32_t offset_high =
			    getbe32(data + entry_v1_offset_high);
			offset_flags |= ((uint64_t)offset_high) << 32;
		}

		comp_len = getbe32(data + entry_v1_offset_comp_len);
		uncomp_len = getbe32(data + entry_v1_offset_uncomp_len);
		base_rev = getbe32(data + entry_v1_offset_base_rev);
		link_rev = getbe32(data + entry_v1_offset_link_rev);
		parent_1 = getbe32(data + entry_v1_offset_parent_1);
		parent_2 = getbe32(data + entry_v1_offset_parent_2);
		c_node_id = data + entry_v1_offset_node_id;

		/* v1 stores no sidedata; report neutral values */
		sidedata_offset = 0;
		sidedata_comp_len = 0;
		data_comp_mode = comp_mode_inline;
		sidedata_comp_mode = comp_mode_inline;
	} else if (self->format_version == format_v2) {
		offset_flags = getbe32(data + entry_v2_offset_offset_flags);
		/*
		 * The first entry on-disk needs the version number masked out,
		 * but this doesn't apply if entries are added to an empty
		 * index.
		 */
		if (self->length && pos == 0)
			offset_flags &= 0xFFFF;
		else {
			uint32_t offset_high =
			    getbe32(data + entry_v2_offset_high);
			offset_flags |= ((uint64_t)offset_high) << 32;
		}

		comp_len = getbe32(data + entry_v2_offset_comp_len);
		uncomp_len = getbe32(data + entry_v2_offset_uncomp_len);
		base_rev = getbe32(data + entry_v2_offset_base_rev);
		link_rev = getbe32(data + entry_v2_offset_link_rev);
		parent_1 = getbe32(data + entry_v2_offset_parent_1);
		parent_2 = getbe32(data + entry_v2_offset_parent_2);
		c_node_id = data + entry_v2_offset_node_id;

		sidedata_offset =
		    getbe64(data + entry_v2_offset_sidedata_offset);
		sidedata_comp_len =
		    getbe32(data + entry_v2_offset_sidedata_comp_len);
		/* low 2 bits: data compression mode; next 2 bits:
		 * sidedata compression mode */
		data_comp_mode = data[entry_v2_offset_all_comp_mode] & 3;
		sidedata_comp_mode =
		    ((data[entry_v2_offset_all_comp_mode] >> 2) & 3);
	} else if (self->format_version == format_cl2) {
		uint32_t offset_high = getbe32(data + entry_cl2_offset_high);
		offset_flags = getbe32(data + entry_cl2_offset_offset_flags);
		offset_flags |= ((uint64_t)offset_high) << 32;
		comp_len = getbe32(data + entry_cl2_offset_comp_len);
		uncomp_len = getbe32(data + entry_cl2_offset_uncomp_len);
		/* base_rev and link_rev are not stored in changelogv2, but are
		   still used by some functions shared with the other revlogs.
		   They are supposed to contain links to other revisions,
		   but they always point to themselves in the case of a changelog.
		*/
		base_rev = pos;
		link_rev = pos;
		parent_1 = getbe32(data + entry_cl2_offset_parent_1);
		parent_2 = getbe32(data + entry_cl2_offset_parent_2);
		c_node_id = data + entry_cl2_offset_node_id;
		sidedata_offset =
		    getbe64(data + entry_cl2_offset_sidedata_offset);
		sidedata_comp_len =
		    getbe32(data + entry_cl2_offset_sidedata_comp_len);
		data_comp_mode = data[entry_cl2_offset_all_comp_mode] & 3;
		sidedata_comp_mode =
		    ((data[entry_cl2_offset_all_comp_mode] >> 2) & 3);
		rank = getbe32(data + entry_cl2_offset_rank);
	} else {
		raise_revlog_error();
		return NULL;
	}

	return Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
	                     base_rev, link_rev, parent_1, parent_2, c_node_id,
	                     self->nodelen, sidedata_offset, sidedata_comp_len,
	                     data_comp_mode, sidedata_comp_mode, rank);
}
487 /*
487 /*
488 * Pack header information in binary
488 * Pack header information in binary
489 */
489 */
490 static PyObject *index_pack_header(indexObject *self, PyObject *args)
490 static PyObject *index_pack_header(indexObject *self, PyObject *args)
491 {
491 {
492 int header;
492 int header;
493 char out[4];
493 char out[4];
494 if (!PyArg_ParseTuple(args, "i", &header)) {
494 if (!PyArg_ParseTuple(args, "i", &header)) {
495 return NULL;
495 return NULL;
496 }
496 }
497 if (self->format_version != format_v1) {
497 if (self->format_version != format_v1) {
498 PyErr_Format(PyExc_RuntimeError,
498 PyErr_Format(PyExc_RuntimeError,
499 "version header should go in the docket, not the "
499 "version header should go in the docket, not the "
500 "index: %d",
500 "index: %d",
501 header);
501 header);
502 return NULL;
502 return NULL;
503 }
503 }
504 putbe32(header, out);
504 putbe32(header, out);
505 return PyBytes_FromStringAndSize(out, 4);
505 return PyBytes_FromStringAndSize(out, 4);
506 }
506 }
507 /*
507 /*
508 * Return the raw binary string representing a revision
508 * Return the raw binary string representing a revision
509 */
509 */
510 static PyObject *index_entry_binary(indexObject *self, PyObject *value)
510 static PyObject *index_entry_binary(indexObject *self, PyObject *value)
511 {
511 {
512 long rev;
512 long rev;
513 const char *data;
513 const char *data;
514 Py_ssize_t length = index_length(self);
514 Py_ssize_t length = index_length(self);
515
515
516 if (!pylong_to_long(value, &rev)) {
516 if (!pylong_to_long(value, &rev)) {
517 return NULL;
517 return NULL;
518 }
518 }
519 if (rev < 0 || rev >= length) {
519 if (rev < 0 || rev >= length) {
520 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
520 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
521 rev);
521 rev);
522 return NULL;
522 return NULL;
523 };
523 };
524
524
525 data = index_deref(self, rev);
525 data = index_deref(self, rev);
526 if (data == NULL)
526 if (data == NULL)
527 return NULL;
527 return NULL;
528 if (rev == 0 && self->format_version == format_v1) {
528 if (rev == 0 && self->format_version == format_v1) {
529 /* the header is eating the start of the first entry */
529 /* the header is eating the start of the first entry */
530 return PyBytes_FromStringAndSize(data + 4,
530 return PyBytes_FromStringAndSize(data + 4,
531 self->entry_size - 4);
531 self->entry_size - 4);
532 }
532 }
533 return PyBytes_FromStringAndSize(data, self->entry_size);
533 return PyBytes_FromStringAndSize(data, self->entry_size);
534 }
534 }
535
535
536 /*
536 /*
537 * Return the hash of node corresponding to the given rev.
537 * Return the hash of node corresponding to the given rev.
538 */
538 */
539 static const char *index_node(indexObject *self, Py_ssize_t pos)
539 static const char *index_node(indexObject *self, Py_ssize_t pos)
540 {
540 {
541 Py_ssize_t length = index_length(self);
541 Py_ssize_t length = index_length(self);
542 const char *data;
542 const char *data;
543 const char *node_id;
543 const char *node_id;
544
544
545 if (pos == nullrev)
545 if (pos == nullrev)
546 return nullid;
546 return nullid;
547
547
548 if (pos >= length)
548 if (pos >= length)
549 return NULL;
549 return NULL;
550
550
551 data = index_deref(self, pos);
551 data = index_deref(self, pos);
552
552
553 if (self->format_version == format_v1) {
553 if (self->format_version == format_v1) {
554 node_id = data + entry_v1_offset_node_id;
554 node_id = data + entry_v1_offset_node_id;
555 } else if (self->format_version == format_v2) {
555 } else if (self->format_version == format_v2) {
556 node_id = data + entry_v2_offset_node_id;
556 node_id = data + entry_v2_offset_node_id;
557 } else if (self->format_version == format_cl2) {
557 } else if (self->format_version == format_cl2) {
558 node_id = data + entry_cl2_offset_node_id;
558 node_id = data + entry_cl2_offset_node_id;
559 } else {
559 } else {
560 raise_revlog_error();
560 raise_revlog_error();
561 return NULL;
561 return NULL;
562 }
562 }
563
563
564 return data ? node_id : NULL;
564 return data ? node_id : NULL;
565 }
565 }
566
566
567 /*
567 /*
568 * Return the stored rank of a given revision if known, or rank_unknown
568 * Return the stored rank of a given revision if known, or rank_unknown
569 * otherwise.
569 * otherwise.
570 *
570 *
571 * The rank of a revision is the size of the sub-graph it defines as a head.
571 * The rank of a revision is the size of the sub-graph it defines as a head.
572 * Equivalently, the rank of a revision `r` is the size of the set
572 * Equivalently, the rank of a revision `r` is the size of the set
573 * `ancestors(r)`, `r` included.
573 * `ancestors(r)`, `r` included.
574 *
574 *
575 * This method returns the rank retrieved from the revlog in constant time. It
575 * This method returns the rank retrieved from the revlog in constant time. It
576 * makes no attempt at computing unknown values for versions of the revlog
576 * makes no attempt at computing unknown values for versions of the revlog
577 * which do not persist the rank.
577 * which do not persist the rank.
578 */
578 */
579 static int index_fast_rank(indexObject *self, Py_ssize_t pos)
579 static int index_fast_rank(indexObject *self, Py_ssize_t pos)
580 {
580 {
581 Py_ssize_t length = index_length(self);
581 Py_ssize_t length = index_length(self);
582
582
583 if (self->format_version != format_cl2 || pos >= length) {
583 if (self->format_version != format_cl2 || pos >= length) {
584 return rank_unknown;
584 return rank_unknown;
585 }
585 }
586
586
587 if (pos == nullrev) {
587 if (pos == nullrev) {
588 return 0; /* convention */
588 return 0; /* convention */
589 }
589 }
590
590
591 return getbe32(index_deref(self, pos) + entry_cl2_offset_rank);
591 return getbe32(index_deref(self, pos) + entry_cl2_offset_rank);
592 }
592 }
593
593
594 /*
594 /*
595 * Return the hash of the node corresponding to the given rev. The
595 * Return the hash of the node corresponding to the given rev. The
596 * rev is assumed to be existing. If not, an exception is set.
596 * rev is assumed to be existing. If not, an exception is set.
597 */
597 */
598 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
598 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
599 {
599 {
600 const char *node = index_node(self, pos);
600 const char *node = index_node(self, pos);
601 if (node == NULL) {
601 if (node == NULL) {
602 PyErr_Format(PyExc_IndexError, "could not access rev %d",
602 PyErr_Format(PyExc_IndexError, "could not access rev %d",
603 (int)pos);
603 (int)pos);
604 }
604 }
605 return node;
605 return node;
606 }
606 }
607
607
608 static int nt_insert(nodetree *self, const char *node, int rev);
608 static int nt_insert(nodetree *self, const char *node, int rev);
609
609
610 static int node_check(Py_ssize_t nodelen, PyObject *obj, char **node)
610 static int node_check(Py_ssize_t nodelen, PyObject *obj, char **node)
611 {
611 {
612 Py_ssize_t thisnodelen;
612 Py_ssize_t thisnodelen;
613 if (PyBytes_AsStringAndSize(obj, node, &thisnodelen) == -1)
613 if (PyBytes_AsStringAndSize(obj, node, &thisnodelen) == -1)
614 return -1;
614 return -1;
615 if (nodelen == thisnodelen)
615 if (nodelen == thisnodelen)
616 return 0;
616 return 0;
617 PyErr_Format(PyExc_ValueError, "node len %zd != expected node len %zd",
617 PyErr_Format(PyExc_ValueError, "node len %zd != expected node len %zd",
618 thisnodelen, nodelen);
618 thisnodelen, nodelen);
619 return -1;
619 return -1;
620 }
620 }
621
621
/*
 * Append one revision entry (passed as a Python tuple) to the in-memory
 * tail of the index.
 *
 * The tuple is unpacked according to `tuple_format`; the entry is then
 * serialized into the format-specific on-disk byte layout (v1, v2 or
 * changelogv2) inside `self->added`, growing that buffer as needed.
 * Returns None on success, NULL with an exception set on error.
 */
static PyObject *index_append(indexObject *self, PyObject *obj)
{
	uint64_t offset_flags, sidedata_offset;
	int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
	    sidedata_comp_len, rank;
	char data_comp_mode, sidedata_comp_mode;
	Py_ssize_t c_node_id_len;
	const char *c_node_id;
	char comp_field;
	char *data;

	if (!PyArg_ParseTuple(obj, tuple_format, &offset_flags, &comp_len,
	                      &uncomp_len, &base_rev, &link_rev, &parent_1,
	                      &parent_2, &c_node_id, &c_node_id_len,
	                      &sidedata_offset, &sidedata_comp_len,
	                      &data_comp_mode, &sidedata_comp_mode, &rank)) {
		/* NOTE(review): the format above unpacks 13 tuple items (the
		   node counts once); the "12-tuple" count in this message
		   looks stale — confirm against `tuple_format`. */
		PyErr_SetString(PyExc_TypeError, "12-tuple required");
		return NULL;
	}

	if (c_node_id_len != self->nodelen) {
		PyErr_SetString(PyExc_TypeError, "invalid node");
		return NULL;
	}
	if (self->format_version == format_v1) {

		/* v1 has no place to store compression modes: both must be
		   the implicit "inline" mode. */
		if (data_comp_mode != comp_mode_inline) {
			PyErr_Format(PyExc_ValueError,
			             "invalid data compression mode: %i",
			             data_comp_mode);
			return NULL;
		}
		if (sidedata_comp_mode != comp_mode_inline) {
			PyErr_Format(PyExc_ValueError,
			             "invalid sidedata compression mode: %i",
			             sidedata_comp_mode);
			return NULL;
		}
	}

	/* Grow the append buffer geometrically (doubling, 4096 entries to
	   start) when it is full. */
	if (self->new_length == self->added_length) {
		size_t new_added_length =
		    self->added_length ? self->added_length * 2 : 4096;
		void *new_added = PyMem_Realloc(
		    self->added, new_added_length * self->entry_size);
		if (!new_added)
			return PyErr_NoMemory();
		self->added = new_added;
		self->added_length = new_added_length;
	}
	/* New rev number = on-disk entries + in-memory entries so far. */
	rev = self->length + self->new_length;
	data = self->added + self->entry_size * self->new_length++;

	/* Zero the whole entry first so unused bytes are deterministic. */
	memset(data, 0, self->entry_size);

	/* Serialize the entry with the byte offsets of the target format. */
	if (self->format_version == format_v1) {
		putbe32(offset_flags >> 32, data + entry_v1_offset_high);
		putbe32(offset_flags & 0xffffffffU,
		        data + entry_v1_offset_offset_flags);
		putbe32(comp_len, data + entry_v1_offset_comp_len);
		putbe32(uncomp_len, data + entry_v1_offset_uncomp_len);
		putbe32(base_rev, data + entry_v1_offset_base_rev);
		putbe32(link_rev, data + entry_v1_offset_link_rev);
		putbe32(parent_1, data + entry_v1_offset_parent_1);
		putbe32(parent_2, data + entry_v1_offset_parent_2);
		memcpy(data + entry_v1_offset_node_id, c_node_id,
		       c_node_id_len);
	} else if (self->format_version == format_v2) {
		putbe32(offset_flags >> 32, data + entry_v2_offset_high);
		putbe32(offset_flags & 0xffffffffU,
		        data + entry_v2_offset_offset_flags);
		putbe32(comp_len, data + entry_v2_offset_comp_len);
		putbe32(uncomp_len, data + entry_v2_offset_uncomp_len);
		putbe32(base_rev, data + entry_v2_offset_base_rev);
		putbe32(link_rev, data + entry_v2_offset_link_rev);
		putbe32(parent_1, data + entry_v2_offset_parent_1);
		putbe32(parent_2, data + entry_v2_offset_parent_2);
		memcpy(data + entry_v2_offset_node_id, c_node_id,
		       c_node_id_len);
		putbe64(sidedata_offset,
		        data + entry_v2_offset_sidedata_offset);
		putbe32(sidedata_comp_len,
		        data + entry_v2_offset_sidedata_comp_len);
		/* Pack both compression modes into one byte: data mode in
		   bits 0-1, sidedata mode in bits 2-3. */
		comp_field = data_comp_mode & 3;
		comp_field = comp_field | (sidedata_comp_mode & 3) << 2;
		data[entry_v2_offset_all_comp_mode] = comp_field;
	} else if (self->format_version == format_cl2) {
		/* changelogv2 stores no base_rev/link_rev and persists the
		   rank in addition to the v2-style fields. */
		putbe32(offset_flags >> 32, data + entry_cl2_offset_high);
		putbe32(offset_flags & 0xffffffffU,
		        data + entry_cl2_offset_offset_flags);
		putbe32(comp_len, data + entry_cl2_offset_comp_len);
		putbe32(uncomp_len, data + entry_cl2_offset_uncomp_len);
		putbe32(parent_1, data + entry_cl2_offset_parent_1);
		putbe32(parent_2, data + entry_cl2_offset_parent_2);
		memcpy(data + entry_cl2_offset_node_id, c_node_id,
		       c_node_id_len);
		putbe64(sidedata_offset,
		        data + entry_cl2_offset_sidedata_offset);
		putbe32(sidedata_comp_len,
		        data + entry_cl2_offset_sidedata_comp_len);
		comp_field = data_comp_mode & 3;
		comp_field = comp_field | (sidedata_comp_mode & 3) << 2;
		data[entry_cl2_offset_all_comp_mode] = comp_field;
		putbe32(rank, data + entry_cl2_offset_rank);
	} else {
		raise_revlog_error();
		return NULL;
	}

	/* Keep the node->rev trie in sync when it has been built. */
	if (self->ntinitialized)
		nt_insert(&self->nt, c_node_id, rev);

	/* The cached head list is stale now that a rev was added. */
	Py_CLEAR(self->headrevs);
	Py_RETURN_NONE;
}
737
737
/* Replace an existing index entry's sidedata offset and length with new ones.
   This cannot be used outside of the context of sidedata rewriting,
   inside the transaction that creates the given revision. */
static PyObject *index_replace_sidedata_info(indexObject *self, PyObject *args)
{
	uint64_t offset_flags, sidedata_offset;
	Py_ssize_t rev;
	int sidedata_comp_len;
	char comp_mode;
	char *data;
	/* Pick PyArg_ParseTuple format units matching the width of `long`
	   on this platform (K/k = unsigned long long / unsigned long). */
#if LONG_MAX == 0x7fffffffL
	const char *const sidedata_format = "nKiKB";
#else
	const char *const sidedata_format = "nkikB";
#endif

	if (self->entry_size == v1_entry_size || self->inlined) {
		/*
		 There is a bug in the transaction handling when going from an
	   inline revlog to a separate index and data file. Turn it off until
	   it's fixed, since v2 revlogs sometimes get rewritten on exchange.
	   See issue6485.
		*/
		raise_revlog_error();
		return NULL;
	}

	if (!PyArg_ParseTuple(args, sidedata_format, &rev, &sidedata_offset,
	                      &sidedata_comp_len, &offset_flags, &comp_mode))
		return NULL;

	if (rev < 0 || rev >= index_length(self)) {
		PyErr_SetString(PyExc_IndexError, "revision outside index");
		return NULL;
	}
	/* Only entries added in the current transaction (i.e. past the
	   on-disk length) may be rewritten. */
	if (rev < self->length) {
		PyErr_SetString(
		    PyExc_IndexError,
		    "cannot rewrite entries outside of this transaction");
		return NULL;
	}

	/* Find the newly added node, offset from the "already on-disk" length
	 */
	data = self->added + self->entry_size * (rev - self->length);
	if (self->format_version == format_v2) {
		/* Patch offset/flags, sidedata location/length, and the
		   sidedata compression mode (bits 2-3 of the packed byte),
		   leaving the data compression mode (bits 0-1) untouched. */
		putbe64(offset_flags, data + entry_v2_offset_high);
		putbe64(sidedata_offset,
		        data + entry_v2_offset_sidedata_offset);
		putbe32(sidedata_comp_len,
		        data + entry_v2_offset_sidedata_comp_len);
		data[entry_v2_offset_all_comp_mode] =
		    (data[entry_v2_offset_all_comp_mode] & ~(3 << 2)) |
		    ((comp_mode & 3) << 2);
	} else if (self->format_version == format_cl2) {
		/* Same patching as v2, with changelogv2 byte offsets. */
		putbe64(offset_flags, data + entry_cl2_offset_high);
		putbe64(sidedata_offset,
		        data + entry_cl2_offset_sidedata_offset);
		putbe32(sidedata_comp_len,
		        data + entry_cl2_offset_sidedata_comp_len);
		data[entry_cl2_offset_all_comp_mode] =
		    (data[entry_cl2_offset_all_comp_mode] & ~(3 << 2)) |
		    ((comp_mode & 3) << 2);
	} else {
		raise_revlog_error();
		return NULL;
	}

	Py_RETURN_NONE;
}
808
808
/*
 * Build a dict of diagnostic counters about this index (entry counts,
 * node-trie statistics). Returns a new dict, or NULL with an exception
 * set on allocation/insertion failure.
 */
static PyObject *index_stats(indexObject *self)
{
	PyObject *obj = PyDict_New();
	PyObject *s = NULL;
	PyObject *t = NULL;

	if (obj == NULL)
		return NULL;

/* istat(field, label): store self->field in obj under the bytes key
   `label`; jumps to `bail` on any failure, where s/t/obj are released. */
#define istat(__n, __d)                                                        \
	do {                                                                   \
		s = PyBytes_FromString(__d);                                   \
		t = PyLong_FromSsize_t(self->__n);                             \
		if (!s || !t)                                                  \
			goto bail;                                             \
		if (PyDict_SetItem(obj, s, t) == -1)                           \
			goto bail;                                             \
		Py_CLEAR(s);                                                   \
		Py_CLEAR(t);                                                   \
	} while (0)

	if (self->added_length)
		istat(new_length, "index entries added");
	istat(length, "revs in memory");
	istat(ntlookups, "node trie lookups");
	istat(ntmisses, "node trie misses");
	istat(ntrev, "node trie last rev scanned");
	/* Trie internals are only meaningful once the trie exists. */
	if (self->ntinitialized) {
		istat(nt.capacity, "node trie capacity");
		istat(nt.depth, "node trie depth");
		istat(nt.length, "node trie count");
		istat(nt.splits, "node trie splits");
	}

#undef istat

	return obj;

bail:
	/* goto-based cleanup: release whatever was allocated so far. */
	Py_XDECREF(obj);
	Py_XDECREF(s);
	Py_XDECREF(t);
	return NULL;
}
853
853
854 /*
854 /*
855 * When we cache a list, we want to be sure the caller can't mutate
855 * When we cache a list, we want to be sure the caller can't mutate
856 * the cached copy.
856 * the cached copy.
857 */
857 */
858 static PyObject *list_copy(PyObject *list)
858 static PyObject *list_copy(PyObject *list)
859 {
859 {
860 Py_ssize_t len = PyList_GET_SIZE(list);
860 Py_ssize_t len = PyList_GET_SIZE(list);
861 PyObject *newlist = PyList_New(len);
861 PyObject *newlist = PyList_New(len);
862 Py_ssize_t i;
862 Py_ssize_t i;
863
863
864 if (newlist == NULL)
864 if (newlist == NULL)
865 return NULL;
865 return NULL;
866
866
867 for (i = 0; i < len; i++) {
867 for (i = 0; i < len; i++) {
868 PyObject *obj = PyList_GET_ITEM(list, i);
868 PyObject *obj = PyList_GET_ITEM(list, i);
869 Py_INCREF(obj);
869 Py_INCREF(obj);
870 PyList_SET_ITEM(newlist, i, obj);
870 PyList_SET_ITEM(newlist, i, obj);
871 }
871 }
872
872
873 return newlist;
873 return newlist;
874 }
874 }
875
875
876 static int check_filter(PyObject *filter, Py_ssize_t arg)
876 static int check_filter(PyObject *filter, Py_ssize_t arg)
877 {
877 {
878 if (filter) {
878 if (filter) {
879 PyObject *arglist, *result;
879 PyObject *arglist, *result;
880 int isfiltered;
880 int isfiltered;
881
881
882 arglist = Py_BuildValue("(n)", arg);
882 arglist = Py_BuildValue("(n)", arg);
883 if (!arglist) {
883 if (!arglist) {
884 return -1;
884 return -1;
885 }
885 }
886
886
887 result = PyObject_Call(filter, arglist, NULL);
887 result = PyObject_Call(filter, arglist, NULL);
888 Py_DECREF(arglist);
888 Py_DECREF(arglist);
889 if (!result) {
889 if (!result) {
890 return -1;
890 return -1;
891 }
891 }
892
892
893 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
893 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
894 * same as this function, so we can just return it directly.*/
894 * same as this function, so we can just return it directly.*/
895 isfiltered = PyObject_IsTrue(result);
895 isfiltered = PyObject_IsTrue(result);
896 Py_DECREF(result);
896 Py_DECREF(result);
897 return isfiltered;
897 return isfiltered;
898 } else {
898 } else {
899 return 0;
899 return 0;
900 }
900 }
901 }
901 }
902
902
903 static inline void set_phase_from_parents(char *phases, int parent_1,
903 static inline void set_phase_from_parents(char *phases, int parent_1,
904 int parent_2, Py_ssize_t i)
904 int parent_2, Py_ssize_t i)
905 {
905 {
906 if (parent_1 >= 0 && phases[parent_1] > phases[i])
906 if (parent_1 >= 0 && phases[parent_1] > phases[i])
907 phases[i] = phases[parent_1];
907 phases[i] = phases[parent_1];
908 if (parent_2 >= 0 && phases[parent_2] > phases[i])
908 if (parent_2 >= 0 && phases[parent_2] > phases[i])
909 phases[i] = phases[parent_2];
909 phases[i] = phases[parent_2];
910 }
910 }
911
911
912 static PyObject *reachableroots2(indexObject *self, PyObject *args)
912 static PyObject *reachableroots2(indexObject *self, PyObject *args)
913 {
913 {
914
914
915 /* Input */
915 /* Input */
916 long minroot;
916 long minroot;
917 PyObject *includepatharg = NULL;
917 PyObject *includepatharg = NULL;
918 int includepath = 0;
918 int includepath = 0;
919 /* heads and roots are lists */
919 /* heads and roots are lists */
920 PyObject *heads = NULL;
920 PyObject *heads = NULL;
921 PyObject *roots = NULL;
921 PyObject *roots = NULL;
922 PyObject *reachable = NULL;
922 PyObject *reachable = NULL;
923
923
924 PyObject *val;
924 PyObject *val;
925 Py_ssize_t len = index_length(self);
925 Py_ssize_t len = index_length(self);
926 long revnum;
926 long revnum;
927 Py_ssize_t k;
927 Py_ssize_t k;
928 Py_ssize_t i;
928 Py_ssize_t i;
929 Py_ssize_t l;
929 Py_ssize_t l;
930 int r;
930 int r;
931 int parents[2];
931 int parents[2];
932
932
933 /* Internal data structure:
933 /* Internal data structure:
934 * tovisit: array of length len+1 (all revs + nullrev), filled upto
934 * tovisit: array of length len+1 (all revs + nullrev), filled upto
935 * lentovisit
935 * lentovisit
936 *
936 *
937 * revstates: array of length len+1 (all revs + nullrev) */
937 * revstates: array of length len+1 (all revs + nullrev) */
938 int *tovisit = NULL;
938 int *tovisit = NULL;
939 long lentovisit = 0;
939 long lentovisit = 0;
940 enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
940 enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
941 char *revstates = NULL;
941 char *revstates = NULL;
942
942
943 /* Get arguments */
943 /* Get arguments */
944 if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
944 if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
945 &PyList_Type, &roots, &PyBool_Type,
945 &PyList_Type, &roots, &PyBool_Type,
946 &includepatharg))
946 &includepatharg))
947 goto bail;
947 goto bail;
948
948
949 if (includepatharg == Py_True)
949 if (includepatharg == Py_True)
950 includepath = 1;
950 includepath = 1;
951
951
952 /* Initialize return set */
952 /* Initialize return set */
953 reachable = PyList_New(0);
953 reachable = PyList_New(0);
954 if (reachable == NULL)
954 if (reachable == NULL)
955 goto bail;
955 goto bail;
956
956
957 /* Initialize internal datastructures */
957 /* Initialize internal datastructures */
958 tovisit = (int *)malloc((len + 1) * sizeof(int));
958 tovisit = (int *)malloc((len + 1) * sizeof(int));
959 if (tovisit == NULL) {
959 if (tovisit == NULL) {
960 PyErr_NoMemory();
960 PyErr_NoMemory();
961 goto bail;
961 goto bail;
962 }
962 }
963
963
964 revstates = (char *)calloc(len + 1, 1);
964 revstates = (char *)calloc(len + 1, 1);
965 if (revstates == NULL) {
965 if (revstates == NULL) {
966 PyErr_NoMemory();
966 PyErr_NoMemory();
967 goto bail;
967 goto bail;
968 }
968 }
969
969
970 l = PyList_GET_SIZE(roots);
970 l = PyList_GET_SIZE(roots);
971 for (i = 0; i < l; i++) {
971 for (i = 0; i < l; i++) {
972 revnum = PyLong_AsLong(PyList_GET_ITEM(roots, i));
972 revnum = PyLong_AsLong(PyList_GET_ITEM(roots, i));
973 if (revnum == -1 && PyErr_Occurred())
973 if (revnum == -1 && PyErr_Occurred())
974 goto bail;
974 goto bail;
975 /* If root is out of range, e.g. wdir(), it must be unreachable
975 /* If root is out of range, e.g. wdir(), it must be unreachable
976 * from heads. So we can just ignore it. */
976 * from heads. So we can just ignore it. */
977 if (revnum + 1 < 0 || revnum + 1 >= len + 1)
977 if (revnum + 1 < 0 || revnum + 1 >= len + 1)
978 continue;
978 continue;
979 revstates[revnum + 1] |= RS_ROOT;
979 revstates[revnum + 1] |= RS_ROOT;
980 }
980 }
981
981
982 /* Populate tovisit with all the heads */
982 /* Populate tovisit with all the heads */
983 l = PyList_GET_SIZE(heads);
983 l = PyList_GET_SIZE(heads);
984 for (i = 0; i < l; i++) {
984 for (i = 0; i < l; i++) {
985 revnum = PyLong_AsLong(PyList_GET_ITEM(heads, i));
985 revnum = PyLong_AsLong(PyList_GET_ITEM(heads, i));
986 if (revnum == -1 && PyErr_Occurred())
986 if (revnum == -1 && PyErr_Occurred())
987 goto bail;
987 goto bail;
988 if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
988 if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
989 PyErr_SetString(PyExc_IndexError, "head out of range");
989 PyErr_SetString(PyExc_IndexError, "head out of range");
990 goto bail;
990 goto bail;
991 }
991 }
992 if (!(revstates[revnum + 1] & RS_SEEN)) {
992 if (!(revstates[revnum + 1] & RS_SEEN)) {
993 tovisit[lentovisit++] = (int)revnum;
993 tovisit[lentovisit++] = (int)revnum;
994 revstates[revnum + 1] |= RS_SEEN;
994 revstates[revnum + 1] |= RS_SEEN;
995 }
995 }
996 }
996 }
997
997
998 /* Visit the tovisit list and find the reachable roots */
998 /* Visit the tovisit list and find the reachable roots */
999 k = 0;
999 k = 0;
1000 while (k < lentovisit) {
1000 while (k < lentovisit) {
1001 /* Add the node to reachable if it is a root*/
1001 /* Add the node to reachable if it is a root*/
1002 revnum = tovisit[k++];
1002 revnum = tovisit[k++];
1003 if (revstates[revnum + 1] & RS_ROOT) {
1003 if (revstates[revnum + 1] & RS_ROOT) {
1004 revstates[revnum + 1] |= RS_REACHABLE;
1004 revstates[revnum + 1] |= RS_REACHABLE;
1005 val = PyLong_FromLong(revnum);
1005 val = PyLong_FromLong(revnum);
1006 if (val == NULL)
1006 if (val == NULL)
1007 goto bail;
1007 goto bail;
1008 r = PyList_Append(reachable, val);
1008 r = PyList_Append(reachable, val);
1009 Py_DECREF(val);
1009 Py_DECREF(val);
1010 if (r < 0)
1010 if (r < 0)
1011 goto bail;
1011 goto bail;
1012 if (includepath == 0)
1012 if (includepath == 0)
1013 continue;
1013 continue;
1014 }
1014 }
1015
1015
1016 /* Add its parents to the list of nodes to visit */
1016 /* Add its parents to the list of nodes to visit */
1017 if (revnum == nullrev)
1017 if (revnum == nullrev)
1018 continue;
1018 continue;
1019 r = index_get_parents(self, revnum, parents, (int)len - 1);
1019 r = index_get_parents(self, revnum, parents, (int)len - 1);
1020 if (r < 0)
1020 if (r < 0)
1021 goto bail;
1021 goto bail;
1022 for (i = 0; i < 2; i++) {
1022 for (i = 0; i < 2; i++) {
1023 if (!(revstates[parents[i] + 1] & RS_SEEN) &&
1023 if (!(revstates[parents[i] + 1] & RS_SEEN) &&
1024 parents[i] >= minroot) {
1024 parents[i] >= minroot) {
1025 tovisit[lentovisit++] = parents[i];
1025 tovisit[lentovisit++] = parents[i];
1026 revstates[parents[i] + 1] |= RS_SEEN;
1026 revstates[parents[i] + 1] |= RS_SEEN;
1027 }
1027 }
1028 }
1028 }
1029 }
1029 }
1030
1030
1031 /* Find all the nodes in between the roots we found and the heads
1031 /* Find all the nodes in between the roots we found and the heads
1032 * and add them to the reachable set */
1032 * and add them to the reachable set */
1033 if (includepath == 1) {
1033 if (includepath == 1) {
1034 long minidx = minroot;
1034 long minidx = minroot;
1035 if (minidx < 0)
1035 if (minidx < 0)
1036 minidx = 0;
1036 minidx = 0;
1037 for (i = minidx; i < len; i++) {
1037 for (i = minidx; i < len; i++) {
1038 if (!(revstates[i + 1] & RS_SEEN))
1038 if (!(revstates[i + 1] & RS_SEEN))
1039 continue;
1039 continue;
1040 r = index_get_parents(self, i, parents, (int)len - 1);
1040 r = index_get_parents(self, i, parents, (int)len - 1);
1041 /* Corrupted index file, error is set from
1041 /* Corrupted index file, error is set from
1042 * index_get_parents */
1042 * index_get_parents */
1043 if (r < 0)
1043 if (r < 0)
1044 goto bail;
1044 goto bail;
1045 if (((revstates[parents[0] + 1] |
1045 if (((revstates[parents[0] + 1] |
1046 revstates[parents[1] + 1]) &
1046 revstates[parents[1] + 1]) &
1047 RS_REACHABLE) &&
1047 RS_REACHABLE) &&
1048 !(revstates[i + 1] & RS_REACHABLE)) {
1048 !(revstates[i + 1] & RS_REACHABLE)) {
1049 revstates[i + 1] |= RS_REACHABLE;
1049 revstates[i + 1] |= RS_REACHABLE;
1050 val = PyLong_FromSsize_t(i);
1050 val = PyLong_FromSsize_t(i);
1051 if (val == NULL)
1051 if (val == NULL)
1052 goto bail;
1052 goto bail;
1053 r = PyList_Append(reachable, val);
1053 r = PyList_Append(reachable, val);
1054 Py_DECREF(val);
1054 Py_DECREF(val);
1055 if (r < 0)
1055 if (r < 0)
1056 goto bail;
1056 goto bail;
1057 }
1057 }
1058 }
1058 }
1059 }
1059 }
1060
1060
1061 free(revstates);
1061 free(revstates);
1062 free(tovisit);
1062 free(tovisit);
1063 return reachable;
1063 return reachable;
1064 bail:
1064 bail:
1065 Py_XDECREF(reachable);
1065 Py_XDECREF(reachable);
1066 free(revstates);
1066 free(revstates);
1067 free(tovisit);
1067 free(tovisit);
1068 return NULL;
1068 return NULL;
1069 }
1069 }
1070
1070
1071 static int add_roots_get_min(indexObject *self, PyObject *roots, char *phases,
1071 static int add_roots_get_min(indexObject *self, PyObject *roots, char *phases,
1072 char phase)
1072 char phase)
1073 {
1073 {
1074 Py_ssize_t len = index_length(self);
1074 Py_ssize_t len = index_length(self);
1075 PyObject *item;
1075 PyObject *item;
1076 PyObject *iterator;
1076 PyObject *iterator;
1077 int rev, minrev = -1;
1077 int rev, minrev = -1;
1078 char *node;
1078 char *node;
1079
1079
1080 if (!PySet_Check(roots)) {
1080 if (!PySet_Check(roots)) {
1081 PyErr_SetString(PyExc_TypeError,
1081 PyErr_SetString(PyExc_TypeError,
1082 "roots must be a set of nodes");
1082 "roots must be a set of nodes");
1083 return -2;
1083 return -2;
1084 }
1084 }
1085 iterator = PyObject_GetIter(roots);
1085 iterator = PyObject_GetIter(roots);
1086 if (iterator == NULL)
1086 if (iterator == NULL)
1087 return -2;
1087 return -2;
1088 while ((item = PyIter_Next(iterator))) {
1088 while ((item = PyIter_Next(iterator))) {
1089 if (node_check(self->nodelen, item, &node) == -1)
1089 if (node_check(self->nodelen, item, &node) == -1)
1090 goto failed;
1090 goto failed;
1091 rev = index_find_node(self, node);
1091 rev = index_find_node(self, node);
1092 /* null is implicitly public, so negative is invalid */
1092 /* null is implicitly public, so negative is invalid */
1093 if (rev < 0 || rev >= len)
1093 if (rev < 0 || rev >= len)
1094 goto failed;
1094 goto failed;
1095 phases[rev] = phase;
1095 phases[rev] = phase;
1096 if (minrev == -1 || minrev > rev)
1096 if (minrev == -1 || minrev > rev)
1097 minrev = rev;
1097 minrev = rev;
1098 Py_DECREF(item);
1098 Py_DECREF(item);
1099 }
1099 }
1100 Py_DECREF(iterator);
1100 Py_DECREF(iterator);
1101 return minrev;
1101 return minrev;
1102 failed:
1102 failed:
1103 Py_DECREF(iterator);
1103 Py_DECREF(iterator);
1104 Py_DECREF(item);
1104 Py_DECREF(item);
1105 return -2;
1105 return -2;
1106 }
1106 }
1107
1107
/*
 * Compute the phase of every revision from the given phase roots.
 *
 * Python signature: (roots_dict) where roots_dict maps a tracked phase
 * number to a set of root node ids.  Returns a tuple
 * (index_length, {phase: set_of_revisions}); revisions that remain in
 * phase 0 (public) are not listed in any set.
 */
static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
{
	/* 0: public (untracked), 1: draft, 2: secret, 32: archive,
	   96: internal */
	static const char trackedphases[] = {1, 2, 32, 96};
	PyObject *roots = Py_None;
	PyObject *phasesetsdict = NULL;
	/* one result set per entry of trackedphases, same order */
	PyObject *phasesets[4] = {NULL, NULL, NULL, NULL};
	Py_ssize_t len = index_length(self);
	/* phases[rev] holds the currently-known phase number of rev;
	 * calloc gives the "public" (0) default for every revision */
	char *phases = NULL;
	int minphaserev = -1, rev, i;
	const int numphases = (int)(sizeof(phasesets) / sizeof(phasesets[0]));

	if (!PyArg_ParseTuple(args, "O", &roots))
		return NULL;
	if (roots == NULL || !PyDict_Check(roots)) {
		PyErr_SetString(PyExc_TypeError, "roots must be a dictionary");
		return NULL;
	}

	phases = calloc(len, 1);
	if (phases == NULL) {
		PyErr_NoMemory();
		return NULL;
	}

	/* Seed `phases` with the roots of each tracked phase and remember
	 * the smallest root so the propagation loop below can start there. */
	for (i = 0; i < numphases; ++i) {
		PyObject *pyphase = PyLong_FromLong(trackedphases[i]);
		PyObject *phaseroots = NULL;
		if (pyphase == NULL)
			goto release;
		/* borrowed reference; absent phase simply has no roots */
		phaseroots = PyDict_GetItem(roots, pyphase);
		Py_DECREF(pyphase);
		if (phaseroots == NULL)
			continue;
		rev = add_roots_get_min(self, phaseroots, phases,
		                        trackedphases[i]);
		if (rev == -2)
			goto release;
		if (rev != -1 && (minphaserev == -1 || rev < minphaserev))
			minphaserev = rev;
	}

	for (i = 0; i < numphases; ++i) {
		phasesets[i] = PySet_New(NULL);
		if (phasesets[i] == NULL)
			goto release;
	}

	/* no tracked root at all: nothing to propagate */
	if (minphaserev == -1)
		minphaserev = len;
	for (rev = minphaserev; rev < len; ++rev) {
		PyObject *pyphase = NULL;
		PyObject *pyrev = NULL;
		int parents[2];
		/*
		 * The parent lookup could be skipped for phaseroots, but
		 * phase --force would historically not recompute them
		 * correctly, leaving descendents with a lower phase around.
		 * As such, unconditionally recompute the phase.
		 */
		if (index_get_parents(self, rev, parents, (int)len - 1) < 0)
			goto release;
		set_phase_from_parents(phases, parents[0], parents[1], rev);
		switch (phases[rev]) {
		case 0:
			continue;
		case 1:
			pyphase = phasesets[0];
			break;
		case 2:
			pyphase = phasesets[1];
			break;
		case 32:
			pyphase = phasesets[2];
			break;
		case 96:
			pyphase = phasesets[3];
			break;
		default:
			/* this should never happen since the phase number is
			 * specified by this function. */
			PyErr_SetString(PyExc_SystemError,
			                "bad phase number in internal list");
			goto release;
		}
		pyrev = PyLong_FromLong(rev);
		if (pyrev == NULL)
			goto release;
		if (PySet_Add(pyphase, pyrev) == -1) {
			Py_DECREF(pyrev);
			goto release;
		}
		Py_DECREF(pyrev);
	}

	/* Build the {phase: set} result; ownership of each set moves into
	 * the dict, hence the DECREF + NULL so `release` stays safe. */
	phasesetsdict = _dict_new_presized(numphases);
	if (phasesetsdict == NULL)
		goto release;
	for (i = 0; i < numphases; ++i) {
		PyObject *pyphase = PyLong_FromLong(trackedphases[i]);
		if (pyphase == NULL)
			goto release;
		if (PyDict_SetItem(phasesetsdict, pyphase, phasesets[i]) ==
		    -1) {
			Py_DECREF(pyphase);
			goto release;
		}
		Py_DECREF(phasesets[i]);
		phasesets[i] = NULL;
	}

	free(phases);
	return Py_BuildValue("nN", len, phasesetsdict);

release:
	for (i = 0; i < numphases; ++i)
		Py_XDECREF(phasesets[i]);
	Py_XDECREF(phasesetsdict);

	free(phases);
	return NULL;
}
1231
1231
/*
 * Compute the head revisions of the index.
 *
 * Python signature: headrevs([filteredrevs]).  `filteredrevs`, when not
 * None, must support `__contains__` and names revisions to ignore.  The
 * result is cached on the index object and reused as long as the same
 * `filteredrevs` object is passed; an empty index yields [-1].
 */
static PyObject *index_headrevs(indexObject *self, PyObject *args)
{
	Py_ssize_t i, j, len;
	/* nothead[i] != 0 once rev i is known not to be a head */
	char *nothead = NULL;
	PyObject *heads = NULL;
	PyObject *filter = NULL;
	PyObject *filteredrevs = Py_None;

	if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
		return NULL;
	}

	/* cache hit: same filter object as last time */
	if (self->headrevs && filteredrevs == self->filteredrevs)
		return list_copy(self->headrevs);

	Py_DECREF(self->filteredrevs);
	self->filteredrevs = filteredrevs;
	Py_INCREF(filteredrevs);

	if (filteredrevs != Py_None) {
		filter = PyObject_GetAttrString(filteredrevs, "__contains__");
		if (!filter) {
			PyErr_SetString(
			    PyExc_TypeError,
			    "filteredrevs has no attribute __contains__");
			goto bail;
		}
	}

	len = index_length(self);
	heads = PyList_New(0);
	if (heads == NULL)
		goto bail;
	if (len == 0) {
		PyObject *nullid = PyLong_FromLong(-1);
		if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
			Py_XDECREF(nullid);
			goto bail;
		}
		goto done;
	}

	nothead = calloc(len, 1);
	if (nothead == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	/* Walk newest-to-oldest, marking the parents of every unfiltered
	 * revision as non-heads. */
	for (i = len - 1; i >= 0; i--) {
		int isfiltered;
		int parents[2];

		/* If nothead[i] == 1, it means we've seen an unfiltered child
		 * of this node already, and therefore this node is not
		 * filtered. So we can skip the expensive check_filter step.
		 */
		if (nothead[i] != 1) {
			isfiltered = check_filter(filter, i);
			if (isfiltered == -1) {
				PyErr_SetString(PyExc_TypeError,
				                "unable to check filter");
				goto bail;
			}

			if (isfiltered) {
				nothead[i] = 1;
				continue;
			}
		}

		if (index_get_parents(self, i, parents, (int)len - 1) < 0)
			goto bail;
		for (j = 0; j < 2; j++) {
			if (parents[j] >= 0)
				nothead[parents[j]] = 1;
		}
	}

	/* Everything still unmarked is a head. */
	for (i = 0; i < len; i++) {
		PyObject *head;

		if (nothead[i])
			continue;
		head = PyLong_FromSsize_t(i);
		if (head == NULL || PyList_Append(heads, head) == -1) {
			Py_XDECREF(head);
			goto bail;
		}
	}

done:
	/* the cache keeps the only strong reference to `heads` */
	self->headrevs = heads;
	Py_XDECREF(filter);
	free(nothead);
	return list_copy(self->headrevs);
bail:
	Py_XDECREF(filter);
	Py_XDECREF(heads);
	free(nothead);
	return NULL;
}
1333
1333
1334 /**
1334 /**
1335 * Obtain the base revision index entry.
1335 * Obtain the base revision index entry.
1336 *
1336 *
1337 * Callers must ensure that rev >= 0 or illegal memory access may occur.
1337 * Callers must ensure that rev >= 0 or illegal memory access may occur.
1338 */
1338 */
1339 static inline int index_baserev(indexObject *self, int rev)
1339 static inline int index_baserev(indexObject *self, int rev)
1340 {
1340 {
1341 const char *data;
1341 const char *data;
1342 int result;
1342 int result;
1343
1343
1344 data = index_deref(self, rev);
1344 data = index_deref(self, rev);
1345 if (data == NULL)
1345 if (data == NULL)
1346 return -2;
1346 return -2;
1347
1347
1348 if (self->format_version == format_v1) {
1348 if (self->format_version == format_v1) {
1349 result = getbe32(data + entry_v1_offset_base_rev);
1349 result = getbe32(data + entry_v1_offset_base_rev);
1350 } else if (self->format_version == format_v2) {
1350 } else if (self->format_version == format_v2) {
1351 result = getbe32(data + entry_v2_offset_base_rev);
1351 result = getbe32(data + entry_v2_offset_base_rev);
1352 } else if (self->format_version == format_cl2) {
1352 } else if (self->format_version == format_cl2) {
1353 return rev;
1353 return rev;
1354 } else {
1354 } else {
1355 raise_revlog_error();
1355 raise_revlog_error();
1356 return -1;
1356 return -1;
1357 }
1357 }
1358
1358
1359 if (result > rev) {
1359 if (result > rev) {
1360 PyErr_Format(
1360 PyErr_Format(
1361 PyExc_ValueError,
1361 PyExc_ValueError,
1362 "corrupted revlog, revision base above revision: %d, %d",
1362 "corrupted revlog, revision base above revision: %d, %d",
1363 rev, result);
1363 rev, result);
1364 return -2;
1364 return -2;
1365 }
1365 }
1366 if (result < -1) {
1366 if (result < -1) {
1367 PyErr_Format(
1367 PyErr_Format(
1368 PyExc_ValueError,
1368 PyExc_ValueError,
1369 "corrupted revlog, revision base out of range: %d, %d", rev,
1369 "corrupted revlog, revision base out of range: %d, %d", rev,
1370 result);
1370 result);
1371 return -2;
1371 return -2;
1372 }
1372 }
1373 return result;
1373 return result;
1374 }
1374 }
1375
1375
1376 /**
1376 /**
1377 * Find if a revision is a snapshot or not
1377 * Find if a revision is a snapshot or not
1378 *
1378 *
1379 * Only relevant for sparse-revlog case.
1379 * Only relevant for sparse-revlog case.
1380 * Callers must ensure that rev is in a valid range.
1380 * Callers must ensure that rev is in a valid range.
1381 */
1381 */
1382 static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
1382 static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
1383 {
1383 {
1384 int ps[2];
1384 int ps[2];
1385 int b;
1385 Py_ssize_t base;
1386 Py_ssize_t base;
1386 while (rev >= 0) {
1387 while (rev >= 0) {
1387 base = (Py_ssize_t)index_baserev(self, rev);
1388 base = (Py_ssize_t)index_baserev(self, rev);
1388 if (base == rev) {
1389 if (base == rev) {
1389 base = -1;
1390 base = -1;
1390 }
1391 }
1391 if (base == -2) {
1392 if (base == -2) {
1392 assert(PyErr_Occurred());
1393 assert(PyErr_Occurred());
1393 return -1;
1394 return -1;
1394 }
1395 }
1395 if (base == -1) {
1396 if (base == -1) {
1396 return 1;
1397 return 1;
1397 }
1398 }
1398 if (index_get_parents(self, rev, ps, (int)rev) < 0) {
1399 if (index_get_parents(self, rev, ps, (int)rev) < 0) {
1399 assert(PyErr_Occurred());
1400 assert(PyErr_Occurred());
1400 return -1;
1401 return -1;
1401 };
1402 };
1403 while ((index_get_length(self, ps[0]) == 0) && ps[0] >= 0) {
1404 b = index_baserev(self, ps[0]);
1405 if (b == ps[0]) {
1406 break;
1407 }
1408 ps[0] = b;
1409 }
1410 while ((index_get_length(self, ps[1]) == 0) && ps[1] >= 0) {
1411 b = index_baserev(self, ps[1]);
1412 if (b == ps[1]) {
1413 break;
1414 }
1415 ps[1] = b;
1416 }
1402 if (base == ps[0] || base == ps[1]) {
1417 if (base == ps[0] || base == ps[1]) {
1403 return 0;
1418 return 0;
1404 }
1419 }
1405 rev = base;
1420 rev = base;
1406 }
1421 }
1407 return rev == -1;
1422 return rev == -1;
1408 }
1423 }
1409
1424
1410 static PyObject *index_issnapshot(indexObject *self, PyObject *value)
1425 static PyObject *index_issnapshot(indexObject *self, PyObject *value)
1411 {
1426 {
1412 long rev;
1427 long rev;
1413 int issnap;
1428 int issnap;
1414 Py_ssize_t length = index_length(self);
1429 Py_ssize_t length = index_length(self);
1415
1430
1416 if (!pylong_to_long(value, &rev)) {
1431 if (!pylong_to_long(value, &rev)) {
1417 return NULL;
1432 return NULL;
1418 }
1433 }
1419 if (rev < -1 || rev >= length) {
1434 if (rev < -1 || rev >= length) {
1420 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
1435 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
1421 rev);
1436 rev);
1422 return NULL;
1437 return NULL;
1423 };
1438 };
1424 issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
1439 issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
1425 if (issnap < 0) {
1440 if (issnap < 0) {
1426 return NULL;
1441 return NULL;
1427 };
1442 };
1428 return PyBool_FromLong((long)issnap);
1443 return PyBool_FromLong((long)issnap);
1429 }
1444 }
1430
1445
1431 static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
1446 static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
1432 {
1447 {
1433 Py_ssize_t start_rev;
1448 Py_ssize_t start_rev;
1434 PyObject *cache;
1449 PyObject *cache;
1435 Py_ssize_t base;
1450 Py_ssize_t base;
1436 Py_ssize_t rev;
1451 Py_ssize_t rev;
1437 PyObject *key = NULL;
1452 PyObject *key = NULL;
1438 PyObject *value = NULL;
1453 PyObject *value = NULL;
1439 const Py_ssize_t length = index_length(self);
1454 const Py_ssize_t length = index_length(self);
1440 if (!PyArg_ParseTuple(args, "O!n", &PyDict_Type, &cache, &start_rev)) {
1455 if (!PyArg_ParseTuple(args, "O!n", &PyDict_Type, &cache, &start_rev)) {
1441 return NULL;
1456 return NULL;
1442 }
1457 }
1443 for (rev = start_rev; rev < length; rev++) {
1458 for (rev = start_rev; rev < length; rev++) {
1444 int issnap;
1459 int issnap;
1445 PyObject *allvalues = NULL;
1460 PyObject *allvalues = NULL;
1446 issnap = index_issnapshotrev(self, rev);
1461 issnap = index_issnapshotrev(self, rev);
1447 if (issnap < 0) {
1462 if (issnap < 0) {
1448 goto bail;
1463 goto bail;
1449 }
1464 }
1450 if (issnap == 0) {
1465 if (issnap == 0) {
1451 continue;
1466 continue;
1452 }
1467 }
1453 base = (Py_ssize_t)index_baserev(self, rev);
1468 base = (Py_ssize_t)index_baserev(self, rev);
1454 if (base == rev) {
1469 if (base == rev) {
1455 base = -1;
1470 base = -1;
1456 }
1471 }
1457 if (base == -2) {
1472 if (base == -2) {
1458 assert(PyErr_Occurred());
1473 assert(PyErr_Occurred());
1459 goto bail;
1474 goto bail;
1460 }
1475 }
1461 key = PyLong_FromSsize_t(base);
1476 key = PyLong_FromSsize_t(base);
1462 allvalues = PyDict_GetItem(cache, key);
1477 allvalues = PyDict_GetItem(cache, key);
1463 if (allvalues == NULL && PyErr_Occurred()) {
1478 if (allvalues == NULL && PyErr_Occurred()) {
1464 goto bail;
1479 goto bail;
1465 }
1480 }
1466 if (allvalues == NULL) {
1481 if (allvalues == NULL) {
1467 int r;
1482 int r;
1468 allvalues = PyList_New(0);
1483 allvalues = PyList_New(0);
1469 if (!allvalues) {
1484 if (!allvalues) {
1470 goto bail;
1485 goto bail;
1471 }
1486 }
1472 r = PyDict_SetItem(cache, key, allvalues);
1487 r = PyDict_SetItem(cache, key, allvalues);
1473 Py_DECREF(allvalues);
1488 Py_DECREF(allvalues);
1474 if (r < 0) {
1489 if (r < 0) {
1475 goto bail;
1490 goto bail;
1476 }
1491 }
1477 }
1492 }
1478 value = PyLong_FromSsize_t(rev);
1493 value = PyLong_FromSsize_t(rev);
1479 if (PyList_Append(allvalues, value)) {
1494 if (PyList_Append(allvalues, value)) {
1480 goto bail;
1495 goto bail;
1481 }
1496 }
1482 Py_CLEAR(key);
1497 Py_CLEAR(key);
1483 Py_CLEAR(value);
1498 Py_CLEAR(value);
1484 }
1499 }
1485 Py_RETURN_NONE;
1500 Py_RETURN_NONE;
1486 bail:
1501 bail:
1487 Py_XDECREF(key);
1502 Py_XDECREF(key);
1488 Py_XDECREF(value);
1503 Py_XDECREF(value);
1489 return NULL;
1504 return NULL;
1490 }
1505 }
1491
1506
1492 static PyObject *index_deltachain(indexObject *self, PyObject *args)
1507 static PyObject *index_deltachain(indexObject *self, PyObject *args)
1493 {
1508 {
1494 int rev, generaldelta;
1509 int rev, generaldelta;
1495 PyObject *stoparg;
1510 PyObject *stoparg;
1496 int stoprev, iterrev, baserev = -1;
1511 int stoprev, iterrev, baserev = -1;
1497 int stopped;
1512 int stopped;
1498 PyObject *chain = NULL, *result = NULL;
1513 PyObject *chain = NULL, *result = NULL;
1499 const Py_ssize_t length = index_length(self);
1514 const Py_ssize_t length = index_length(self);
1500
1515
1501 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
1516 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
1502 return NULL;
1517 return NULL;
1503 }
1518 }
1504
1519
1505 if (PyLong_Check(stoparg)) {
1520 if (PyLong_Check(stoparg)) {
1506 stoprev = (int)PyLong_AsLong(stoparg);
1521 stoprev = (int)PyLong_AsLong(stoparg);
1507 if (stoprev == -1 && PyErr_Occurred()) {
1522 if (stoprev == -1 && PyErr_Occurred()) {
1508 return NULL;
1523 return NULL;
1509 }
1524 }
1510 } else if (stoparg == Py_None) {
1525 } else if (stoparg == Py_None) {
1511 stoprev = -2;
1526 stoprev = -2;
1512 } else {
1527 } else {
1513 PyErr_SetString(PyExc_ValueError,
1528 PyErr_SetString(PyExc_ValueError,
1514 "stoprev must be integer or None");
1529 "stoprev must be integer or None");
1515 return NULL;
1530 return NULL;
1516 }
1531 }
1517
1532
1518 if (rev < 0 || rev >= length) {
1533 if (rev < 0 || rev >= length) {
1519 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1534 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1520 return NULL;
1535 return NULL;
1521 }
1536 }
1522
1537
1523 chain = PyList_New(0);
1538 chain = PyList_New(0);
1524 if (chain == NULL) {
1539 if (chain == NULL) {
1525 return NULL;
1540 return NULL;
1526 }
1541 }
1527
1542
1528 baserev = index_baserev(self, rev);
1543 baserev = index_baserev(self, rev);
1529
1544
1530 /* This should never happen. */
1545 /* This should never happen. */
1531 if (baserev <= -2) {
1546 if (baserev <= -2) {
1532 /* Error should be set by index_deref() */
1547 /* Error should be set by index_deref() */
1533 assert(PyErr_Occurred());
1548 assert(PyErr_Occurred());
1534 goto bail;
1549 goto bail;
1535 }
1550 }
1536
1551
1537 iterrev = rev;
1552 iterrev = rev;
1538
1553
1539 while (iterrev != baserev && iterrev != stoprev) {
1554 while (iterrev != baserev && iterrev != stoprev) {
1540 PyObject *value = PyLong_FromLong(iterrev);
1555 PyObject *value = PyLong_FromLong(iterrev);
1541 if (value == NULL) {
1556 if (value == NULL) {
1542 goto bail;
1557 goto bail;
1543 }
1558 }
1544 if (PyList_Append(chain, value)) {
1559 if (PyList_Append(chain, value)) {
1545 Py_DECREF(value);
1560 Py_DECREF(value);
1546 goto bail;
1561 goto bail;
1547 }
1562 }
1548 Py_DECREF(value);
1563 Py_DECREF(value);
1549
1564
1550 if (generaldelta) {
1565 if (generaldelta) {
1551 iterrev = baserev;
1566 iterrev = baserev;
1552 } else {
1567 } else {
1553 iterrev--;
1568 iterrev--;
1554 }
1569 }
1555
1570
1556 if (iterrev < 0) {
1571 if (iterrev < 0) {
1557 break;
1572 break;
1558 }
1573 }
1559
1574
1560 if (iterrev >= length) {
1575 if (iterrev >= length) {
1561 PyErr_SetString(PyExc_IndexError,
1576 PyErr_SetString(PyExc_IndexError,
1562 "revision outside index");
1577 "revision outside index");
1563 return NULL;
1578 return NULL;
1564 }
1579 }
1565
1580
1566 baserev = index_baserev(self, iterrev);
1581 baserev = index_baserev(self, iterrev);
1567
1582
1568 /* This should never happen. */
1583 /* This should never happen. */
1569 if (baserev <= -2) {
1584 if (baserev <= -2) {
1570 /* Error should be set by index_deref() */
1585 /* Error should be set by index_deref() */
1571 assert(PyErr_Occurred());
1586 assert(PyErr_Occurred());
1572 goto bail;
1587 goto bail;
1573 }
1588 }
1574 }
1589 }
1575
1590
1576 if (iterrev == stoprev) {
1591 if (iterrev == stoprev) {
1577 stopped = 1;
1592 stopped = 1;
1578 } else {
1593 } else {
1579 PyObject *value = PyLong_FromLong(iterrev);
1594 PyObject *value = PyLong_FromLong(iterrev);
1580 if (value == NULL) {
1595 if (value == NULL) {
1581 goto bail;
1596 goto bail;
1582 }
1597 }
1583 if (PyList_Append(chain, value)) {
1598 if (PyList_Append(chain, value)) {
1584 Py_DECREF(value);
1599 Py_DECREF(value);
1585 goto bail;
1600 goto bail;
1586 }
1601 }
1587 Py_DECREF(value);
1602 Py_DECREF(value);
1588
1603
1589 stopped = 0;
1604 stopped = 0;
1590 }
1605 }
1591
1606
1592 if (PyList_Reverse(chain)) {
1607 if (PyList_Reverse(chain)) {
1593 goto bail;
1608 goto bail;
1594 }
1609 }
1595
1610
1596 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
1611 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
1597 Py_DECREF(chain);
1612 Py_DECREF(chain);
1598 return result;
1613 return result;
1599
1614
1600 bail:
1615 bail:
1601 Py_DECREF(chain);
1616 Py_DECREF(chain);
1602 return NULL;
1617 return NULL;
1603 }
1618 }
1604
1619
1605 static inline int64_t
1620 static inline int64_t
1606 index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
1621 index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
1607 {
1622 {
1608 int64_t start_offset;
1623 int64_t start_offset;
1609 int64_t end_offset;
1624 int64_t end_offset;
1610 int end_size;
1625 int end_size;
1611 start_offset = index_get_start(self, start_rev);
1626 start_offset = index_get_start(self, start_rev);
1612 if (start_offset < 0) {
1627 if (start_offset < 0) {
1613 return -1;
1628 return -1;
1614 }
1629 }
1615 end_offset = index_get_start(self, end_rev);
1630 end_offset = index_get_start(self, end_rev);
1616 if (end_offset < 0) {
1631 if (end_offset < 0) {
1617 return -1;
1632 return -1;
1618 }
1633 }
1619 end_size = index_get_length(self, end_rev);
1634 end_size = index_get_length(self, end_rev);
1620 if (end_size < 0) {
1635 if (end_size < 0) {
1621 return -1;
1636 return -1;
1622 }
1637 }
1623 if (end_offset < start_offset) {
1638 if (end_offset < start_offset) {
1624 PyErr_Format(PyExc_ValueError,
1639 PyErr_Format(PyExc_ValueError,
1625 "corrupted revlog index: inconsistent offset "
1640 "corrupted revlog index: inconsistent offset "
1626 "between revisions (%zd) and (%zd)",
1641 "between revisions (%zd) and (%zd)",
1627 start_rev, end_rev);
1642 start_rev, end_rev);
1628 return -1;
1643 return -1;
1629 }
1644 }
1630 return (end_offset - start_offset) + (int64_t)end_size;
1645 return (end_offset - start_offset) + (int64_t)end_size;
1631 }
1646 }
1632
1647
1633 /* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
1648 /* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
1634 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1649 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1635 Py_ssize_t startidx, Py_ssize_t endidx)
1650 Py_ssize_t startidx, Py_ssize_t endidx)
1636 {
1651 {
1637 int length;
1652 int length;
1638 while (endidx > 1 && endidx > startidx) {
1653 while (endidx > 1 && endidx > startidx) {
1639 length = index_get_length(self, revs[endidx - 1]);
1654 length = index_get_length(self, revs[endidx - 1]);
1640 if (length < 0) {
1655 if (length < 0) {
1641 return -1;
1656 return -1;
1642 }
1657 }
1643 if (length != 0) {
1658 if (length != 0) {
1644 break;
1659 break;
1645 }
1660 }
1646 endidx -= 1;
1661 endidx -= 1;
1647 }
1662 }
1648 return endidx;
1663 return endidx;
1649 }
1664 }
1650
1665
1651 struct Gap {
1666 struct Gap {
1652 int64_t size;
1667 int64_t size;
1653 Py_ssize_t idx;
1668 Py_ssize_t idx;
1654 };
1669 };
1655
1670
1656 static int gap_compare(const void *left, const void *right)
1671 static int gap_compare(const void *left, const void *right)
1657 {
1672 {
1658 const struct Gap *l_left = ((const struct Gap *)left);
1673 const struct Gap *l_left = ((const struct Gap *)left);
1659 const struct Gap *l_right = ((const struct Gap *)right);
1674 const struct Gap *l_right = ((const struct Gap *)right);
1660 if (l_left->size < l_right->size) {
1675 if (l_left->size < l_right->size) {
1661 return -1;
1676 return -1;
1662 } else if (l_left->size > l_right->size) {
1677 } else if (l_left->size > l_right->size) {
1663 return 1;
1678 return 1;
1664 }
1679 }
1665 return 0;
1680 return 0;
1666 }
1681 }
1667 static int Py_ssize_t_compare(const void *left, const void *right)
1682 static int Py_ssize_t_compare(const void *left, const void *right)
1668 {
1683 {
1669 const Py_ssize_t l_left = *(const Py_ssize_t *)left;
1684 const Py_ssize_t l_left = *(const Py_ssize_t *)left;
1670 const Py_ssize_t l_right = *(const Py_ssize_t *)right;
1685 const Py_ssize_t l_right = *(const Py_ssize_t *)right;
1671 if (l_left < l_right) {
1686 if (l_left < l_right) {
1672 return -1;
1687 return -1;
1673 } else if (l_left > l_right) {
1688 } else if (l_left > l_right) {
1674 return 1;
1689 return 1;
1675 }
1690 }
1676 return 0;
1691 return 0;
1677 }
1692 }
1678
1693
1679 static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
1694 static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
1680 {
1695 {
1681 /* method arguments */
1696 /* method arguments */
1682 PyObject *list_revs = NULL; /* revisions in the chain */
1697 PyObject *list_revs = NULL; /* revisions in the chain */
1683 double targetdensity = 0; /* min density to achieve */
1698 double targetdensity = 0; /* min density to achieve */
1684 Py_ssize_t mingapsize = 0; /* threshold to ignore gaps */
1699 Py_ssize_t mingapsize = 0; /* threshold to ignore gaps */
1685
1700
1686 /* other core variables */
1701 /* other core variables */
1687 Py_ssize_t idxlen = index_length(self);
1702 Py_ssize_t idxlen = index_length(self);
1688 Py_ssize_t i; /* used for various iteration */
1703 Py_ssize_t i; /* used for various iteration */
1689 PyObject *result = NULL; /* the final return of the function */
1704 PyObject *result = NULL; /* the final return of the function */
1690
1705
1691 /* generic information about the delta chain being slice */
1706 /* generic information about the delta chain being slice */
1692 Py_ssize_t num_revs = 0; /* size of the full delta chain */
1707 Py_ssize_t num_revs = 0; /* size of the full delta chain */
1693 Py_ssize_t *revs = NULL; /* native array of revision in the chain */
1708 Py_ssize_t *revs = NULL; /* native array of revision in the chain */
1694 int64_t chainpayload = 0; /* sum of all delta in the chain */
1709 int64_t chainpayload = 0; /* sum of all delta in the chain */
1695 int64_t deltachainspan = 0; /* distance from first byte to last byte */
1710 int64_t deltachainspan = 0; /* distance from first byte to last byte */
1696
1711
1697 /* variable used for slicing the delta chain */
1712 /* variable used for slicing the delta chain */
1698 int64_t readdata = 0; /* amount of data currently planned to be read */
1713 int64_t readdata = 0; /* amount of data currently planned to be read */
1699 double density = 0; /* ration of payload data compared to read ones */
1714 double density = 0; /* ration of payload data compared to read ones */
1700 int64_t previous_end;
1715 int64_t previous_end;
1701 struct Gap *gaps = NULL; /* array of notable gap in the chain */
1716 struct Gap *gaps = NULL; /* array of notable gap in the chain */
1702 Py_ssize_t num_gaps =
1717 Py_ssize_t num_gaps =
1703 0; /* total number of notable gap recorded so far */
1718 0; /* total number of notable gap recorded so far */
1704 Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
1719 Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
1705 Py_ssize_t num_selected = 0; /* number of gaps skipped */
1720 Py_ssize_t num_selected = 0; /* number of gaps skipped */
1706 PyObject *chunk = NULL; /* individual slice */
1721 PyObject *chunk = NULL; /* individual slice */
1707 PyObject *allchunks = NULL; /* all slices */
1722 PyObject *allchunks = NULL; /* all slices */
1708 Py_ssize_t previdx;
1723 Py_ssize_t previdx;
1709
1724
1710 /* parsing argument */
1725 /* parsing argument */
1711 if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
1726 if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
1712 &targetdensity, &mingapsize)) {
1727 &targetdensity, &mingapsize)) {
1713 goto bail;
1728 goto bail;
1714 }
1729 }
1715
1730
1716 /* If the delta chain contains a single element, we do not need slicing
1731 /* If the delta chain contains a single element, we do not need slicing
1717 */
1732 */
1718 num_revs = PyList_GET_SIZE(list_revs);
1733 num_revs = PyList_GET_SIZE(list_revs);
1719 if (num_revs <= 1) {
1734 if (num_revs <= 1) {
1720 result = PyTuple_Pack(1, list_revs);
1735 result = PyTuple_Pack(1, list_revs);
1721 goto done;
1736 goto done;
1722 }
1737 }
1723
1738
1724 /* Turn the python list into a native integer array (for efficiency) */
1739 /* Turn the python list into a native integer array (for efficiency) */
1725 revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
1740 revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
1726 if (revs == NULL) {
1741 if (revs == NULL) {
1727 PyErr_NoMemory();
1742 PyErr_NoMemory();
1728 goto bail;
1743 goto bail;
1729 }
1744 }
1730 for (i = 0; i < num_revs; i++) {
1745 for (i = 0; i < num_revs; i++) {
1731 Py_ssize_t revnum =
1746 Py_ssize_t revnum =
1732 PyLong_AsLong(PyList_GET_ITEM(list_revs, i));
1747 PyLong_AsLong(PyList_GET_ITEM(list_revs, i));
1733 if (revnum == -1 && PyErr_Occurred()) {
1748 if (revnum == -1 && PyErr_Occurred()) {
1734 goto bail;
1749 goto bail;
1735 }
1750 }
1736 if (revnum < nullrev || revnum >= idxlen) {
1751 if (revnum < nullrev || revnum >= idxlen) {
1737 PyErr_Format(PyExc_IndexError,
1752 PyErr_Format(PyExc_IndexError,
1738 "index out of range: %zd", revnum);
1753 "index out of range: %zd", revnum);
1739 goto bail;
1754 goto bail;
1740 }
1755 }
1741 revs[i] = revnum;
1756 revs[i] = revnum;
1742 }
1757 }
1743
1758
1744 /* Compute and check various property of the unsliced delta chain */
1759 /* Compute and check various property of the unsliced delta chain */
1745 deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
1760 deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
1746 if (deltachainspan < 0) {
1761 if (deltachainspan < 0) {
1747 goto bail;
1762 goto bail;
1748 }
1763 }
1749
1764
1750 if (deltachainspan <= mingapsize) {
1765 if (deltachainspan <= mingapsize) {
1751 result = PyTuple_Pack(1, list_revs);
1766 result = PyTuple_Pack(1, list_revs);
1752 goto done;
1767 goto done;
1753 }
1768 }
1754 chainpayload = 0;
1769 chainpayload = 0;
1755 for (i = 0; i < num_revs; i++) {
1770 for (i = 0; i < num_revs; i++) {
1756 int tmp = index_get_length(self, revs[i]);
1771 int tmp = index_get_length(self, revs[i]);
1757 if (tmp < 0) {
1772 if (tmp < 0) {
1758 goto bail;
1773 goto bail;
1759 }
1774 }
1760 chainpayload += tmp;
1775 chainpayload += tmp;
1761 }
1776 }
1762
1777
1763 readdata = deltachainspan;
1778 readdata = deltachainspan;
1764 density = 1.0;
1779 density = 1.0;
1765
1780
1766 if (0 < deltachainspan) {
1781 if (0 < deltachainspan) {
1767 density = (double)chainpayload / (double)deltachainspan;
1782 density = (double)chainpayload / (double)deltachainspan;
1768 }
1783 }
1769
1784
1770 if (density >= targetdensity) {
1785 if (density >= targetdensity) {
1771 result = PyTuple_Pack(1, list_revs);
1786 result = PyTuple_Pack(1, list_revs);
1772 goto done;
1787 goto done;
1773 }
1788 }
1774
1789
1775 /* if chain is too sparse, look for relevant gaps */
1790 /* if chain is too sparse, look for relevant gaps */
1776 gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
1791 gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
1777 if (gaps == NULL) {
1792 if (gaps == NULL) {
1778 PyErr_NoMemory();
1793 PyErr_NoMemory();
1779 goto bail;
1794 goto bail;
1780 }
1795 }
1781
1796
1782 previous_end = -1;
1797 previous_end = -1;
1783 for (i = 0; i < num_revs; i++) {
1798 for (i = 0; i < num_revs; i++) {
1784 int64_t revstart;
1799 int64_t revstart;
1785 int revsize;
1800 int revsize;
1786 revstart = index_get_start(self, revs[i]);
1801 revstart = index_get_start(self, revs[i]);
1787 if (revstart < 0) {
1802 if (revstart < 0) {
1788 goto bail;
1803 goto bail;
1789 };
1804 };
1790 revsize = index_get_length(self, revs[i]);
1805 revsize = index_get_length(self, revs[i]);
1791 if (revsize < 0) {
1806 if (revsize < 0) {
1792 goto bail;
1807 goto bail;
1793 };
1808 };
1794 if (revsize == 0) {
1809 if (revsize == 0) {
1795 continue;
1810 continue;
1796 }
1811 }
1797 if (previous_end >= 0) {
1812 if (previous_end >= 0) {
1798 int64_t gapsize = revstart - previous_end;
1813 int64_t gapsize = revstart - previous_end;
1799 if (gapsize > mingapsize) {
1814 if (gapsize > mingapsize) {
1800 gaps[num_gaps].size = gapsize;
1815 gaps[num_gaps].size = gapsize;
1801 gaps[num_gaps].idx = i;
1816 gaps[num_gaps].idx = i;
1802 num_gaps += 1;
1817 num_gaps += 1;
1803 }
1818 }
1804 }
1819 }
1805 previous_end = revstart + revsize;
1820 previous_end = revstart + revsize;
1806 }
1821 }
1807 if (num_gaps == 0) {
1822 if (num_gaps == 0) {
1808 result = PyTuple_Pack(1, list_revs);
1823 result = PyTuple_Pack(1, list_revs);
1809 goto done;
1824 goto done;
1810 }
1825 }
1811 qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);
1826 qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);
1812
1827
1813 /* Slice the largest gap first, they improve the density the most */
1828 /* Slice the largest gap first, they improve the density the most */
1814 selected_indices =
1829 selected_indices =
1815 (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
1830 (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
1816 if (selected_indices == NULL) {
1831 if (selected_indices == NULL) {
1817 PyErr_NoMemory();
1832 PyErr_NoMemory();
1818 goto bail;
1833 goto bail;
1819 }
1834 }
1820
1835
1821 for (i = num_gaps - 1; i >= 0; i--) {
1836 for (i = num_gaps - 1; i >= 0; i--) {
1822 selected_indices[num_selected] = gaps[i].idx;
1837 selected_indices[num_selected] = gaps[i].idx;
1823 readdata -= gaps[i].size;
1838 readdata -= gaps[i].size;
1824 num_selected += 1;
1839 num_selected += 1;
1825 if (readdata <= 0) {
1840 if (readdata <= 0) {
1826 density = 1.0;
1841 density = 1.0;
1827 } else {
1842 } else {
1828 density = (double)chainpayload / (double)readdata;
1843 density = (double)chainpayload / (double)readdata;
1829 }
1844 }
1830 if (density >= targetdensity) {
1845 if (density >= targetdensity) {
1831 break;
1846 break;
1832 }
1847 }
1833 }
1848 }
1834 qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
1849 qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
1835 &Py_ssize_t_compare);
1850 &Py_ssize_t_compare);
1836
1851
1837 /* create the resulting slice */
1852 /* create the resulting slice */
1838 allchunks = PyList_New(0);
1853 allchunks = PyList_New(0);
1839 if (allchunks == NULL) {
1854 if (allchunks == NULL) {
1840 goto bail;
1855 goto bail;
1841 }
1856 }
1842 previdx = 0;
1857 previdx = 0;
1843 selected_indices[num_selected] = num_revs;
1858 selected_indices[num_selected] = num_revs;
1844 for (i = 0; i <= num_selected; i++) {
1859 for (i = 0; i <= num_selected; i++) {
1845 Py_ssize_t idx = selected_indices[i];
1860 Py_ssize_t idx = selected_indices[i];
1846 Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
1861 Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
1847 if (endidx < 0) {
1862 if (endidx < 0) {
1848 goto bail;
1863 goto bail;
1849 }
1864 }
1850 if (previdx < endidx) {
1865 if (previdx < endidx) {
1851 chunk = PyList_GetSlice(list_revs, previdx, endidx);
1866 chunk = PyList_GetSlice(list_revs, previdx, endidx);
1852 if (chunk == NULL) {
1867 if (chunk == NULL) {
1853 goto bail;
1868 goto bail;
1854 }
1869 }
1855 if (PyList_Append(allchunks, chunk) == -1) {
1870 if (PyList_Append(allchunks, chunk) == -1) {
1856 goto bail;
1871 goto bail;
1857 }
1872 }
1858 Py_DECREF(chunk);
1873 Py_DECREF(chunk);
1859 chunk = NULL;
1874 chunk = NULL;
1860 }
1875 }
1861 previdx = idx;
1876 previdx = idx;
1862 }
1877 }
1863 result = allchunks;
1878 result = allchunks;
1864 goto done;
1879 goto done;
1865
1880
1866 bail:
1881 bail:
1867 Py_XDECREF(allchunks);
1882 Py_XDECREF(allchunks);
1868 Py_XDECREF(chunk);
1883 Py_XDECREF(chunk);
1869 done:
1884 done:
1870 free(revs);
1885 free(revs);
1871 free(gaps);
1886 free(gaps);
1872 free(selected_indices);
1887 free(selected_indices);
1873 return result;
1888 return result;
1874 }
1889 }
1875
1890
1876 static inline int nt_level(const char *node, Py_ssize_t level)
1891 static inline int nt_level(const char *node, Py_ssize_t level)
1877 {
1892 {
1878 int v = node[level >> 1];
1893 int v = node[level >> 1];
1879 if (!(level & 1))
1894 if (!(level & 1))
1880 v >>= 4;
1895 v >>= 4;
1881 return v & 0xf;
1896 return v & 0xf;
1882 }
1897 }
1883
1898
1884 /*
1899 /*
1885 * Return values:
1900 * Return values:
1886 *
1901 *
1887 * -4: match is ambiguous (multiple candidates)
1902 * -4: match is ambiguous (multiple candidates)
1888 * -2: not found
1903 * -2: not found
1889 * rest: valid rev
1904 * rest: valid rev
1890 */
1905 */
1891 static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
1906 static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
1892 int hex)
1907 int hex)
1893 {
1908 {
1894 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
1909 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
1895 int level, maxlevel, off;
1910 int level, maxlevel, off;
1896
1911
1897 /* If the input is binary, do a fast check for the nullid first. */
1912 /* If the input is binary, do a fast check for the nullid first. */
1898 if (!hex && nodelen == self->nodelen && node[0] == '\0' &&
1913 if (!hex && nodelen == self->nodelen && node[0] == '\0' &&
1899 node[1] == '\0' && memcmp(node, nullid, self->nodelen) == 0)
1914 node[1] == '\0' && memcmp(node, nullid, self->nodelen) == 0)
1900 return -1;
1915 return -1;
1901
1916
1902 if (hex)
1917 if (hex)
1903 maxlevel = nodelen;
1918 maxlevel = nodelen;
1904 else
1919 else
1905 maxlevel = 2 * nodelen;
1920 maxlevel = 2 * nodelen;
1906 if (maxlevel > 2 * self->nodelen)
1921 if (maxlevel > 2 * self->nodelen)
1907 maxlevel = 2 * self->nodelen;
1922 maxlevel = 2 * self->nodelen;
1908
1923
1909 for (level = off = 0; level < maxlevel; level++) {
1924 for (level = off = 0; level < maxlevel; level++) {
1910 int k = getnybble(node, level);
1925 int k = getnybble(node, level);
1911 nodetreenode *n = &self->nodes[off];
1926 nodetreenode *n = &self->nodes[off];
1912 int v = n->children[k];
1927 int v = n->children[k];
1913
1928
1914 if (v < 0) {
1929 if (v < 0) {
1915 const char *n;
1930 const char *n;
1916 Py_ssize_t i;
1931 Py_ssize_t i;
1917
1932
1918 v = -(v + 2);
1933 v = -(v + 2);
1919 n = index_node(self->index, v);
1934 n = index_node(self->index, v);
1920 if (n == NULL)
1935 if (n == NULL)
1921 return -2;
1936 return -2;
1922 for (i = level; i < maxlevel; i++)
1937 for (i = level; i < maxlevel; i++)
1923 if (getnybble(node, i) != nt_level(n, i))
1938 if (getnybble(node, i) != nt_level(n, i))
1924 return -2;
1939 return -2;
1925 return v;
1940 return v;
1926 }
1941 }
1927 if (v == 0)
1942 if (v == 0)
1928 return -2;
1943 return -2;
1929 off = v;
1944 off = v;
1930 }
1945 }
1931 /* multiple matches against an ambiguous prefix */
1946 /* multiple matches against an ambiguous prefix */
1932 return -4;
1947 return -4;
1933 }
1948 }
1934
1949
1935 static int nt_new(nodetree *self)
1950 static int nt_new(nodetree *self)
1936 {
1951 {
1937 if (self->length == self->capacity) {
1952 if (self->length == self->capacity) {
1938 size_t newcapacity;
1953 size_t newcapacity;
1939 nodetreenode *newnodes;
1954 nodetreenode *newnodes;
1940 newcapacity = self->capacity * 2;
1955 newcapacity = self->capacity * 2;
1941 if (newcapacity >= SIZE_MAX / sizeof(nodetreenode)) {
1956 if (newcapacity >= SIZE_MAX / sizeof(nodetreenode)) {
1942 PyErr_SetString(PyExc_MemoryError,
1957 PyErr_SetString(PyExc_MemoryError,
1943 "overflow in nt_new");
1958 "overflow in nt_new");
1944 return -1;
1959 return -1;
1945 }
1960 }
1946 newnodes =
1961 newnodes =
1947 realloc(self->nodes, newcapacity * sizeof(nodetreenode));
1962 realloc(self->nodes, newcapacity * sizeof(nodetreenode));
1948 if (newnodes == NULL) {
1963 if (newnodes == NULL) {
1949 PyErr_SetString(PyExc_MemoryError, "out of memory");
1964 PyErr_SetString(PyExc_MemoryError, "out of memory");
1950 return -1;
1965 return -1;
1951 }
1966 }
1952 self->capacity = newcapacity;
1967 self->capacity = newcapacity;
1953 self->nodes = newnodes;
1968 self->nodes = newnodes;
1954 memset(&self->nodes[self->length], 0,
1969 memset(&self->nodes[self->length], 0,
1955 sizeof(nodetreenode) * (self->capacity - self->length));
1970 sizeof(nodetreenode) * (self->capacity - self->length));
1956 }
1971 }
1957 return self->length++;
1972 return self->length++;
1958 }
1973 }
1959
1974
1960 static int nt_insert(nodetree *self, const char *node, int rev)
1975 static int nt_insert(nodetree *self, const char *node, int rev)
1961 {
1976 {
1962 int level = 0;
1977 int level = 0;
1963 int off = 0;
1978 int off = 0;
1964
1979
1965 while (level < 2 * self->nodelen) {
1980 while (level < 2 * self->nodelen) {
1966 int k = nt_level(node, level);
1981 int k = nt_level(node, level);
1967 nodetreenode *n;
1982 nodetreenode *n;
1968 int v;
1983 int v;
1969
1984
1970 n = &self->nodes[off];
1985 n = &self->nodes[off];
1971 v = n->children[k];
1986 v = n->children[k];
1972
1987
1973 if (v == 0) {
1988 if (v == 0) {
1974 n->children[k] = -rev - 2;
1989 n->children[k] = -rev - 2;
1975 return 0;
1990 return 0;
1976 }
1991 }
1977 if (v < 0) {
1992 if (v < 0) {
1978 const char *oldnode =
1993 const char *oldnode =
1979 index_node_existing(self->index, -(v + 2));
1994 index_node_existing(self->index, -(v + 2));
1980 int noff;
1995 int noff;
1981
1996
1982 if (oldnode == NULL)
1997 if (oldnode == NULL)
1983 return -1;
1998 return -1;
1984 if (!memcmp(oldnode, node, self->nodelen)) {
1999 if (!memcmp(oldnode, node, self->nodelen)) {
1985 n->children[k] = -rev - 2;
2000 n->children[k] = -rev - 2;
1986 return 0;
2001 return 0;
1987 }
2002 }
1988 noff = nt_new(self);
2003 noff = nt_new(self);
1989 if (noff == -1)
2004 if (noff == -1)
1990 return -1;
2005 return -1;
1991 /* self->nodes may have been changed by realloc */
2006 /* self->nodes may have been changed by realloc */
1992 self->nodes[off].children[k] = noff;
2007 self->nodes[off].children[k] = noff;
1993 off = noff;
2008 off = noff;
1994 n = &self->nodes[off];
2009 n = &self->nodes[off];
1995 n->children[nt_level(oldnode, ++level)] = v;
2010 n->children[nt_level(oldnode, ++level)] = v;
1996 if (level > self->depth)
2011 if (level > self->depth)
1997 self->depth = level;
2012 self->depth = level;
1998 self->splits += 1;
2013 self->splits += 1;
1999 } else {
2014 } else {
2000 level += 1;
2015 level += 1;
2001 off = v;
2016 off = v;
2002 }
2017 }
2003 }
2018 }
2004
2019
2005 return -1;
2020 return -1;
2006 }
2021 }
2007
2022
2008 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
2023 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
2009 {
2024 {
2010 Py_ssize_t rev;
2025 Py_ssize_t rev;
2011 const char *node;
2026 const char *node;
2012 Py_ssize_t length;
2027 Py_ssize_t length;
2013 if (!PyArg_ParseTuple(args, "n", &rev))
2028 if (!PyArg_ParseTuple(args, "n", &rev))
2014 return NULL;
2029 return NULL;
2015 length = index_length(self->nt.index);
2030 length = index_length(self->nt.index);
2016 if (rev < 0 || rev >= length) {
2031 if (rev < 0 || rev >= length) {
2017 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
2032 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
2018 return NULL;
2033 return NULL;
2019 }
2034 }
2020 node = index_node_existing(self->nt.index, rev);
2035 node = index_node_existing(self->nt.index, rev);
2021 if (nt_insert(&self->nt, node, (int)rev) == -1)
2036 if (nt_insert(&self->nt, node, (int)rev) == -1)
2022 return NULL;
2037 return NULL;
2023 Py_RETURN_NONE;
2038 Py_RETURN_NONE;
2024 }
2039 }
2025
2040
2026 static int nt_delete_node(nodetree *self, const char *node)
2041 static int nt_delete_node(nodetree *self, const char *node)
2027 {
2042 {
2028 /* rev==-2 happens to get encoded as 0, which is interpreted as not set
2043 /* rev==-2 happens to get encoded as 0, which is interpreted as not set
2029 */
2044 */
2030 return nt_insert(self, node, -2);
2045 return nt_insert(self, node, -2);
2031 }
2046 }
2032
2047
2033 static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
2048 static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
2034 {
2049 {
2035 /* Initialize before overflow-checking to avoid nt_dealloc() crash. */
2050 /* Initialize before overflow-checking to avoid nt_dealloc() crash. */
2036 self->nodes = NULL;
2051 self->nodes = NULL;
2037
2052
2038 self->index = index;
2053 self->index = index;
2039 /* The input capacity is in terms of revisions, while the field is in
2054 /* The input capacity is in terms of revisions, while the field is in
2040 * terms of nodetree nodes. */
2055 * terms of nodetree nodes. */
2041 self->capacity = (capacity < 4 ? 4 : capacity / 2);
2056 self->capacity = (capacity < 4 ? 4 : capacity / 2);
2042 self->nodelen = index->nodelen;
2057 self->nodelen = index->nodelen;
2043 self->depth = 0;
2058 self->depth = 0;
2044 self->splits = 0;
2059 self->splits = 0;
2045 if (self->capacity > SIZE_MAX / sizeof(nodetreenode)) {
2060 if (self->capacity > SIZE_MAX / sizeof(nodetreenode)) {
2046 PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
2061 PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
2047 return -1;
2062 return -1;
2048 }
2063 }
2049 self->nodes = calloc(self->capacity, sizeof(nodetreenode));
2064 self->nodes = calloc(self->capacity, sizeof(nodetreenode));
2050 if (self->nodes == NULL) {
2065 if (self->nodes == NULL) {
2051 PyErr_NoMemory();
2066 PyErr_NoMemory();
2052 return -1;
2067 return -1;
2053 }
2068 }
2054 self->length = 1;
2069 self->length = 1;
2055 return 0;
2070 return 0;
2056 }
2071 }
2057
2072
2058 static int ntobj_init(nodetreeObject *self, PyObject *args)
2073 static int ntobj_init(nodetreeObject *self, PyObject *args)
2059 {
2074 {
2060 PyObject *index;
2075 PyObject *index;
2061 unsigned capacity;
2076 unsigned capacity;
2062 if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
2077 if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
2063 &capacity))
2078 &capacity))
2064 return -1;
2079 return -1;
2065 Py_INCREF(index);
2080 Py_INCREF(index);
2066 return nt_init(&self->nt, (indexObject *)index, capacity);
2081 return nt_init(&self->nt, (indexObject *)index, capacity);
2067 }
2082 }
2068
2083
2069 static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
2084 static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
2070 {
2085 {
2071 return nt_find(self, node, nodelen, 1);
2086 return nt_find(self, node, nodelen, 1);
2072 }
2087 }
2073
2088
2074 /*
2089 /*
2075 * Find the length of the shortest unique prefix of node.
2090 * Find the length of the shortest unique prefix of node.
2076 *
2091 *
2077 * Return values:
2092 * Return values:
2078 *
2093 *
2079 * -3: error (exception set)
2094 * -3: error (exception set)
2080 * -2: not found (no exception set)
2095 * -2: not found (no exception set)
2081 * rest: length of shortest prefix
2096 * rest: length of shortest prefix
2082 */
2097 */
2083 static int nt_shortest(nodetree *self, const char *node)
2098 static int nt_shortest(nodetree *self, const char *node)
2084 {
2099 {
2085 int level, off;
2100 int level, off;
2086
2101
2087 for (level = off = 0; level < 2 * self->nodelen; level++) {
2102 for (level = off = 0; level < 2 * self->nodelen; level++) {
2088 int k, v;
2103 int k, v;
2089 nodetreenode *n = &self->nodes[off];
2104 nodetreenode *n = &self->nodes[off];
2090 k = nt_level(node, level);
2105 k = nt_level(node, level);
2091 v = n->children[k];
2106 v = n->children[k];
2092 if (v < 0) {
2107 if (v < 0) {
2093 const char *n;
2108 const char *n;
2094 v = -(v + 2);
2109 v = -(v + 2);
2095 n = index_node_existing(self->index, v);
2110 n = index_node_existing(self->index, v);
2096 if (n == NULL)
2111 if (n == NULL)
2097 return -3;
2112 return -3;
2098 if (memcmp(node, n, self->nodelen) != 0)
2113 if (memcmp(node, n, self->nodelen) != 0)
2099 /*
2114 /*
2100 * Found a unique prefix, but it wasn't for the
2115 * Found a unique prefix, but it wasn't for the
2101 * requested node (i.e the requested node does
2116 * requested node (i.e the requested node does
2102 * not exist).
2117 * not exist).
2103 */
2118 */
2104 return -2;
2119 return -2;
2105 return level + 1;
2120 return level + 1;
2106 }
2121 }
2107 if (v == 0)
2122 if (v == 0)
2108 return -2;
2123 return -2;
2109 off = v;
2124 off = v;
2110 }
2125 }
2111 /*
2126 /*
2112 * The node was still not unique after 40 hex digits, so this won't
2127 * The node was still not unique after 40 hex digits, so this won't
2113 * happen. Also, if we get here, then there's a programming error in
2128 * happen. Also, if we get here, then there's a programming error in
2114 * this file that made us insert a node longer than 40 hex digits.
2129 * this file that made us insert a node longer than 40 hex digits.
2115 */
2130 */
2116 PyErr_SetString(PyExc_Exception, "broken node tree");
2131 PyErr_SetString(PyExc_Exception, "broken node tree");
2117 return -3;
2132 return -3;
2118 }
2133 }
2119
2134
2120 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
2135 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
2121 {
2136 {
2122 PyObject *val;
2137 PyObject *val;
2123 char *node;
2138 char *node;
2124 int length;
2139 int length;
2125
2140
2126 if (!PyArg_ParseTuple(args, "O", &val))
2141 if (!PyArg_ParseTuple(args, "O", &val))
2127 return NULL;
2142 return NULL;
2128 if (node_check(self->nt.nodelen, val, &node) == -1)
2143 if (node_check(self->nt.nodelen, val, &node) == -1)
2129 return NULL;
2144 return NULL;
2130
2145
2131 length = nt_shortest(&self->nt, node);
2146 length = nt_shortest(&self->nt, node);
2132 if (length == -3)
2147 if (length == -3)
2133 return NULL;
2148 return NULL;
2134 if (length == -2) {
2149 if (length == -2) {
2135 raise_revlog_error();
2150 raise_revlog_error();
2136 return NULL;
2151 return NULL;
2137 }
2152 }
2138 return PyLong_FromLong(length);
2153 return PyLong_FromLong(length);
2139 }
2154 }
2140
2155
2141 static void nt_dealloc(nodetree *self)
2156 static void nt_dealloc(nodetree *self)
2142 {
2157 {
2143 free(self->nodes);
2158 free(self->nodes);
2144 self->nodes = NULL;
2159 self->nodes = NULL;
2145 }
2160 }
2146
2161
2147 static void ntobj_dealloc(nodetreeObject *self)
2162 static void ntobj_dealloc(nodetreeObject *self)
2148 {
2163 {
2149 Py_XDECREF(self->nt.index);
2164 Py_XDECREF(self->nt.index);
2150 nt_dealloc(&self->nt);
2165 nt_dealloc(&self->nt);
2151 PyObject_Del(self);
2166 PyObject_Del(self);
2152 }
2167 }
2153
2168
2154 static PyMethodDef ntobj_methods[] = {
2169 static PyMethodDef ntobj_methods[] = {
2155 {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
2170 {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
2156 "insert an index entry"},
2171 "insert an index entry"},
2157 {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
2172 {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
2158 "find length of shortest hex nodeid of a binary ID"},
2173 "find length of shortest hex nodeid of a binary ID"},
2159 {NULL} /* Sentinel */
2174 {NULL} /* Sentinel */
2160 };
2175 };
2161
2176
2162 static PyTypeObject nodetreeType = {
2177 static PyTypeObject nodetreeType = {
2163 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2178 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2164 "parsers.nodetree", /* tp_name */
2179 "parsers.nodetree", /* tp_name */
2165 sizeof(nodetreeObject), /* tp_basicsize */
2180 sizeof(nodetreeObject), /* tp_basicsize */
2166 0, /* tp_itemsize */
2181 0, /* tp_itemsize */
2167 (destructor)ntobj_dealloc, /* tp_dealloc */
2182 (destructor)ntobj_dealloc, /* tp_dealloc */
2168 0, /* tp_print */
2183 0, /* tp_print */
2169 0, /* tp_getattr */
2184 0, /* tp_getattr */
2170 0, /* tp_setattr */
2185 0, /* tp_setattr */
2171 0, /* tp_compare */
2186 0, /* tp_compare */
2172 0, /* tp_repr */
2187 0, /* tp_repr */
2173 0, /* tp_as_number */
2188 0, /* tp_as_number */
2174 0, /* tp_as_sequence */
2189 0, /* tp_as_sequence */
2175 0, /* tp_as_mapping */
2190 0, /* tp_as_mapping */
2176 0, /* tp_hash */
2191 0, /* tp_hash */
2177 0, /* tp_call */
2192 0, /* tp_call */
2178 0, /* tp_str */
2193 0, /* tp_str */
2179 0, /* tp_getattro */
2194 0, /* tp_getattro */
2180 0, /* tp_setattro */
2195 0, /* tp_setattro */
2181 0, /* tp_as_buffer */
2196 0, /* tp_as_buffer */
2182 Py_TPFLAGS_DEFAULT, /* tp_flags */
2197 Py_TPFLAGS_DEFAULT, /* tp_flags */
2183 "nodetree", /* tp_doc */
2198 "nodetree", /* tp_doc */
2184 0, /* tp_traverse */
2199 0, /* tp_traverse */
2185 0, /* tp_clear */
2200 0, /* tp_clear */
2186 0, /* tp_richcompare */
2201 0, /* tp_richcompare */
2187 0, /* tp_weaklistoffset */
2202 0, /* tp_weaklistoffset */
2188 0, /* tp_iter */
2203 0, /* tp_iter */
2189 0, /* tp_iternext */
2204 0, /* tp_iternext */
2190 ntobj_methods, /* tp_methods */
2205 ntobj_methods, /* tp_methods */
2191 0, /* tp_members */
2206 0, /* tp_members */
2192 0, /* tp_getset */
2207 0, /* tp_getset */
2193 0, /* tp_base */
2208 0, /* tp_base */
2194 0, /* tp_dict */
2209 0, /* tp_dict */
2195 0, /* tp_descr_get */
2210 0, /* tp_descr_get */
2196 0, /* tp_descr_set */
2211 0, /* tp_descr_set */
2197 0, /* tp_dictoffset */
2212 0, /* tp_dictoffset */
2198 (initproc)ntobj_init, /* tp_init */
2213 (initproc)ntobj_init, /* tp_init */
2199 0, /* tp_alloc */
2214 0, /* tp_alloc */
2200 };
2215 };
2201
2216
2202 static int index_init_nt(indexObject *self)
2217 static int index_init_nt(indexObject *self)
2203 {
2218 {
2204 if (!self->ntinitialized) {
2219 if (!self->ntinitialized) {
2205 if (nt_init(&self->nt, self, (int)self->length) == -1) {
2220 if (nt_init(&self->nt, self, (int)self->length) == -1) {
2206 nt_dealloc(&self->nt);
2221 nt_dealloc(&self->nt);
2207 return -1;
2222 return -1;
2208 }
2223 }
2209 if (nt_insert(&self->nt, nullid, -1) == -1) {
2224 if (nt_insert(&self->nt, nullid, -1) == -1) {
2210 nt_dealloc(&self->nt);
2225 nt_dealloc(&self->nt);
2211 return -1;
2226 return -1;
2212 }
2227 }
2213 self->ntinitialized = 1;
2228 self->ntinitialized = 1;
2214 self->ntrev = (int)index_length(self);
2229 self->ntrev = (int)index_length(self);
2215 self->ntlookups = 1;
2230 self->ntlookups = 1;
2216 self->ntmisses = 0;
2231 self->ntmisses = 0;
2217 }
2232 }
2218 return 0;
2233 return 0;
2219 }
2234 }
2220
2235
/*
 * Map a binary node id to its revision number, caching lookups in the
 * nodetree as a side effect.
 *
 * Return values:
 *
 *   -3: error (exception set)
 *   -2: not found (no exception set)
 * rest: valid rev
 */
static int index_find_node(indexObject *self, const char *node)
{
	int rev;

	if (index_init_nt(self) == -1)
		return -3;

	self->ntlookups++;
	rev = nt_find(&self->nt, node, self->nodelen, 0);
	if (rev >= -1)
		return rev;

	/*
	 * For the first handful of lookups, we scan the entire index,
	 * and cache only the matching nodes. This optimizes for cases
	 * like "hg tip", where only a few nodes are accessed.
	 *
	 * After that, we cache every node we visit, using a single
	 * scan amortized over multiple lookups. This gives the best
	 * bulk performance, e.g. for "hg log".
	 */
	if (self->ntmisses++ < 4) {
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node_existing(self, rev);
			if (n == NULL)
				return -3;
			if (memcmp(node, n, self->nodelen) == 0) {
				/* cache only the hit */
				if (nt_insert(&self->nt, n, rev) == -1)
					return -3;
				break;
			}
		}
	} else {
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node_existing(self, rev);
			if (n == NULL)
				return -3;
			/* cache every node visited on the way down */
			if (nt_insert(&self->nt, n, rev) == -1) {
				/* revs >= rev + 1 were cached before failing */
				self->ntrev = rev + 1;
				return -3;
			}
			if (memcmp(node, n, self->nodelen) == 0) {
				break;
			}
		}
		/* all revs >= ntrev are now in the nodetree */
		self->ntrev = rev;
	}

	if (rev >= 0)
		return rev;
	return -2;
}
2280
2295
2281 static PyObject *index_getitem(indexObject *self, PyObject *value)
2296 static PyObject *index_getitem(indexObject *self, PyObject *value)
2282 {
2297 {
2283 char *node;
2298 char *node;
2284 int rev;
2299 int rev;
2285
2300
2286 if (PyLong_Check(value)) {
2301 if (PyLong_Check(value)) {
2287 long idx;
2302 long idx;
2288 if (!pylong_to_long(value, &idx)) {
2303 if (!pylong_to_long(value, &idx)) {
2289 return NULL;
2304 return NULL;
2290 }
2305 }
2291 return index_get(self, idx);
2306 return index_get(self, idx);
2292 }
2307 }
2293
2308
2294 if (node_check(self->nodelen, value, &node) == -1)
2309 if (node_check(self->nodelen, value, &node) == -1)
2295 return NULL;
2310 return NULL;
2296 rev = index_find_node(self, node);
2311 rev = index_find_node(self, node);
2297 if (rev >= -1)
2312 if (rev >= -1)
2298 return PyLong_FromLong(rev);
2313 return PyLong_FromLong(rev);
2299 if (rev == -2)
2314 if (rev == -2)
2300 raise_revlog_error();
2315 raise_revlog_error();
2301 return NULL;
2316 return NULL;
2302 }
2317 }
2303
2318
2304 /*
2319 /*
2305 * Fully populate the radix tree.
2320 * Fully populate the radix tree.
2306 */
2321 */
2307 static int index_populate_nt(indexObject *self)
2322 static int index_populate_nt(indexObject *self)
2308 {
2323 {
2309 int rev;
2324 int rev;
2310 if (self->ntrev > 0) {
2325 if (self->ntrev > 0) {
2311 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2326 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2312 const char *n = index_node_existing(self, rev);
2327 const char *n = index_node_existing(self, rev);
2313 if (n == NULL)
2328 if (n == NULL)
2314 return -1;
2329 return -1;
2315 if (nt_insert(&self->nt, n, rev) == -1)
2330 if (nt_insert(&self->nt, n, rev) == -1)
2316 return -1;
2331 return -1;
2317 }
2332 }
2318 self->ntrev = -1;
2333 self->ntrev = -1;
2319 }
2334 }
2320 return 0;
2335 return 0;
2321 }
2336 }
2322
2337
/*
 * Resolve a hex node prefix to the unique full binary node it matches.
 *
 * Returns the full node as bytes, None when the prefix is not found (or
 * contains non-hex characters), or raises on error / on the conditions
 * nt_partialmatch reports via -4.
 */
static PyObject *index_partialmatch(indexObject *self, PyObject *args)
{
	const char *fullnode;
	Py_ssize_t nodelen;
	char *node;
	int rev, i;

	if (!PyArg_ParseTuple(args, "y#", &node, &nodelen))
		return NULL;

	if (nodelen < 1) {
		PyErr_SetString(PyExc_ValueError, "key too short");
		return NULL;
	}

	/* a full node is 2 * nodelen hex digits */
	if (nodelen > 2 * self->nodelen) {
		PyErr_SetString(PyExc_ValueError, "key too long");
		return NULL;
	}

	/* hexdigit() sets an exception on invalid characters */
	for (i = 0; i < nodelen; i++)
		hexdigit(node, i);
	if (PyErr_Occurred()) {
		/* input contains non-hex characters */
		PyErr_Clear();
		Py_RETURN_NONE;
	}

	/* prefix matching needs the whole tree populated */
	if (index_init_nt(self) == -1)
		return NULL;
	if (index_populate_nt(self) == -1)
		return NULL;
	rev = nt_partialmatch(&self->nt, node, nodelen);

	switch (rev) {
	case -4:
		/* reported as a revlog error (NOTE(review): presumably an
		 * ambiguous prefix — confirm against nt_partialmatch) */
		raise_revlog_error();
		return NULL;
	case -2:
		/* no match */
		Py_RETURN_NONE;
	case -1:
		/* matches the null node */
		return PyBytes_FromStringAndSize(nullid, self->nodelen);
	}

	fullnode = index_node_existing(self, rev);
	if (fullnode == NULL) {
		return NULL;
	}
	return PyBytes_FromStringAndSize(fullnode, self->nodelen);
}
2373
2388
2374 static PyObject *index_shortest(indexObject *self, PyObject *args)
2389 static PyObject *index_shortest(indexObject *self, PyObject *args)
2375 {
2390 {
2376 PyObject *val;
2391 PyObject *val;
2377 char *node;
2392 char *node;
2378 int length;
2393 int length;
2379
2394
2380 if (!PyArg_ParseTuple(args, "O", &val))
2395 if (!PyArg_ParseTuple(args, "O", &val))
2381 return NULL;
2396 return NULL;
2382 if (node_check(self->nodelen, val, &node) == -1)
2397 if (node_check(self->nodelen, val, &node) == -1)
2383 return NULL;
2398 return NULL;
2384
2399
2385 self->ntlookups++;
2400 self->ntlookups++;
2386 if (index_init_nt(self) == -1)
2401 if (index_init_nt(self) == -1)
2387 return NULL;
2402 return NULL;
2388 if (index_populate_nt(self) == -1)
2403 if (index_populate_nt(self) == -1)
2389 return NULL;
2404 return NULL;
2390 length = nt_shortest(&self->nt, node);
2405 length = nt_shortest(&self->nt, node);
2391 if (length == -3)
2406 if (length == -3)
2392 return NULL;
2407 return NULL;
2393 if (length == -2) {
2408 if (length == -2) {
2394 raise_revlog_error();
2409 raise_revlog_error();
2395 return NULL;
2410 return NULL;
2396 }
2411 }
2397 return PyLong_FromLong(length);
2412 return PyLong_FromLong(length);
2398 }
2413 }
2399
2414
2400 static PyObject *index_m_get(indexObject *self, PyObject *args)
2415 static PyObject *index_m_get(indexObject *self, PyObject *args)
2401 {
2416 {
2402 PyObject *val;
2417 PyObject *val;
2403 char *node;
2418 char *node;
2404 int rev;
2419 int rev;
2405
2420
2406 if (!PyArg_ParseTuple(args, "O", &val))
2421 if (!PyArg_ParseTuple(args, "O", &val))
2407 return NULL;
2422 return NULL;
2408 if (node_check(self->nodelen, val, &node) == -1)
2423 if (node_check(self->nodelen, val, &node) == -1)
2409 return NULL;
2424 return NULL;
2410 rev = index_find_node(self, node);
2425 rev = index_find_node(self, node);
2411 if (rev == -3)
2426 if (rev == -3)
2412 return NULL;
2427 return NULL;
2413 if (rev == -2)
2428 if (rev == -2)
2414 Py_RETURN_NONE;
2429 Py_RETURN_NONE;
2415 return PyLong_FromLong(rev);
2430 return PyLong_FromLong(rev);
2416 }
2431 }
2417
2432
2418 static int index_contains(indexObject *self, PyObject *value)
2433 static int index_contains(indexObject *self, PyObject *value)
2419 {
2434 {
2420 char *node;
2435 char *node;
2421
2436
2422 if (PyLong_Check(value)) {
2437 if (PyLong_Check(value)) {
2423 long rev;
2438 long rev;
2424 if (!pylong_to_long(value, &rev)) {
2439 if (!pylong_to_long(value, &rev)) {
2425 return -1;
2440 return -1;
2426 }
2441 }
2427 return rev >= -1 && rev < index_length(self);
2442 return rev >= -1 && rev < index_length(self);
2428 }
2443 }
2429
2444
2430 if (node_check(self->nodelen, value, &node) == -1)
2445 if (node_check(self->nodelen, value, &node) == -1)
2431 return -1;
2446 return -1;
2432
2447
2433 switch (index_find_node(self, node)) {
2448 switch (index_find_node(self, node)) {
2434 case -3:
2449 case -3:
2435 return -1;
2450 return -1;
2436 case -2:
2451 case -2:
2437 return 0;
2452 return 0;
2438 default:
2453 default:
2439 return 1;
2454 return 1;
2440 }
2455 }
2441 }
2456 }
2442
2457
2443 static PyObject *index_m_has_node(indexObject *self, PyObject *args)
2458 static PyObject *index_m_has_node(indexObject *self, PyObject *args)
2444 {
2459 {
2445 int ret = index_contains(self, args);
2460 int ret = index_contains(self, args);
2446 if (ret < 0)
2461 if (ret < 0)
2447 return NULL;
2462 return NULL;
2448 return PyBool_FromLong((long)ret);
2463 return PyBool_FromLong((long)ret);
2449 }
2464 }
2450
2465
2451 static PyObject *index_m_rev(indexObject *self, PyObject *val)
2466 static PyObject *index_m_rev(indexObject *self, PyObject *val)
2452 {
2467 {
2453 char *node;
2468 char *node;
2454 int rev;
2469 int rev;
2455
2470
2456 if (node_check(self->nodelen, val, &node) == -1)
2471 if (node_check(self->nodelen, val, &node) == -1)
2457 return NULL;
2472 return NULL;
2458 rev = index_find_node(self, node);
2473 rev = index_find_node(self, node);
2459 if (rev >= -1)
2474 if (rev >= -1)
2460 return PyLong_FromLong(rev);
2475 return PyLong_FromLong(rev);
2461 if (rev == -2)
2476 if (rev == -2)
2462 raise_revlog_error();
2477 raise_revlog_error();
2463 return NULL;
2478 return NULL;
2464 }
2479 }
2465
2480
/* set of input revs, one bit per rev (so at most 64 inputs) */
typedef uint64_t bitmask;
2467
2482
/*
 * Given a disjoint set of revs, return all candidates for the
 * greatest common ancestor. In revset notation, this is the set
 * "heads(::a and ::b and ...)"
 *
 * Each rev's bit is propagated down to its ancestors; a rev seen with
 * all input bits set is a candidate, and its ancestors are "poisoned"
 * so they are not reported as well.
 */
static PyObject *find_gca_candidates(indexObject *self, const int *revs,
                                     int revcount)
{
	/* all input bits set: rev is an ancestor of every input */
	const bitmask allseen = (1ull << revcount) - 1;
	/* marks revs below an already-found candidate */
	const bitmask poison = 1ull << revcount;
	PyObject *gca = PyList_New(0);
	int i, v, interesting;
	int maxrev = -1;
	bitmask sp;
	bitmask *seen;

	if (gca == NULL)
		return PyErr_NoMemory();

	for (i = 0; i < revcount; i++) {
		if (revs[i] > maxrev)
			maxrev = revs[i];
	}

	seen = calloc(sizeof(*seen), maxrev + 1);
	if (seen == NULL) {
		Py_DECREF(gca);
		return PyErr_NoMemory();
	}

	for (i = 0; i < revcount; i++)
		seen[revs[i]] = 1ull << i;

	/* number of revs whose bit sets are still propagating */
	interesting = revcount;

	/* walk from the highest rev towards the root */
	for (v = maxrev; v >= 0 && interesting; v--) {
		bitmask sv = seen[v];
		int parents[2];

		if (!sv)
			continue;

		if (sv < poison) {
			interesting -= 1;
			if (sv == allseen) {
				/* candidate found */
				PyObject *obj = PyLong_FromLong(v);
				if (obj == NULL)
					goto bail;
				if (PyList_Append(gca, obj) == -1) {
					Py_DECREF(obj);
					goto bail;
				}
				sv |= poison;
				/* an input rev itself is the unique answer */
				for (i = 0; i < revcount; i++) {
					if (revs[i] == v)
						goto done;
				}
			}
		}
		if (index_get_parents(self, v, parents, maxrev) < 0)
			goto bail;

		for (i = 0; i < 2; i++) {
			int p = parents[i];
			if (p == -1)
				continue;
			sp = seen[p];
			if (sv < poison) {
				if (sp == 0) {
					seen[p] = sv;
					interesting++;
				} else if (sp != sv)
					seen[p] |= sv;
			} else {
				/* poison spreads to ancestors */
				if (sp && sp < poison)
					interesting--;
				seen[p] = sv;
			}
		}
	}

done:
	free(seen);
	return gca;
bail:
	free(seen);
	Py_XDECREF(gca);
	return NULL;
}
2557
2572
/*
 * Given a disjoint set of revs, return the subset with the longest
 * path to the root.
 *
 * Tracks, for every rev, its depth and the bitmask of input revs it is
 * "seen" by; `interesting` counts how many revs carry each distinct
 * mask.  Returns a list of revs (possibly empty), or NULL on error.
 */
static PyObject *find_deepest(indexObject *self, PyObject *revs)
{
	const Py_ssize_t revcount = PyList_GET_SIZE(revs);
	/* one bit per input rev in a long; bounded well below 31 */
	static const Py_ssize_t capacity = 24;
	int *depth, *interesting = NULL;
	int i, j, v, ninteresting;
	PyObject *dict = NULL, *keys = NULL;
	long *seen = NULL;
	int maxrev = -1;
	long final;

	if (revcount > capacity) {
		PyErr_Format(PyExc_OverflowError,
		             "bitset size (%ld) > capacity (%ld)",
		             (long)revcount, (long)capacity);
		return NULL;
	}

	for (i = 0; i < revcount; i++) {
		int n = (int)PyLong_AsLong(PyList_GET_ITEM(revs, i));
		if (n > maxrev)
			maxrev = n;
	}

	depth = calloc(sizeof(*depth), maxrev + 1);
	if (depth == NULL)
		return PyErr_NoMemory();

	seen = calloc(sizeof(*seen), maxrev + 1);
	if (seen == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	/* indexed by bitmask, hence 2^revcount entries */
	interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
	if (interesting == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	if (PyList_Sort(revs) == -1)
		goto bail;

	for (i = 0; i < revcount; i++) {
		int n = (int)PyLong_AsLong(PyList_GET_ITEM(revs, i));
		long b = 1l << i;
		depth[n] = 1;
		seen[n] = b;
		interesting[b] = 1;
	}

	/* invariant: ninteresting is the number of non-zero entries in
	 * interesting. */
	ninteresting = (int)revcount;

	for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
		int dv = depth[v];
		int parents[2];
		long sv;

		if (dv == 0)
			continue;

		sv = seen[v];
		if (index_get_parents(self, v, parents, maxrev) < 0)
			goto bail;

		for (i = 0; i < 2; i++) {
			int p = parents[i];
			long sp;
			int dp;

			if (p == -1)
				continue;

			dp = depth[p];
			sp = seen[p];
			if (dp <= dv) {
				/* found a deeper path to p */
				depth[p] = dv + 1;
				if (sp != sv) {
					interesting[sv] += 1;
					seen[p] = sv;
					if (sp) {
						interesting[sp] -= 1;
						if (interesting[sp] == 0)
							ninteresting -= 1;
					}
				}
			} else if (dv == dp - 1) {
				/* equally deep path: merge the masks */
				long nsp = sp | sv;
				if (nsp == sp)
					continue;
				seen[p] = nsp;
				interesting[sp] -= 1;
				if (interesting[sp] == 0)
					ninteresting -= 1;
				if (interesting[nsp] == 0)
					ninteresting += 1;
				interesting[nsp] += 1;
			}
		}
		interesting[sv] -= 1;
		if (interesting[sv] == 0)
			ninteresting -= 1;
	}

	/* union of the masks still interesting = the surviving inputs */
	final = 0;
	j = ninteresting;
	for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
		if (interesting[i] == 0)
			continue;
		final |= i;
		j -= 1;
	}
	if (final == 0) {
		/* note: bail here is cleanup, not error — keys is returned */
		keys = PyList_New(0);
		goto bail;
	}

	dict = PyDict_New();
	if (dict == NULL)
		goto bail;

	for (i = 0; i < revcount; i++) {
		PyObject *key;

		if ((final & (1 << i)) == 0)
			continue;

		key = PyList_GET_ITEM(revs, i);
		Py_INCREF(key);
		Py_INCREF(Py_None);
		if (PyDict_SetItem(dict, key, Py_None) == -1) {
			Py_DECREF(key);
			Py_DECREF(Py_None);
			goto bail;
		}
	}

	keys = PyDict_Keys(dict);

bail:
	free(depth);
	free(seen);
	free(interesting);
	Py_XDECREF(dict);

	return keys;
}
2711
2726
/*
 * Given a (possibly overlapping) set of revs, return all the
 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
 *
 * Validates and deduplicates the arguments, then defers the real work
 * to find_gca_candidates.  Returns a list of revs, or NULL on error.
 */
static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
{
	PyObject *ret = NULL;
	Py_ssize_t argcount, i, len;
	bitmask repeat = 0;
	int revcount = 0;
	int *revs;

	argcount = PySequence_Length(args);
	revs = PyMem_Malloc(argcount * sizeof(*revs));
	if (argcount > 0 && revs == NULL)
		return PyErr_NoMemory();
	len = index_length(self);

	for (i = 0; i < argcount; i++) {
		static const int capacity = 24;
		PyObject *obj = PySequence_GetItem(args, i);
		bitmask x;
		long val;

		if (!PyLong_Check(obj)) {
			PyErr_SetString(PyExc_TypeError,
			                "arguments must all be ints");
			Py_DECREF(obj);
			goto bail;
		}
		val = PyLong_AsLong(obj);
		Py_DECREF(obj);
		/* the null rev as an input short-circuits to an empty answer */
		if (val == -1) {
			ret = PyList_New(0);
			goto done;
		}
		if (val < 0 || val >= len) {
			PyErr_SetString(PyExc_IndexError, "index out of range");
			goto bail;
		}
		/* this cheesy bloom filter lets us avoid some more
		 * expensive duplicate checks in the common set-is-disjoint
		 * case */
		x = 1ull << (val & 0x3f);
		if (repeat & x) {
			int k;
			for (k = 0; k < revcount; k++) {
				if (val == revs[k])
					goto duplicate;
			}
		} else
			repeat |= x;
		if (revcount >= capacity) {
			PyErr_Format(PyExc_OverflowError,
			             "bitset size (%d) > capacity (%d)",
			             revcount, capacity);
			goto bail;
		}
		revs[revcount++] = (int)val;
	duplicate:;
	}

	if (revcount == 0) {
		ret = PyList_New(0);
		goto done;
	}
	/* single rev: it is its own sole common-ancestor head */
	if (revcount == 1) {
		PyObject *obj;
		ret = PyList_New(1);
		if (ret == NULL)
			goto bail;
		obj = PyLong_FromLong(revs[0]);
		if (obj == NULL)
			goto bail;
		PyList_SET_ITEM(ret, 0, obj);
		goto done;
	}

	ret = find_gca_candidates(self, revs, revcount);
	if (ret == NULL)
		goto bail;

done:
	PyMem_Free(revs);
	return ret;

bail:
	PyMem_Free(revs);
	Py_XDECREF(ret);
	return NULL;
}
2803
2818
2804 /*
2819 /*
2805 * Given a (possibly overlapping) set of revs, return the greatest
2820 * Given a (possibly overlapping) set of revs, return the greatest
2806 * common ancestors: those with the longest path to the root.
2821 * common ancestors: those with the longest path to the root.
2807 */
2822 */
2808 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2823 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2809 {
2824 {
2810 PyObject *ret;
2825 PyObject *ret;
2811 PyObject *gca = index_commonancestorsheads(self, args);
2826 PyObject *gca = index_commonancestorsheads(self, args);
2812 if (gca == NULL)
2827 if (gca == NULL)
2813 return NULL;
2828 return NULL;
2814
2829
2815 if (PyList_GET_SIZE(gca) <= 1) {
2830 if (PyList_GET_SIZE(gca) <= 1) {
2816 return gca;
2831 return gca;
2817 }
2832 }
2818
2833
2819 ret = find_deepest(self, gca);
2834 ret = find_deepest(self, gca);
2820 Py_DECREF(gca);
2835 Py_DECREF(gca);
2821 return ret;
2836 return ret;
2822 }
2837 }
2823
2838
/*
 * Invalidate any trie entries introduced by added revs.
 *
 * Removes from the nodetree every node at rev >= start that lives in
 * the in-memory ("added") portion of the index, then truncates
 * new_length accordingly.
 */
static void index_invalidate_added(indexObject *self, Py_ssize_t start)
{
	Py_ssize_t i, len;

	len = self->length + self->new_length;
	i = start - self->length;
	/* nothing to do when start is within the on-disk portion */
	if (i < 0)
		return;

	for (i = start; i < len; i++) {
		const char *node = index_node(self, i);
		nt_delete_node(&self->nt, node);
	}

	self->new_length = start - self->length;
}
2843
2858
/*
 * Delete a numeric range of revs, which must be at the end of the
 * range.
 *
 * Only slices with step 1 (after normalization) that extend to the end
 * of the index are accepted.  Keeps the nodetree and the ntrev /
 * new_length bookkeeping consistent with the truncation, and drops the
 * cached headrevs.  Returns 0 on success, -1 on error (exception set).
 */
static int index_slice_del(indexObject *self, PyObject *item)
{
	Py_ssize_t start, stop, step, slicelength;
	/* +1 so a slice may address one past the last rev */
	Py_ssize_t length = index_length(self) + 1;
	int ret = 0;

	if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
	                         &slicelength) < 0)
		return -1;

	if (slicelength <= 0)
		return 0;

	if ((step < 0 && start < stop) || (step > 0 && start > stop))
		stop = start;

	/* normalize a negative step to a forward slice */
	if (step < 0) {
		stop = start + 1;
		start = stop + step * (slicelength - 1) - 1;
		step = -step;
	}

	if (step != 1) {
		PyErr_SetString(PyExc_ValueError,
		                "revlog index delete requires step size of 1");
		return -1;
	}

	if (stop != length - 1) {
		PyErr_SetString(PyExc_IndexError,
		                "revlog index deletion indices are invalid");
		return -1;
	}

	if (start < self->length) {
		/* truncation reaches into the on-disk portion */
		if (self->ntinitialized) {
			Py_ssize_t i;

			for (i = start; i < self->length; i++) {
				const char *node = index_node_existing(self, i);
				if (node == NULL)
					return -1;

				nt_delete_node(&self->nt, node);
			}
			/* also drop every in-memory ("added") entry */
			if (self->new_length)
				index_invalidate_added(self, self->length);
			if (self->ntrev > start)
				self->ntrev = (int)start;
		} else if (self->new_length) {
			self->new_length = 0;
		}

		self->length = start;
		goto done;
	}

	/* truncation affects only the in-memory portion */
	if (self->ntinitialized) {
		index_invalidate_added(self, start);
		if (self->ntrev > start)
			self->ntrev = (int)start;
	} else {
		self->new_length = start - self->length;
	}
done:
	/* cached head revs are stale after any truncation */
	Py_CLEAR(self->headrevs);
	return ret;
}
2916
2931
2917 /*
2932 /*
2918 * Supported ops:
2933 * Supported ops:
2919 *
2934 *
2920 * slice deletion
2935 * slice deletion
2921 * string assignment (extend node->rev mapping)
2936 * string assignment (extend node->rev mapping)
2922 * string deletion (shrink node->rev mapping)
2937 * string deletion (shrink node->rev mapping)
2923 */
2938 */
2924 static int index_assign_subscript(indexObject *self, PyObject *item,
2939 static int index_assign_subscript(indexObject *self, PyObject *item,
2925 PyObject *value)
2940 PyObject *value)
2926 {
2941 {
2927 char *node;
2942 char *node;
2928 long rev;
2943 long rev;
2929
2944
2930 if (PySlice_Check(item) && value == NULL)
2945 if (PySlice_Check(item) && value == NULL)
2931 return index_slice_del(self, item);
2946 return index_slice_del(self, item);
2932
2947
2933 if (node_check(self->nodelen, item, &node) == -1)
2948 if (node_check(self->nodelen, item, &node) == -1)
2934 return -1;
2949 return -1;
2935
2950
2936 if (value == NULL)
2951 if (value == NULL)
2937 return self->ntinitialized ? nt_delete_node(&self->nt, node)
2952 return self->ntinitialized ? nt_delete_node(&self->nt, node)
2938 : 0;
2953 : 0;
2939 rev = PyLong_AsLong(value);
2954 rev = PyLong_AsLong(value);
2940 if (rev > INT_MAX || rev < 0) {
2955 if (rev > INT_MAX || rev < 0) {
2941 if (!PyErr_Occurred())
2956 if (!PyErr_Occurred())
2942 PyErr_SetString(PyExc_ValueError, "rev out of range");
2957 PyErr_SetString(PyExc_ValueError, "rev out of range");
2943 return -1;
2958 return -1;
2944 }
2959 }
2945
2960
2946 if (index_init_nt(self) == -1)
2961 if (index_init_nt(self) == -1)
2947 return -1;
2962 return -1;
2948 return nt_insert(&self->nt, node, (int)rev);
2963 return nt_insert(&self->nt, node, (int)rev);
2949 }
2964 }
2950
2965
2951 /*
2966 /*
2952 * Find all RevlogNG entries in an index that has inline data. Update
2967 * Find all RevlogNG entries in an index that has inline data. Update
2953 * the optional "offsets" table with those entries.
2968 * the optional "offsets" table with those entries.
2954 */
2969 */
2955 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
2970 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
2956 {
2971 {
2957 const char *data = (const char *)self->buf.buf;
2972 const char *data = (const char *)self->buf.buf;
2958 Py_ssize_t pos = 0;
2973 Py_ssize_t pos = 0;
2959 Py_ssize_t end = self->buf.len;
2974 Py_ssize_t end = self->buf.len;
2960 long incr = self->entry_size;
2975 long incr = self->entry_size;
2961 Py_ssize_t len = 0;
2976 Py_ssize_t len = 0;
2962
2977
2963 while (pos + self->entry_size <= end && pos >= 0) {
2978 while (pos + self->entry_size <= end && pos >= 0) {
2964 uint32_t comp_len, sidedata_comp_len = 0;
2979 uint32_t comp_len, sidedata_comp_len = 0;
2965 /* 3rd element of header is length of compressed inline data */
2980 /* 3rd element of header is length of compressed inline data */
2966 if (self->format_version == format_v1) {
2981 if (self->format_version == format_v1) {
2967 comp_len =
2982 comp_len =
2968 getbe32(data + pos + entry_v1_offset_comp_len);
2983 getbe32(data + pos + entry_v1_offset_comp_len);
2969 sidedata_comp_len = 0;
2984 sidedata_comp_len = 0;
2970 } else if (self->format_version == format_v2) {
2985 } else if (self->format_version == format_v2) {
2971 comp_len =
2986 comp_len =
2972 getbe32(data + pos + entry_v2_offset_comp_len);
2987 getbe32(data + pos + entry_v2_offset_comp_len);
2973 sidedata_comp_len = getbe32(
2988 sidedata_comp_len = getbe32(
2974 data + pos + entry_v2_offset_sidedata_comp_len);
2989 data + pos + entry_v2_offset_sidedata_comp_len);
2975 } else {
2990 } else {
2976 raise_revlog_error();
2991 raise_revlog_error();
2977 return -1;
2992 return -1;
2978 }
2993 }
2979 incr = self->entry_size + comp_len + sidedata_comp_len;
2994 incr = self->entry_size + comp_len + sidedata_comp_len;
2980 if (offsets)
2995 if (offsets)
2981 offsets[len] = data + pos;
2996 offsets[len] = data + pos;
2982 len++;
2997 len++;
2983 pos += incr;
2998 pos += incr;
2984 }
2999 }
2985
3000
2986 if (pos != end) {
3001 if (pos != end) {
2987 if (!PyErr_Occurred())
3002 if (!PyErr_Occurred())
2988 PyErr_SetString(PyExc_ValueError, "corrupt index file");
3003 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2989 return -1;
3004 return -1;
2990 }
3005 }
2991
3006
2992 return len;
3007 return len;
2993 }
3008 }
2994
3009
2995 static int index_init(indexObject *self, PyObject *args, PyObject *kwargs)
3010 static int index_init(indexObject *self, PyObject *args, PyObject *kwargs)
2996 {
3011 {
2997 PyObject *data_obj, *inlined_obj;
3012 PyObject *data_obj, *inlined_obj;
2998 Py_ssize_t size;
3013 Py_ssize_t size;
2999
3014
3000 static char *kwlist[] = {"data", "inlined", "format", NULL};
3015 static char *kwlist[] = {"data", "inlined", "format", NULL};
3001
3016
3002 /* Initialize before argument-checking to avoid index_dealloc() crash.
3017 /* Initialize before argument-checking to avoid index_dealloc() crash.
3003 */
3018 */
3004 self->added = NULL;
3019 self->added = NULL;
3005 self->new_length = 0;
3020 self->new_length = 0;
3006 self->added_length = 0;
3021 self->added_length = 0;
3007 self->data = NULL;
3022 self->data = NULL;
3008 memset(&self->buf, 0, sizeof(self->buf));
3023 memset(&self->buf, 0, sizeof(self->buf));
3009 self->headrevs = NULL;
3024 self->headrevs = NULL;
3010 self->filteredrevs = Py_None;
3025 self->filteredrevs = Py_None;
3011 Py_INCREF(Py_None);
3026 Py_INCREF(Py_None);
3012 self->ntinitialized = 0;
3027 self->ntinitialized = 0;
3013 self->offsets = NULL;
3028 self->offsets = NULL;
3014 self->nodelen = 20;
3029 self->nodelen = 20;
3015 self->nullentry = NULL;
3030 self->nullentry = NULL;
3016 self->rust_ext_compat = 1;
3031 self->rust_ext_compat = 1;
3017 self->format_version = format_v1;
3032 self->format_version = format_v1;
3018
3033
3019 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|l", kwlist,
3034 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|l", kwlist,
3020 &data_obj, &inlined_obj,
3035 &data_obj, &inlined_obj,
3021 &(self->format_version)))
3036 &(self->format_version)))
3022 return -1;
3037 return -1;
3023 if (!PyObject_CheckBuffer(data_obj)) {
3038 if (!PyObject_CheckBuffer(data_obj)) {
3024 PyErr_SetString(PyExc_TypeError,
3039 PyErr_SetString(PyExc_TypeError,
3025 "data does not support buffer interface");
3040 "data does not support buffer interface");
3026 return -1;
3041 return -1;
3027 }
3042 }
3028 if (self->nodelen < 20 || self->nodelen > (Py_ssize_t)sizeof(nullid)) {
3043 if (self->nodelen < 20 || self->nodelen > (Py_ssize_t)sizeof(nullid)) {
3029 PyErr_SetString(PyExc_RuntimeError, "unsupported node size");
3044 PyErr_SetString(PyExc_RuntimeError, "unsupported node size");
3030 return -1;
3045 return -1;
3031 }
3046 }
3032
3047
3033 if (self->format_version == format_v1) {
3048 if (self->format_version == format_v1) {
3034 self->entry_size = v1_entry_size;
3049 self->entry_size = v1_entry_size;
3035 } else if (self->format_version == format_v2) {
3050 } else if (self->format_version == format_v2) {
3036 self->entry_size = v2_entry_size;
3051 self->entry_size = v2_entry_size;
3037 } else if (self->format_version == format_cl2) {
3052 } else if (self->format_version == format_cl2) {
3038 self->entry_size = cl2_entry_size;
3053 self->entry_size = cl2_entry_size;
3039 }
3054 }
3040
3055
3041 self->nullentry = Py_BuildValue(
3056 self->nullentry = Py_BuildValue(
3042 "iiiiiiiy#iiBBi", 0, 0, 0, -1, -1, -1, -1, nullid, self->nodelen, 0,
3057 "iiiiiiiy#iiBBi", 0, 0, 0, -1, -1, -1, -1, nullid, self->nodelen, 0,
3043 0, comp_mode_inline, comp_mode_inline, rank_unknown);
3058 0, comp_mode_inline, comp_mode_inline, rank_unknown);
3044
3059
3045 if (!self->nullentry)
3060 if (!self->nullentry)
3046 return -1;
3061 return -1;
3047 PyObject_GC_UnTrack(self->nullentry);
3062 PyObject_GC_UnTrack(self->nullentry);
3048
3063
3049 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
3064 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
3050 return -1;
3065 return -1;
3051 size = self->buf.len;
3066 size = self->buf.len;
3052
3067
3053 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
3068 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
3054 self->data = data_obj;
3069 self->data = data_obj;
3055
3070
3056 self->ntlookups = self->ntmisses = 0;
3071 self->ntlookups = self->ntmisses = 0;
3057 self->ntrev = -1;
3072 self->ntrev = -1;
3058 Py_INCREF(self->data);
3073 Py_INCREF(self->data);
3059
3074
3060 if (self->inlined) {
3075 if (self->inlined) {
3061 Py_ssize_t len = inline_scan(self, NULL);
3076 Py_ssize_t len = inline_scan(self, NULL);
3062 if (len == -1)
3077 if (len == -1)
3063 goto bail;
3078 goto bail;
3064 self->length = len;
3079 self->length = len;
3065 } else {
3080 } else {
3066 if (size % self->entry_size) {
3081 if (size % self->entry_size) {
3067 PyErr_SetString(PyExc_ValueError, "corrupt index file");
3082 PyErr_SetString(PyExc_ValueError, "corrupt index file");
3068 goto bail;
3083 goto bail;
3069 }
3084 }
3070 self->length = size / self->entry_size;
3085 self->length = size / self->entry_size;
3071 }
3086 }
3072
3087
3073 return 0;
3088 return 0;
3074 bail:
3089 bail:
3075 return -1;
3090 return -1;
3076 }
3091 }
3077
3092
/* The index object doubles as its own nodemap: return a new reference
   to self for the `nodemap` attribute (see index_getset). */
static PyObject *index_nodemap(indexObject *self)
{
	Py_INCREF(self);
	return (PyObject *)self;
}
3083
3098
3084 static void _index_clearcaches(indexObject *self)
3099 static void _index_clearcaches(indexObject *self)
3085 {
3100 {
3086 if (self->offsets) {
3101 if (self->offsets) {
3087 PyMem_Free((void *)self->offsets);
3102 PyMem_Free((void *)self->offsets);
3088 self->offsets = NULL;
3103 self->offsets = NULL;
3089 }
3104 }
3090 if (self->ntinitialized) {
3105 if (self->ntinitialized) {
3091 nt_dealloc(&self->nt);
3106 nt_dealloc(&self->nt);
3092 }
3107 }
3093 self->ntinitialized = 0;
3108 self->ntinitialized = 0;
3094 Py_CLEAR(self->headrevs);
3109 Py_CLEAR(self->headrevs);
3095 }
3110 }
3096
3111
3097 static PyObject *index_clearcaches(indexObject *self)
3112 static PyObject *index_clearcaches(indexObject *self)
3098 {
3113 {
3099 _index_clearcaches(self);
3114 _index_clearcaches(self);
3100 self->ntrev = -1;
3115 self->ntrev = -1;
3101 self->ntlookups = self->ntmisses = 0;
3116 self->ntlookups = self->ntmisses = 0;
3102 Py_RETURN_NONE;
3117 Py_RETURN_NONE;
3103 }
3118 }
3104
3119
/* Destructor: release caches first (the nodetree references index
   data), then the buffer view before the underlying data object. */
static void index_dealloc(indexObject *self)
{
	_index_clearcaches(self);
	Py_XDECREF(self->filteredrevs);
	if (self->buf.buf) {
		/* release the buffer view before dropping self->data,
		   which owns the memory behind it */
		PyBuffer_Release(&self->buf);
		memset(&self->buf, 0, sizeof(self->buf));
	}
	Py_XDECREF(self->data);
	PyMem_Free(self->added);
	Py_XDECREF(self->nullentry);
	PyObject_Del(self);
}
3118
3133
/* Sequence protocol: len(index), index[rev] and `node in index`. */
static PySequenceMethods index_sequence_methods = {
    (lenfunc)index_length,      /* sq_length */
    0,                          /* sq_concat */
    0,                          /* sq_repeat */
    (ssizeargfunc)index_get,    /* sq_item */
    0,                          /* sq_slice */
    0,                          /* sq_ass_item */
    0,                          /* sq_ass_slice */
    (objobjproc)index_contains, /* sq_contains */
};
3129
3144
/* Mapping protocol: subscript lookups plus the assignment/deletion
   forms handled by index_assign_subscript. */
static PyMappingMethods index_mapping_methods = {
    (lenfunc)index_length,                 /* mp_length */
    (binaryfunc)index_getitem,             /* mp_subscript */
    (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
};
3135
3150
3136 static PyMethodDef index_methods[] = {
3151 static PyMethodDef index_methods[] = {
3137 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
3152 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
3138 "return the gca set of the given revs"},
3153 "return the gca set of the given revs"},
3139 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
3154 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
3140 METH_VARARGS,
3155 METH_VARARGS,
3141 "return the heads of the common ancestors of the given revs"},
3156 "return the heads of the common ancestors of the given revs"},
3142 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
3157 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
3143 "clear the index caches"},
3158 "clear the index caches"},
3144 {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
3159 {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
3145 {"get_rev", (PyCFunction)index_m_get, METH_VARARGS,
3160 {"get_rev", (PyCFunction)index_m_get, METH_VARARGS,
3146 "return `rev` associated with a node or None"},
3161 "return `rev` associated with a node or None"},
3147 {"has_node", (PyCFunction)index_m_has_node, METH_O,
3162 {"has_node", (PyCFunction)index_m_has_node, METH_O,
3148 "return True if the node exist in the index"},
3163 "return True if the node exist in the index"},
3149 {"rev", (PyCFunction)index_m_rev, METH_O,
3164 {"rev", (PyCFunction)index_m_rev, METH_O,
3150 "return `rev` associated with a node or raise RevlogError"},
3165 "return `rev` associated with a node or raise RevlogError"},
3151 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
3166 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
3152 "compute phases"},
3167 "compute phases"},
3153 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
3168 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
3154 "reachableroots"},
3169 "reachableroots"},
3155 {"replace_sidedata_info", (PyCFunction)index_replace_sidedata_info,
3170 {"replace_sidedata_info", (PyCFunction)index_replace_sidedata_info,
3156 METH_VARARGS, "replace an existing index entry with a new value"},
3171 METH_VARARGS, "replace an existing index entry with a new value"},
3157 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
3172 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
3158 "get head revisions"}, /* Can do filtering since 3.2 */
3173 "get head revisions"}, /* Can do filtering since 3.2 */
3159 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
3174 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
3160 "get filtered head revisions"}, /* Can always do filtering */
3175 "get filtered head revisions"}, /* Can always do filtering */
3161 {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
3176 {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
3162 "True if the object is a snapshot"},
3177 "True if the object is a snapshot"},
3163 {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS,
3178 {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS,
3164 "Gather snapshot data in a cache dict"},
3179 "Gather snapshot data in a cache dict"},
3165 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
3180 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
3166 "determine revisions with deltas to reconstruct fulltext"},
3181 "determine revisions with deltas to reconstruct fulltext"},
3167 {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
3182 {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
3168 METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"},
3183 METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"},
3169 {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
3184 {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
3170 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
3185 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
3171 "match a potentially ambiguous node ID"},
3186 "match a potentially ambiguous node ID"},
3172 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
3187 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
3173 "find length of shortest hex nodeid of a binary ID"},
3188 "find length of shortest hex nodeid of a binary ID"},
3174 {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
3189 {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
3175 {"entry_binary", (PyCFunction)index_entry_binary, METH_O,
3190 {"entry_binary", (PyCFunction)index_entry_binary, METH_O,
3176 "return an entry in binary form"},
3191 "return an entry in binary form"},
3177 {"pack_header", (PyCFunction)index_pack_header, METH_VARARGS,
3192 {"pack_header", (PyCFunction)index_pack_header, METH_VARARGS,
3178 "pack the revlog header information into binary"},
3193 "pack the revlog header information into binary"},
3179 {NULL} /* Sentinel */
3194 {NULL} /* Sentinel */
3180 };
3195 };
3181
3196
/* Attribute descriptors: `index.nodemap` returns the index itself. */
static PyGetSetDef index_getset[] = {
    {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
    {NULL} /* Sentinel */
};
3186
3201
3187 static PyMemberDef index_members[] = {
3202 static PyMemberDef index_members[] = {
3188 {"entry_size", T_LONG, offsetof(indexObject, entry_size), 0,
3203 {"entry_size", T_LONG, offsetof(indexObject, entry_size), 0,
3189 "size of an index entry"},
3204 "size of an index entry"},
3190 {"rust_ext_compat", T_LONG, offsetof(indexObject, rust_ext_compat), 0,
3205 {"rust_ext_compat", T_LONG, offsetof(indexObject, rust_ext_compat), 0,
3191 "size of an index entry"},
3206 "size of an index entry"},
3192 {NULL} /* Sentinel */
3207 {NULL} /* Sentinel */
3193 };
3208 };
3194
3209
/* Type object for parsers.index: a revlog index exposing both the
   sequence protocol (entries by rev) and the mapping protocol
   (node -> rev), initialized via index_init. */
PyTypeObject HgRevlogIndex_Type = {
    PyVarObject_HEAD_INIT(NULL, 0) /* header */
    "parsers.index",               /* tp_name */
    sizeof(indexObject),           /* tp_basicsize */
    0,                             /* tp_itemsize */
    (destructor)index_dealloc,     /* tp_dealloc */
    0,                             /* tp_print */
    0,                             /* tp_getattr */
    0,                             /* tp_setattr */
    0,                             /* tp_compare */
    0,                             /* tp_repr */
    0,                             /* tp_as_number */
    &index_sequence_methods,       /* tp_as_sequence */
    &index_mapping_methods,        /* tp_as_mapping */
    0,                             /* tp_hash */
    0,                             /* tp_call */
    0,                             /* tp_str */
    0,                             /* tp_getattro */
    0,                             /* tp_setattro */
    0,                             /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,            /* tp_flags */
    "revlog index",                /* tp_doc */
    0,                             /* tp_traverse */
    0,                             /* tp_clear */
    0,                             /* tp_richcompare */
    0,                             /* tp_weaklistoffset */
    0,                             /* tp_iter */
    0,                             /* tp_iternext */
    index_methods,                 /* tp_methods */
    index_members,                 /* tp_members */
    index_getset,                  /* tp_getset */
    0,                             /* tp_base */
    0,                             /* tp_dict */
    0,                             /* tp_descr_get */
    0,                             /* tp_descr_set */
    0,                             /* tp_dictoffset */
    (initproc)index_init,          /* tp_init */
    0,                             /* tp_alloc */
};
3234
3249
3235 /*
3250 /*
3236 * returns a tuple of the form (index, cache) with elements as
3251 * returns a tuple of the form (index, cache) with elements as
3237 * follows:
3252 * follows:
3238 *
3253 *
3239 * index: an index object that lazily parses Revlog (v1 or v2) records
3254 * index: an index object that lazily parses Revlog (v1 or v2) records
3240 * cache: if data is inlined, a tuple (0, index_file_content), else None
3255 * cache: if data is inlined, a tuple (0, index_file_content), else None
3241 * index_file_content could be a string, or a buffer
3256 * index_file_content could be a string, or a buffer
3242 *
3257 *
3243 * added complications are for backwards compatibility
3258 * added complications are for backwards compatibility
3244 */
3259 */
3245 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs)
3260 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs)
3246 {
3261 {
3247 PyObject *cache = NULL;
3262 PyObject *cache = NULL;
3248 indexObject *idx;
3263 indexObject *idx;
3249 int ret;
3264 int ret;
3250
3265
3251 idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
3266 idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
3252 if (idx == NULL)
3267 if (idx == NULL)
3253 goto bail;
3268 goto bail;
3254
3269
3255 ret = index_init(idx, args, kwargs);
3270 ret = index_init(idx, args, kwargs);
3256 if (ret == -1)
3271 if (ret == -1)
3257 goto bail;
3272 goto bail;
3258
3273
3259 if (idx->inlined) {
3274 if (idx->inlined) {
3260 cache = Py_BuildValue("iO", 0, idx->data);
3275 cache = Py_BuildValue("iO", 0, idx->data);
3261 if (cache == NULL)
3276 if (cache == NULL)
3262 goto bail;
3277 goto bail;
3263 } else {
3278 } else {
3264 cache = Py_None;
3279 cache = Py_None;
3265 Py_INCREF(cache);
3280 Py_INCREF(cache);
3266 }
3281 }
3267
3282
3268 return Py_BuildValue("NN", idx, cache);
3283 return Py_BuildValue("NN", idx, cache);
3269
3284
3270 bail:
3285 bail:
3271 Py_XDECREF(idx);
3286 Py_XDECREF(idx);
3272 Py_XDECREF(cache);
3287 Py_XDECREF(cache);
3273 return NULL;
3288 return NULL;
3274 }
3289 }
3275
3290
/* C API table exported to other extensions through the
   "mercurial.cext.parsers.revlog_CAPI" capsule (see revlog_module_init). */
static Revlog_CAPI CAPI = {
    /* increment the abi_version field upon each change in the Revlog_CAPI
       struct or in the ABI of the listed functions */
    3, index_length, index_node, index_fast_rank, HgRevlogIndex_GetParents,
};
3281
3296
/* Register the revlog types and the C-API capsule on the parsers
   module.  Failures leave the corresponding attribute missing; a
   pending Python exception is reported by the module init machinery. */
void revlog_module_init(PyObject *mod)
{
	PyObject *caps = NULL;
	HgRevlogIndex_Type.tp_new = PyType_GenericNew;
	if (PyType_Ready(&HgRevlogIndex_Type) < 0)
		return;
	Py_INCREF(&HgRevlogIndex_Type);
	PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);

	nodetreeType.tp_new = PyType_GenericNew;
	if (PyType_Ready(&nodetreeType) < 0)
		return;
	Py_INCREF(&nodetreeType);
	PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);

	/* the capsule hands the CAPI struct to sibling C extensions */
	caps = PyCapsule_New(&CAPI, "mercurial.cext.parsers.revlog_CAPI", NULL);
	if (caps != NULL)
		PyModule_AddObject(mod, "revlog_CAPI", caps);
}
@@ -1,3337 +1,3347 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 # coding: utf8
2 # coding: utf8
3 #
3 #
4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 """Storage back-end for Mercurial.
9 """Storage back-end for Mercurial.
10
10
11 This provides efficient delta storage with O(1) retrieve and append
11 This provides efficient delta storage with O(1) retrieve and append
12 and O(changes) merge between branches.
12 and O(changes) merge between branches.
13 """
13 """
14
14
15
15
16 import binascii
16 import binascii
17 import collections
17 import collections
18 import contextlib
18 import contextlib
19 import io
19 import io
20 import os
20 import os
21 import struct
21 import struct
22 import zlib
22 import zlib
23
23
24 # import stuff from node for others to import from revlog
24 # import stuff from node for others to import from revlog
25 from .node import (
25 from .node import (
26 bin,
26 bin,
27 hex,
27 hex,
28 nullrev,
28 nullrev,
29 sha1nodeconstants,
29 sha1nodeconstants,
30 short,
30 short,
31 wdirrev,
31 wdirrev,
32 )
32 )
33 from .i18n import _
33 from .i18n import _
34 from .pycompat import getattr
34 from .pycompat import getattr
35 from .revlogutils.constants import (
35 from .revlogutils.constants import (
36 ALL_KINDS,
36 ALL_KINDS,
37 CHANGELOGV2,
37 CHANGELOGV2,
38 COMP_MODE_DEFAULT,
38 COMP_MODE_DEFAULT,
39 COMP_MODE_INLINE,
39 COMP_MODE_INLINE,
40 COMP_MODE_PLAIN,
40 COMP_MODE_PLAIN,
41 ENTRY_RANK,
41 ENTRY_RANK,
42 FEATURES_BY_VERSION,
42 FEATURES_BY_VERSION,
43 FLAG_GENERALDELTA,
43 FLAG_GENERALDELTA,
44 FLAG_INLINE_DATA,
44 FLAG_INLINE_DATA,
45 INDEX_HEADER,
45 INDEX_HEADER,
46 KIND_CHANGELOG,
46 KIND_CHANGELOG,
47 RANK_UNKNOWN,
47 RANK_UNKNOWN,
48 REVLOGV0,
48 REVLOGV0,
49 REVLOGV1,
49 REVLOGV1,
50 REVLOGV1_FLAGS,
50 REVLOGV1_FLAGS,
51 REVLOGV2,
51 REVLOGV2,
52 REVLOGV2_FLAGS,
52 REVLOGV2_FLAGS,
53 REVLOG_DEFAULT_FLAGS,
53 REVLOG_DEFAULT_FLAGS,
54 REVLOG_DEFAULT_FORMAT,
54 REVLOG_DEFAULT_FORMAT,
55 REVLOG_DEFAULT_VERSION,
55 REVLOG_DEFAULT_VERSION,
56 SUPPORTED_FLAGS,
56 SUPPORTED_FLAGS,
57 )
57 )
58 from .revlogutils.flagutil import (
58 from .revlogutils.flagutil import (
59 REVIDX_DEFAULT_FLAGS,
59 REVIDX_DEFAULT_FLAGS,
60 REVIDX_ELLIPSIS,
60 REVIDX_ELLIPSIS,
61 REVIDX_EXTSTORED,
61 REVIDX_EXTSTORED,
62 REVIDX_FLAGS_ORDER,
62 REVIDX_FLAGS_ORDER,
63 REVIDX_HASCOPIESINFO,
63 REVIDX_HASCOPIESINFO,
64 REVIDX_ISCENSORED,
64 REVIDX_ISCENSORED,
65 REVIDX_RAWTEXT_CHANGING_FLAGS,
65 REVIDX_RAWTEXT_CHANGING_FLAGS,
66 )
66 )
67 from .thirdparty import attr
67 from .thirdparty import attr
68 from . import (
68 from . import (
69 ancestor,
69 ancestor,
70 dagop,
70 dagop,
71 error,
71 error,
72 mdiff,
72 mdiff,
73 policy,
73 policy,
74 pycompat,
74 pycompat,
75 revlogutils,
75 revlogutils,
76 templatefilters,
76 templatefilters,
77 util,
77 util,
78 )
78 )
79 from .interfaces import (
79 from .interfaces import (
80 repository,
80 repository,
81 util as interfaceutil,
81 util as interfaceutil,
82 )
82 )
83 from .revlogutils import (
83 from .revlogutils import (
84 deltas as deltautil,
84 deltas as deltautil,
85 docket as docketutil,
85 docket as docketutil,
86 flagutil,
86 flagutil,
87 nodemap as nodemaputil,
87 nodemap as nodemaputil,
88 randomaccessfile,
88 randomaccessfile,
89 revlogv0,
89 revlogv0,
90 rewrite,
90 rewrite,
91 sidedata as sidedatautil,
91 sidedata as sidedatautil,
92 )
92 )
93 from .utils import (
93 from .utils import (
94 storageutil,
94 storageutil,
95 stringutil,
95 stringutil,
96 )
96 )
97
97
98 # blanked usage of all the name to prevent pyflakes constraints
98 # blanked usage of all the name to prevent pyflakes constraints
99 # We need these name available in the module for extensions.
99 # We need these name available in the module for extensions.
100
100
101 REVLOGV0
101 REVLOGV0
102 REVLOGV1
102 REVLOGV1
103 REVLOGV2
103 REVLOGV2
104 CHANGELOGV2
104 CHANGELOGV2
105 FLAG_INLINE_DATA
105 FLAG_INLINE_DATA
106 FLAG_GENERALDELTA
106 FLAG_GENERALDELTA
107 REVLOG_DEFAULT_FLAGS
107 REVLOG_DEFAULT_FLAGS
108 REVLOG_DEFAULT_FORMAT
108 REVLOG_DEFAULT_FORMAT
109 REVLOG_DEFAULT_VERSION
109 REVLOG_DEFAULT_VERSION
110 REVLOGV1_FLAGS
110 REVLOGV1_FLAGS
111 REVLOGV2_FLAGS
111 REVLOGV2_FLAGS
112 REVIDX_ISCENSORED
112 REVIDX_ISCENSORED
113 REVIDX_ELLIPSIS
113 REVIDX_ELLIPSIS
114 REVIDX_HASCOPIESINFO
114 REVIDX_HASCOPIESINFO
115 REVIDX_EXTSTORED
115 REVIDX_EXTSTORED
116 REVIDX_DEFAULT_FLAGS
116 REVIDX_DEFAULT_FLAGS
117 REVIDX_FLAGS_ORDER
117 REVIDX_FLAGS_ORDER
118 REVIDX_RAWTEXT_CHANGING_FLAGS
118 REVIDX_RAWTEXT_CHANGING_FLAGS
119
119
120 parsers = policy.importmod('parsers')
120 parsers = policy.importmod('parsers')
121 rustancestor = policy.importrust('ancestor')
121 rustancestor = policy.importrust('ancestor')
122 rustdagop = policy.importrust('dagop')
122 rustdagop = policy.importrust('dagop')
123 rustrevlog = policy.importrust('revlog')
123 rustrevlog = policy.importrust('revlog')
124
124
125 # Aliased for performance.
125 # Aliased for performance.
126 _zlibdecompress = zlib.decompress
126 _zlibdecompress = zlib.decompress
127
127
128 # max size of revlog with inline data
128 # max size of revlog with inline data
129 _maxinline = 131072
129 _maxinline = 131072
130
130
131 # Flag processors for REVIDX_ELLIPSIS.
131 # Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    """Read-side flag processor for REVIDX_ELLIPSIS: pass text through.

    Returns ``(text, validatehash)`` where validatehash is False because
    ellipsis revisions do not hash-check against their fulltext.
    """
    return (text, False)
134
134
135
135
def ellipsiswriteprocessor(rl, text):
    """Write-side flag processor for REVIDX_ELLIPSIS: pass text through.

    Returns ``(text, validatehash)`` with validatehash False.
    """
    return (text, False)
138
138
139
139
def ellipsisrawprocessor(rl, text):
    """Raw flag processor for REVIDX_ELLIPSIS.

    Always returns False: the raw text of an ellipsis revision must not
    be hash-validated.
    """
    return False
142
142
143
143
# (read, write, raw) flag-processor triple registered for REVIDX_ELLIPSIS
ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)
149
149
150
150
151 def _verify_revision(rl, skipflags, state, node):
151 def _verify_revision(rl, skipflags, state, node):
152 """Verify the integrity of the given revlog ``node`` while providing a hook
152 """Verify the integrity of the given revlog ``node`` while providing a hook
153 point for extensions to influence the operation."""
153 point for extensions to influence the operation."""
154 if skipflags:
154 if skipflags:
155 state[b'skipread'].add(node)
155 state[b'skipread'].add(node)
156 else:
156 else:
157 # Side-effect: read content and verify hash.
157 # Side-effect: read content and verify hash.
158 rl.revision(node)
158 rl.revision(node)
159
159
160
160
# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance consideration (and a
# wheelbarrow of other slowness source)
#
# NOTE(review): presence of `BaseIndexObject` on `parsers` appears to signal
# the pure-python parsers module is loaded -- confirm against policy.importmod.
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)
169
169
170
170
@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta:
    """A revision delta emitted by a revlog.

    Field semantics are defined by ``repository.irevisiondelta``; see that
    interface for the authoritative contract of each attribute.
    """

    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    # optional; defaults to None when the link node is not known/applicable
    linknode = attr.ib(default=None)
185
185
186
186
@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem:
    """An issue reported during verification (``repository.iverifyproblem``).

    All fields default to None; which of ``warning``/``error`` is populated
    is up to the reporting code.
    """

    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)
193
193
194
194
def parse_index_v1(data, inline):
    """Parse a version-1 revlog index with the C implementation.

    Returns an ``(index, cache)`` pair as produced by
    ``parsers.parse_index2``.
    """
    return parsers.parse_index2(data, inline)
199
199
200
200
def parse_index_v2(data, inline):
    """Parse a version-2 revlog index with the C implementation.

    Returns an ``(index, cache)`` pair as produced by
    ``parsers.parse_index2`` with ``format=REVLOGV2``.
    """
    return parsers.parse_index2(data, inline, format=REVLOGV2)
205
205
206
206
def parse_index_cl_v2(data, inline):
    """Parse a changelog-v2 index with the C implementation.

    Returns an ``(index, cache)`` pair as produced by
    ``parsers.parse_index2`` with ``format=CHANGELOGV2``.
    """
    return parsers.parse_index2(data, inline, format=CHANGELOGV2)
211
211
212
212
if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        """Parse a v1 index with the development persistent-nodemap parser."""
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    # the loaded `parsers` module does not provide the development
    # nodemap parser; `_loadindex` checks for None before selecting it
    parse_index_v1_nodemap = None
222
222
223
223
def parse_index_v1_mixed(data, inline):
    """Parse a v1 index and wrap it in the Rust ``MixedIndex``."""
    index, cache = parse_index_v1(data, inline)
    wrapped = rustrevlog.MixedIndex(index)
    return wrapped, cache
227
227
228
228
# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

# error message raised when a revlog read returns fewer bytes than
# requested (formatted with: name, expected, offset, actual size)
FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)

# characters that may appear in a hexadecimal node id
hexdigits = b'0123456789abcdefABCDEF'
239
239
240
240
241 class revlog:
241 class revlog:
242 """
242 """
243 the underlying revision storage object
243 the underlying revision storage object
244
244
245 A revlog consists of two parts, an index and the revision data.
245 A revlog consists of two parts, an index and the revision data.
246
246
247 The index is a file with a fixed record size containing
247 The index is a file with a fixed record size containing
248 information on each revision, including its nodeid (hash), the
248 information on each revision, including its nodeid (hash), the
249 nodeids of its parents, the position and offset of its data within
249 nodeids of its parents, the position and offset of its data within
250 the data file, and the revision it's based on. Finally, each entry
250 the data file, and the revision it's based on. Finally, each entry
251 contains a linkrev entry that can serve as a pointer to external
251 contains a linkrev entry that can serve as a pointer to external
252 data.
252 data.
253
253
254 The revision data itself is a linear collection of data chunks.
254 The revision data itself is a linear collection of data chunks.
255 Each chunk represents a revision and is usually represented as a
255 Each chunk represents a revision and is usually represented as a
256 delta against the previous chunk. To bound lookup time, runs of
256 delta against the previous chunk. To bound lookup time, runs of
257 deltas are limited to about 2 times the length of the original
257 deltas are limited to about 2 times the length of the original
258 version data. This makes retrieval of a version proportional to
258 version data. This makes retrieval of a version proportional to
259 its size, or O(1) relative to the number of revisions.
259 its size, or O(1) relative to the number of revisions.
260
260
261 Both pieces of the revlog are written to in an append-only
261 Both pieces of the revlog are written to in an append-only
262 fashion, which means we never need to rewrite a file to insert or
262 fashion, which means we never need to rewrite a file to insert or
263 remove data, and can use some simple techniques to avoid the need
263 remove data, and can use some simple techniques to avoid the need
264 for locking while reading.
264 for locking while reading.
265
265
266 If checkambig, indexfile is opened with checkambig=True at
266 If checkambig, indexfile is opened with checkambig=True at
267 writing, to avoid file stat ambiguity.
267 writing, to avoid file stat ambiguity.
268
268
269 If mmaplargeindex is True, and an mmapindexthreshold is set, the
269 If mmaplargeindex is True, and an mmapindexthreshold is set, the
270 index will be mmapped rather than read if it is larger than the
270 index will be mmapped rather than read if it is larger than the
271 configured threshold.
271 configured threshold.
272
272
273 If censorable is True, the revlog can have censored revisions.
273 If censorable is True, the revlog can have censored revisions.
274
274
275 If `upperboundcomp` is not None, this is the expected maximal gain from
275 If `upperboundcomp` is not None, this is the expected maximal gain from
276 compression for the data content.
276 compression for the data content.
277
277
278 `concurrencychecker` is an optional function that receives 3 arguments: a
278 `concurrencychecker` is an optional function that receives 3 arguments: a
279 file handle, a filename, and an expected position. It should check whether
279 file handle, a filename, and an expected position. It should check whether
280 the current position in the file handle is valid, and log/warn/fail (by
280 the current position in the file handle is valid, and log/warn/fail (by
281 raising).
281 raising).
282
282
283 See mercurial/revlogutils/constants.py for details about the content of an
283 See mercurial/revlogutils/constants.py for details about the content of an
284 index entry.
284 index entry.
285 """
285 """
286
286
287 _flagserrorclass = error.RevlogError
287 _flagserrorclass = error.RevlogError
288
288
    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exist for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
        canonical_parent_order=True,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identify the content stored in
        this revlog. It helps the rest of the code to understand what the
        revlog is about without having to resort to heuristic and index
        filename analysis. Note: that this must reliably be set by normal
        code, but that test, debug, or performance measurement code might
        not set this to accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        # file names; None until `_loadindex` (or a docket) fills them in
        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self._debug_delta = False
        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # name of the compression engine (as bytes); default overridden by
        # the `compengine` option in `_init_opts`
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self.hassidedata = False
        # sparse-read tuning; may be overridden by options in `_init_opts`
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        # reads options, opens and parses the index file
        self._loadindex()

        self._concurrencychecker = concurrencychecker

        # parent order is supposed to be semantically irrelevant, so we
        # normally resort parents to ensure that the first parent is non-null,
        # if there is a non-null parent at all.
        # filelog abuses the parent order as flag to mark some instances of
        # meta-encoded files, so allow it to disable this behavior.
        self.canonical_parent_order = canonical_parent_order
385
385
    def _init_opts(self):
        """process options (from above/config) to setup associated default revlog mode

        These values might be affected when actually reading on disk information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size for start to use mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        # pick the header to write when creating a brand new revlog
        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        # NOTE(review): uses self.opener.options directly here, but `opts`
        # is the same object so the lookup is equivalent
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        # override the instance defaults set in __init__ from options
        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        # lazydeltabase is only honored when lazydelta itself is enabled
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'debug-delta' in opts:
            self._debug_delta = opts[b'debug-delta']
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).items():
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        # the chunk cache size must be a positive power of two (the second
        # check uses the usual `n & (n - 1)` power-of-two test)
        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap
467
467
    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file content with or without mmap

        The file is memory-mapped instead of read when ``mmap_threshold``
        is not None and the on-disk size is at least that many bytes.

        When ``size`` is not None, at most ``size`` bytes are returned.

        If the file is missing return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash: never map past the
                            # end of the file
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except FileNotFoundError:
            return b''
492
492
    def _loadindex(self, docket=None):
        """Read and parse the on-disk index, setting up ``self.index`` and
        all format/feature attributes derived from the index header.

        When ``docket`` is given it is used directly instead of reading the
        entry-point file.  Raises ``error.RevlogError`` on unknown versions,
        unknown flags, or corrupted index data.
        """

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        # pick the file we enter the revlog through: a postfixed variant,
        # the pending file (transaction in progress), or the plain index
        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        else:
            entry_point = b'%s.i' % self.radix

        if docket is not None:
            self._docket = docket
            self._docket_file = entry_point
        else:
            entry_data = b''
            self._initempty = True
            entry_data = self._get_data(entry_point, mmapindexthreshold)
            if len(entry_data) > 0:
                # first 4 bytes of the entry file hold the version header
                header = INDEX_HEADER.unpack(entry_data[:4])[0]
                self._initempty = False
            else:
                header = new_header

            # header layout: low 16 bits are the version, the rest are flags
            self._format_flags = header & ~0xFFFF
            self._format_version = header & 0xFFFF

            supported_flags = SUPPORTED_FLAGS.get(self._format_version)
            if supported_flags is None:
                msg = _(b'unknown version (%d) in revlog %s')
                msg %= (self._format_version, self.display_id)
                raise error.RevlogError(msg)
            elif self._format_flags & ~supported_flags:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            features = FEATURES_BY_VERSION[self._format_version]
            self._inline = features[b'inline'](self._format_flags)
            self._generaldelta = features[b'generaldelta'](self._format_flags)
            self.hassidedata = features[b'sidedata']

            if not features[b'docket']:
                # docket-less formats: the entry point *is* the index
                self._indexfile = entry_point
                index_data = entry_data
            else:
                self._docket_file = entry_point
                if self._initempty:
                    self._docket = docketutil.default_docket(self, header)
                else:
                    self._docket = docketutil.parse_docket(
                        self, entry_data, use_pending=self._trypending
                    )

        if self._docket is not None:
            # docket formats: index lives in its own file, sized by the docket
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        # the development nodemap parser is only used when explicitly
        # forced and actually available
        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        # select the parser matching the detected on-disk format
        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    # only trust the persisted nodemap if its recorded tip
                    # still matches the index content
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index = index
        # random-access reader over the data segment (the index file itself
        # when data is inline)
        self._segmentfile = randomaccessfile.randomaccessfile(
            self.opener,
            (self._indexfile if self._inline else self._datafile),
            self._chunkcachesize,
            chunkcache,
        )
        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
            self.opener,
            self._sidedatafile,
            self._chunkcachesize,
        )
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}
648
648
649 @util.propertycache
649 @util.propertycache
650 def revlog_kind(self):
650 def revlog_kind(self):
651 return self.target[0]
651 return self.target[0]
652
652
653 @util.propertycache
653 @util.propertycache
654 def display_id(self):
654 def display_id(self):
655 """The public facing "ID" of the revlog that we use in message"""
655 """The public facing "ID" of the revlog that we use in message"""
656 # Maybe we should build a user facing representation of
656 # Maybe we should build a user facing representation of
657 # revlog.target instead of using `self.radix`
657 # revlog.target instead of using `self.radix`
658 return self.radix
658 return self.radix
659
659
660 def _get_decompressor(self, t):
660 def _get_decompressor(self, t):
661 try:
661 try:
662 compressor = self._decompressors[t]
662 compressor = self._decompressors[t]
663 except KeyError:
663 except KeyError:
664 try:
664 try:
665 engine = util.compengines.forrevlogheader(t)
665 engine = util.compengines.forrevlogheader(t)
666 compressor = engine.revlogcompressor(self._compengineopts)
666 compressor = engine.revlogcompressor(self._compengineopts)
667 self._decompressors[t] = compressor
667 self._decompressors[t] = compressor
668 except KeyError:
668 except KeyError:
669 raise error.RevlogError(
669 raise error.RevlogError(
670 _(b'unknown compression type %s') % binascii.hexlify(t)
670 _(b'unknown compression type %s') % binascii.hexlify(t)
671 )
671 )
672 return compressor
672 return compressor
673
673
674 @util.propertycache
674 @util.propertycache
675 def _compressor(self):
675 def _compressor(self):
676 engine = util.compengines[self._compengine]
676 engine = util.compengines[self._compengine]
677 return engine.revlogcompressor(self._compengineopts)
677 return engine.revlogcompressor(self._compengineopts)
678
678
679 @util.propertycache
679 @util.propertycache
680 def _decompressor(self):
680 def _decompressor(self):
681 """the default decompressor"""
681 """the default decompressor"""
682 if self._docket is None:
682 if self._docket is None:
683 return None
683 return None
684 t = self._docket.default_compression_header
684 t = self._docket.default_compression_header
685 c = self._get_decompressor(t)
685 c = self._get_decompressor(t)
686 return c.decompress
686 return c.decompress
687
687
688 def _indexfp(self):
688 def _indexfp(self):
689 """file object for the revlog's index file"""
689 """file object for the revlog's index file"""
690 return self.opener(self._indexfile, mode=b"r")
690 return self.opener(self._indexfile, mode=b"r")
691
691
692 def __index_write_fp(self):
692 def __index_write_fp(self):
693 # You should not use this directly and use `_writing` instead
693 # You should not use this directly and use `_writing` instead
694 try:
694 try:
695 f = self.opener(
695 f = self.opener(
696 self._indexfile, mode=b"r+", checkambig=self._checkambig
696 self._indexfile, mode=b"r+", checkambig=self._checkambig
697 )
697 )
698 if self._docket is None:
698 if self._docket is None:
699 f.seek(0, os.SEEK_END)
699 f.seek(0, os.SEEK_END)
700 else:
700 else:
701 f.seek(self._docket.index_end, os.SEEK_SET)
701 f.seek(self._docket.index_end, os.SEEK_SET)
702 return f
702 return f
703 except FileNotFoundError:
703 except FileNotFoundError:
704 return self.opener(
704 return self.opener(
705 self._indexfile, mode=b"w+", checkambig=self._checkambig
705 self._indexfile, mode=b"w+", checkambig=self._checkambig
706 )
706 )
707
707
708 def __index_new_fp(self):
708 def __index_new_fp(self):
709 # You should not use this unless you are upgrading from inline revlog
709 # You should not use this unless you are upgrading from inline revlog
710 return self.opener(
710 return self.opener(
711 self._indexfile,
711 self._indexfile,
712 mode=b"w",
712 mode=b"w",
713 checkambig=self._checkambig,
713 checkambig=self._checkambig,
714 atomictemp=True,
714 atomictemp=True,
715 )
715 )
716
716
717 def _datafp(self, mode=b'r'):
717 def _datafp(self, mode=b'r'):
718 """file object for the revlog's data file"""
718 """file object for the revlog's data file"""
719 return self.opener(self._datafile, mode=mode)
719 return self.opener(self._datafile, mode=mode)
720
720
721 @contextlib.contextmanager
721 @contextlib.contextmanager
722 def _sidedatareadfp(self):
722 def _sidedatareadfp(self):
723 """file object suitable to read sidedata"""
723 """file object suitable to read sidedata"""
724 if self._writinghandles:
724 if self._writinghandles:
725 yield self._writinghandles[2]
725 yield self._writinghandles[2]
726 else:
726 else:
727 with self.opener(self._sidedatafile) as fp:
727 with self.opener(self._sidedatafile) as fp:
728 yield fp
728 yield fp
729
729
730 def tiprev(self):
730 def tiprev(self):
731 return len(self.index) - 1
731 return len(self.index) - 1
732
732
733 def tip(self):
733 def tip(self):
734 return self.node(self.tiprev())
734 return self.node(self.tiprev())
735
735
736 def __contains__(self, rev):
736 def __contains__(self, rev):
737 return 0 <= rev < len(self)
737 return 0 <= rev < len(self)
738
738
739 def __len__(self):
739 def __len__(self):
740 return len(self.index)
740 return len(self.index)
741
741
742 def __iter__(self):
742 def __iter__(self):
743 return iter(range(len(self)))
743 return iter(range(len(self)))
744
744
745 def revs(self, start=0, stop=None):
745 def revs(self, start=0, stop=None):
746 """iterate over all rev in this revlog (from start to stop)"""
746 """iterate over all rev in this revlog (from start to stop)"""
747 return storageutil.iterrevs(len(self), start=start, stop=stop)
747 return storageutil.iterrevs(len(self), start=start, stop=stop)
748
748
749 def hasnode(self, node):
749 def hasnode(self, node):
750 try:
750 try:
751 self.rev(node)
751 self.rev(node)
752 return True
752 return True
753 except KeyError:
753 except KeyError:
754 return False
754 return False
755
755
756 def candelta(self, baserev, rev):
756 def candelta(self, baserev, rev):
757 """whether two revisions (baserev, rev) can be delta-ed or not"""
757 """whether two revisions (baserev, rev) can be delta-ed or not"""
758 # Disable delta if either rev requires a content-changing flag
758 # Disable delta if either rev requires a content-changing flag
759 # processor (ex. LFS). This is because such flag processor can alter
759 # processor (ex. LFS). This is because such flag processor can alter
760 # the rawtext content that the delta will be based on, and two clients
760 # the rawtext content that the delta will be based on, and two clients
761 # could have a same revlog node with different flags (i.e. different
761 # could have a same revlog node with different flags (i.e. different
762 # rawtext contents) and the delta could be incompatible.
762 # rawtext contents) and the delta could be incompatible.
763 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
763 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
764 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
764 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
765 ):
765 ):
766 return False
766 return False
767 return True
767 return True
768
768
769 def update_caches(self, transaction):
769 def update_caches(self, transaction):
770 if self._nodemap_file is not None:
770 if self._nodemap_file is not None:
771 if transaction is None:
771 if transaction is None:
772 nodemaputil.update_persistent_nodemap(self)
772 nodemaputil.update_persistent_nodemap(self)
773 else:
773 else:
774 nodemaputil.setup_persistent_nodemap(transaction, self)
774 nodemaputil.setup_persistent_nodemap(transaction, self)
775
775
776 def clearcaches(self):
776 def clearcaches(self):
777 self._revisioncache = None
777 self._revisioncache = None
778 self._chainbasecache.clear()
778 self._chainbasecache.clear()
779 self._segmentfile.clear_cache()
779 self._segmentfile.clear_cache()
780 self._segmentfile_sidedata.clear_cache()
780 self._segmentfile_sidedata.clear_cache()
781 self._pcache = {}
781 self._pcache = {}
782 self._nodemap_docket = None
782 self._nodemap_docket = None
783 self.index.clearcaches()
783 self.index.clearcaches()
784 # The python code is the one responsible for validating the docket, we
784 # The python code is the one responsible for validating the docket, we
785 # end up having to refresh it here.
785 # end up having to refresh it here.
786 use_nodemap = (
786 use_nodemap = (
787 not self._inline
787 not self._inline
788 and self._nodemap_file is not None
788 and self._nodemap_file is not None
789 and util.safehasattr(self.index, 'update_nodemap_data')
789 and util.safehasattr(self.index, 'update_nodemap_data')
790 )
790 )
791 if use_nodemap:
791 if use_nodemap:
792 nodemap_data = nodemaputil.persisted_data(self)
792 nodemap_data = nodemaputil.persisted_data(self)
793 if nodemap_data is not None:
793 if nodemap_data is not None:
794 self._nodemap_docket = nodemap_data[0]
794 self._nodemap_docket = nodemap_data[0]
795 self.index.update_nodemap_data(*nodemap_data)
795 self.index.update_nodemap_data(*nodemap_data)
796
796
797 def rev(self, node):
797 def rev(self, node):
798 try:
798 try:
799 return self.index.rev(node)
799 return self.index.rev(node)
800 except TypeError:
800 except TypeError:
801 raise
801 raise
802 except error.RevlogError:
802 except error.RevlogError:
803 # parsers.c radix tree lookup failed
803 # parsers.c radix tree lookup failed
804 if (
804 if (
805 node == self.nodeconstants.wdirid
805 node == self.nodeconstants.wdirid
806 or node in self.nodeconstants.wdirfilenodeids
806 or node in self.nodeconstants.wdirfilenodeids
807 ):
807 ):
808 raise error.WdirUnsupported
808 raise error.WdirUnsupported
809 raise error.LookupError(node, self.display_id, _(b'no node'))
809 raise error.LookupError(node, self.display_id, _(b'no node'))
810
810
811 # Accessors for index entries.
811 # Accessors for index entries.
812
812
813 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
813 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
814 # are flags.
814 # are flags.
815 def start(self, rev):
815 def start(self, rev):
816 return int(self.index[rev][0] >> 16)
816 return int(self.index[rev][0] >> 16)
817
817
818 def sidedata_cut_off(self, rev):
818 def sidedata_cut_off(self, rev):
819 sd_cut_off = self.index[rev][8]
819 sd_cut_off = self.index[rev][8]
820 if sd_cut_off != 0:
820 if sd_cut_off != 0:
821 return sd_cut_off
821 return sd_cut_off
822 # This is some annoying dance, because entries without sidedata
822 # This is some annoying dance, because entries without sidedata
823 # currently use 0 as their ofsset. (instead of previous-offset +
823 # currently use 0 as their ofsset. (instead of previous-offset +
824 # previous-size)
824 # previous-size)
825 #
825 #
826 # We should reconsider this sidedata β†’ 0 sidata_offset policy.
826 # We should reconsider this sidedata β†’ 0 sidata_offset policy.
827 # In the meantime, we need this.
827 # In the meantime, we need this.
828 while 0 <= rev:
828 while 0 <= rev:
829 e = self.index[rev]
829 e = self.index[rev]
830 if e[9] != 0:
830 if e[9] != 0:
831 return e[8] + e[9]
831 return e[8] + e[9]
832 rev -= 1
832 rev -= 1
833 return 0
833 return 0
834
834
835 def flags(self, rev):
835 def flags(self, rev):
836 return self.index[rev][0] & 0xFFFF
836 return self.index[rev][0] & 0xFFFF
837
837
838 def length(self, rev):
838 def length(self, rev):
839 return self.index[rev][1]
839 return self.index[rev][1]
840
840
841 def sidedata_length(self, rev):
841 def sidedata_length(self, rev):
842 if not self.hassidedata:
842 if not self.hassidedata:
843 return 0
843 return 0
844 return self.index[rev][9]
844 return self.index[rev][9]
845
845
846 def rawsize(self, rev):
846 def rawsize(self, rev):
847 """return the length of the uncompressed text for a given revision"""
847 """return the length of the uncompressed text for a given revision"""
848 l = self.index[rev][2]
848 l = self.index[rev][2]
849 if l >= 0:
849 if l >= 0:
850 return l
850 return l
851
851
852 t = self.rawdata(rev)
852 t = self.rawdata(rev)
853 return len(t)
853 return len(t)
854
854
855 def size(self, rev):
855 def size(self, rev):
856 """length of non-raw text (processed by a "read" flag processor)"""
856 """length of non-raw text (processed by a "read" flag processor)"""
857 # fast path: if no "read" flag processor could change the content,
857 # fast path: if no "read" flag processor could change the content,
858 # size is rawsize. note: ELLIPSIS is known to not change the content.
858 # size is rawsize. note: ELLIPSIS is known to not change the content.
859 flags = self.flags(rev)
859 flags = self.flags(rev)
860 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
860 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
861 return self.rawsize(rev)
861 return self.rawsize(rev)
862
862
863 return len(self.revision(rev))
863 return len(self.revision(rev))
864
864
865 def fast_rank(self, rev):
865 def fast_rank(self, rev):
866 """Return the rank of a revision if already known, or None otherwise.
866 """Return the rank of a revision if already known, or None otherwise.
867
867
868 The rank of a revision is the size of the sub-graph it defines as a
868 The rank of a revision is the size of the sub-graph it defines as a
869 head. Equivalently, the rank of a revision `r` is the size of the set
869 head. Equivalently, the rank of a revision `r` is the size of the set
870 `ancestors(r)`, `r` included.
870 `ancestors(r)`, `r` included.
871
871
872 This method returns the rank retrieved from the revlog in constant
872 This method returns the rank retrieved from the revlog in constant
873 time. It makes no attempt at computing unknown values for versions of
873 time. It makes no attempt at computing unknown values for versions of
874 the revlog which do not persist the rank.
874 the revlog which do not persist the rank.
875 """
875 """
876 rank = self.index[rev][ENTRY_RANK]
876 rank = self.index[rev][ENTRY_RANK]
877 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
877 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
878 return None
878 return None
879 if rev == nullrev:
879 if rev == nullrev:
880 return 0 # convention
880 return 0 # convention
881 return rank
881 return rank
882
882
883 def chainbase(self, rev):
883 def chainbase(self, rev):
884 base = self._chainbasecache.get(rev)
884 base = self._chainbasecache.get(rev)
885 if base is not None:
885 if base is not None:
886 return base
886 return base
887
887
888 index = self.index
888 index = self.index
889 iterrev = rev
889 iterrev = rev
890 base = index[iterrev][3]
890 base = index[iterrev][3]
891 while base != iterrev:
891 while base != iterrev:
892 iterrev = base
892 iterrev = base
893 base = index[iterrev][3]
893 base = index[iterrev][3]
894
894
895 self._chainbasecache[rev] = base
895 self._chainbasecache[rev] = base
896 return base
896 return base
897
897
898 def linkrev(self, rev):
898 def linkrev(self, rev):
899 return self.index[rev][4]
899 return self.index[rev][4]
900
900
901 def parentrevs(self, rev):
901 def parentrevs(self, rev):
902 try:
902 try:
903 entry = self.index[rev]
903 entry = self.index[rev]
904 except IndexError:
904 except IndexError:
905 if rev == wdirrev:
905 if rev == wdirrev:
906 raise error.WdirUnsupported
906 raise error.WdirUnsupported
907 raise
907 raise
908
908
909 if self.canonical_parent_order and entry[5] == nullrev:
909 if self.canonical_parent_order and entry[5] == nullrev:
910 return entry[6], entry[5]
910 return entry[6], entry[5]
911 else:
911 else:
912 return entry[5], entry[6]
912 return entry[5], entry[6]
913
913
914 # fast parentrevs(rev) where rev isn't filtered
914 # fast parentrevs(rev) where rev isn't filtered
915 _uncheckedparentrevs = parentrevs
915 _uncheckedparentrevs = parentrevs
916
916
917 def node(self, rev):
917 def node(self, rev):
918 try:
918 try:
919 return self.index[rev][7]
919 return self.index[rev][7]
920 except IndexError:
920 except IndexError:
921 if rev == wdirrev:
921 if rev == wdirrev:
922 raise error.WdirUnsupported
922 raise error.WdirUnsupported
923 raise
923 raise
924
924
925 # Derived from index values.
925 # Derived from index values.
926
926
927 def end(self, rev):
927 def end(self, rev):
928 return self.start(rev) + self.length(rev)
928 return self.start(rev) + self.length(rev)
929
929
930 def parents(self, node):
930 def parents(self, node):
931 i = self.index
931 i = self.index
932 d = i[self.rev(node)]
932 d = i[self.rev(node)]
933 # inline node() to avoid function call overhead
933 # inline node() to avoid function call overhead
934 if self.canonical_parent_order and d[5] == self.nullid:
934 if self.canonical_parent_order and d[5] == self.nullid:
935 return i[d[6]][7], i[d[5]][7]
935 return i[d[6]][7], i[d[5]][7]
936 else:
936 else:
937 return i[d[5]][7], i[d[6]][7]
937 return i[d[5]][7], i[d[6]][7]
938
938
939 def chainlen(self, rev):
939 def chainlen(self, rev):
940 return self._chaininfo(rev)[0]
940 return self._chaininfo(rev)[0]
941
941
942 def _chaininfo(self, rev):
942 def _chaininfo(self, rev):
943 chaininfocache = self._chaininfocache
943 chaininfocache = self._chaininfocache
944 if rev in chaininfocache:
944 if rev in chaininfocache:
945 return chaininfocache[rev]
945 return chaininfocache[rev]
946 index = self.index
946 index = self.index
947 generaldelta = self._generaldelta
947 generaldelta = self._generaldelta
948 iterrev = rev
948 iterrev = rev
949 e = index[iterrev]
949 e = index[iterrev]
950 clen = 0
950 clen = 0
951 compresseddeltalen = 0
951 compresseddeltalen = 0
952 while iterrev != e[3]:
952 while iterrev != e[3]:
953 clen += 1
953 clen += 1
954 compresseddeltalen += e[1]
954 compresseddeltalen += e[1]
955 if generaldelta:
955 if generaldelta:
956 iterrev = e[3]
956 iterrev = e[3]
957 else:
957 else:
958 iterrev -= 1
958 iterrev -= 1
959 if iterrev in chaininfocache:
959 if iterrev in chaininfocache:
960 t = chaininfocache[iterrev]
960 t = chaininfocache[iterrev]
961 clen += t[0]
961 clen += t[0]
962 compresseddeltalen += t[1]
962 compresseddeltalen += t[1]
963 break
963 break
964 e = index[iterrev]
964 e = index[iterrev]
965 else:
965 else:
966 # Add text length of base since decompressing that also takes
966 # Add text length of base since decompressing that also takes
967 # work. For cache hits the length is already included.
967 # work. For cache hits the length is already included.
968 compresseddeltalen += e[1]
968 compresseddeltalen += e[1]
969 r = (clen, compresseddeltalen)
969 r = (clen, compresseddeltalen)
970 chaininfocache[rev] = r
970 chaininfocache[rev] = r
971 return r
971 return r
972
972
973 def _deltachain(self, rev, stoprev=None):
973 def _deltachain(self, rev, stoprev=None):
974 """Obtain the delta chain for a revision.
974 """Obtain the delta chain for a revision.
975
975
976 ``stoprev`` specifies a revision to stop at. If not specified, we
976 ``stoprev`` specifies a revision to stop at. If not specified, we
977 stop at the base of the chain.
977 stop at the base of the chain.
978
978
979 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
979 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
980 revs in ascending order and ``stopped`` is a bool indicating whether
980 revs in ascending order and ``stopped`` is a bool indicating whether
981 ``stoprev`` was hit.
981 ``stoprev`` was hit.
982 """
982 """
983 # Try C implementation.
983 # Try C implementation.
984 try:
984 try:
985 return self.index.deltachain(rev, stoprev, self._generaldelta)
985 return self.index.deltachain(rev, stoprev, self._generaldelta)
986 except AttributeError:
986 except AttributeError:
987 pass
987 pass
988
988
989 chain = []
989 chain = []
990
990
991 # Alias to prevent attribute lookup in tight loop.
991 # Alias to prevent attribute lookup in tight loop.
992 index = self.index
992 index = self.index
993 generaldelta = self._generaldelta
993 generaldelta = self._generaldelta
994
994
995 iterrev = rev
995 iterrev = rev
996 e = index[iterrev]
996 e = index[iterrev]
997 while iterrev != e[3] and iterrev != stoprev:
997 while iterrev != e[3] and iterrev != stoprev:
998 chain.append(iterrev)
998 chain.append(iterrev)
999 if generaldelta:
999 if generaldelta:
1000 iterrev = e[3]
1000 iterrev = e[3]
1001 else:
1001 else:
1002 iterrev -= 1
1002 iterrev -= 1
1003 e = index[iterrev]
1003 e = index[iterrev]
1004
1004
1005 if iterrev == stoprev:
1005 if iterrev == stoprev:
1006 stopped = True
1006 stopped = True
1007 else:
1007 else:
1008 chain.append(iterrev)
1008 chain.append(iterrev)
1009 stopped = False
1009 stopped = False
1010
1010
1011 chain.reverse()
1011 chain.reverse()
1012 return chain, stopped
1012 return chain, stopped
1013
1013
1014 def ancestors(self, revs, stoprev=0, inclusive=False):
1014 def ancestors(self, revs, stoprev=0, inclusive=False):
1015 """Generate the ancestors of 'revs' in reverse revision order.
1015 """Generate the ancestors of 'revs' in reverse revision order.
1016 Does not generate revs lower than stoprev.
1016 Does not generate revs lower than stoprev.
1017
1017
1018 See the documentation for ancestor.lazyancestors for more details."""
1018 See the documentation for ancestor.lazyancestors for more details."""
1019
1019
1020 # first, make sure start revisions aren't filtered
1020 # first, make sure start revisions aren't filtered
1021 revs = list(revs)
1021 revs = list(revs)
1022 checkrev = self.node
1022 checkrev = self.node
1023 for r in revs:
1023 for r in revs:
1024 checkrev(r)
1024 checkrev(r)
1025 # and we're sure ancestors aren't filtered as well
1025 # and we're sure ancestors aren't filtered as well
1026
1026
1027 if rustancestor is not None and self.index.rust_ext_compat:
1027 if rustancestor is not None and self.index.rust_ext_compat:
1028 lazyancestors = rustancestor.LazyAncestors
1028 lazyancestors = rustancestor.LazyAncestors
1029 arg = self.index
1029 arg = self.index
1030 else:
1030 else:
1031 lazyancestors = ancestor.lazyancestors
1031 lazyancestors = ancestor.lazyancestors
1032 arg = self._uncheckedparentrevs
1032 arg = self._uncheckedparentrevs
1033 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1033 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1034
1034
1035 def descendants(self, revs):
1035 def descendants(self, revs):
1036 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1036 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1037
1037
1038 def findcommonmissing(self, common=None, heads=None):
1038 def findcommonmissing(self, common=None, heads=None):
1039 """Return a tuple of the ancestors of common and the ancestors of heads
1039 """Return a tuple of the ancestors of common and the ancestors of heads
1040 that are not ancestors of common. In revset terminology, we return the
1040 that are not ancestors of common. In revset terminology, we return the
1041 tuple:
1041 tuple:
1042
1042
1043 ::common, (::heads) - (::common)
1043 ::common, (::heads) - (::common)
1044
1044
1045 The list is sorted by revision number, meaning it is
1045 The list is sorted by revision number, meaning it is
1046 topologically sorted.
1046 topologically sorted.
1047
1047
1048 'heads' and 'common' are both lists of node IDs. If heads is
1048 'heads' and 'common' are both lists of node IDs. If heads is
1049 not supplied, uses all of the revlog's heads. If common is not
1049 not supplied, uses all of the revlog's heads. If common is not
1050 supplied, uses nullid."""
1050 supplied, uses nullid."""
1051 if common is None:
1051 if common is None:
1052 common = [self.nullid]
1052 common = [self.nullid]
1053 if heads is None:
1053 if heads is None:
1054 heads = self.heads()
1054 heads = self.heads()
1055
1055
1056 common = [self.rev(n) for n in common]
1056 common = [self.rev(n) for n in common]
1057 heads = [self.rev(n) for n in heads]
1057 heads = [self.rev(n) for n in heads]
1058
1058
1059 # we want the ancestors, but inclusive
1059 # we want the ancestors, but inclusive
1060 class lazyset:
1060 class lazyset:
1061 def __init__(self, lazyvalues):
1061 def __init__(self, lazyvalues):
1062 self.addedvalues = set()
1062 self.addedvalues = set()
1063 self.lazyvalues = lazyvalues
1063 self.lazyvalues = lazyvalues
1064
1064
1065 def __contains__(self, value):
1065 def __contains__(self, value):
1066 return value in self.addedvalues or value in self.lazyvalues
1066 return value in self.addedvalues or value in self.lazyvalues
1067
1067
1068 def __iter__(self):
1068 def __iter__(self):
1069 added = self.addedvalues
1069 added = self.addedvalues
1070 for r in added:
1070 for r in added:
1071 yield r
1071 yield r
1072 for r in self.lazyvalues:
1072 for r in self.lazyvalues:
1073 if not r in added:
1073 if not r in added:
1074 yield r
1074 yield r
1075
1075
1076 def add(self, value):
1076 def add(self, value):
1077 self.addedvalues.add(value)
1077 self.addedvalues.add(value)
1078
1078
1079 def update(self, values):
1079 def update(self, values):
1080 self.addedvalues.update(values)
1080 self.addedvalues.update(values)
1081
1081
1082 has = lazyset(self.ancestors(common))
1082 has = lazyset(self.ancestors(common))
1083 has.add(nullrev)
1083 has.add(nullrev)
1084 has.update(common)
1084 has.update(common)
1085
1085
1086 # take all ancestors from heads that aren't in has
1086 # take all ancestors from heads that aren't in has
1087 missing = set()
1087 missing = set()
1088 visit = collections.deque(r for r in heads if r not in has)
1088 visit = collections.deque(r for r in heads if r not in has)
1089 while visit:
1089 while visit:
1090 r = visit.popleft()
1090 r = visit.popleft()
1091 if r in missing:
1091 if r in missing:
1092 continue
1092 continue
1093 else:
1093 else:
1094 missing.add(r)
1094 missing.add(r)
1095 for p in self.parentrevs(r):
1095 for p in self.parentrevs(r):
1096 if p not in has:
1096 if p not in has:
1097 visit.append(p)
1097 visit.append(p)
1098 missing = list(missing)
1098 missing = list(missing)
1099 missing.sort()
1099 missing.sort()
1100 return has, [self.node(miss) for miss in missing]
1100 return has, [self.node(miss) for miss in missing]
1101
1101
1102 def incrementalmissingrevs(self, common=None):
1102 def incrementalmissingrevs(self, common=None):
1103 """Return an object that can be used to incrementally compute the
1103 """Return an object that can be used to incrementally compute the
1104 revision numbers of the ancestors of arbitrary sets that are not
1104 revision numbers of the ancestors of arbitrary sets that are not
1105 ancestors of common. This is an ancestor.incrementalmissingancestors
1105 ancestors of common. This is an ancestor.incrementalmissingancestors
1106 object.
1106 object.
1107
1107
1108 'common' is a list of revision numbers. If common is not supplied, uses
1108 'common' is a list of revision numbers. If common is not supplied, uses
1109 nullrev.
1109 nullrev.
1110 """
1110 """
1111 if common is None:
1111 if common is None:
1112 common = [nullrev]
1112 common = [nullrev]
1113
1113
1114 if rustancestor is not None and self.index.rust_ext_compat:
1114 if rustancestor is not None and self.index.rust_ext_compat:
1115 return rustancestor.MissingAncestors(self.index, common)
1115 return rustancestor.MissingAncestors(self.index, common)
1116 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1116 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1117
1117
1118 def findmissingrevs(self, common=None, heads=None):
1118 def findmissingrevs(self, common=None, heads=None):
1119 """Return the revision numbers of the ancestors of heads that
1119 """Return the revision numbers of the ancestors of heads that
1120 are not ancestors of common.
1120 are not ancestors of common.
1121
1121
1122 More specifically, return a list of revision numbers corresponding to
1122 More specifically, return a list of revision numbers corresponding to
1123 nodes N such that every N satisfies the following constraints:
1123 nodes N such that every N satisfies the following constraints:
1124
1124
1125 1. N is an ancestor of some node in 'heads'
1125 1. N is an ancestor of some node in 'heads'
1126 2. N is not an ancestor of any node in 'common'
1126 2. N is not an ancestor of any node in 'common'
1127
1127
1128 The list is sorted by revision number, meaning it is
1128 The list is sorted by revision number, meaning it is
1129 topologically sorted.
1129 topologically sorted.
1130
1130
1131 'heads' and 'common' are both lists of revision numbers. If heads is
1131 'heads' and 'common' are both lists of revision numbers. If heads is
1132 not supplied, uses all of the revlog's heads. If common is not
1132 not supplied, uses all of the revlog's heads. If common is not
1133 supplied, uses nullid."""
1133 supplied, uses nullid."""
1134 if common is None:
1134 if common is None:
1135 common = [nullrev]
1135 common = [nullrev]
1136 if heads is None:
1136 if heads is None:
1137 heads = self.headrevs()
1137 heads = self.headrevs()
1138
1138
1139 inc = self.incrementalmissingrevs(common=common)
1139 inc = self.incrementalmissingrevs(common=common)
1140 return inc.missingancestors(heads)
1140 return inc.missingancestors(heads)
1141
1141
1142 def findmissing(self, common=None, heads=None):
1142 def findmissing(self, common=None, heads=None):
1143 """Return the ancestors of heads that are not ancestors of common.
1143 """Return the ancestors of heads that are not ancestors of common.
1144
1144
1145 More specifically, return a list of nodes N such that every N
1145 More specifically, return a list of nodes N such that every N
1146 satisfies the following constraints:
1146 satisfies the following constraints:
1147
1147
1148 1. N is an ancestor of some node in 'heads'
1148 1. N is an ancestor of some node in 'heads'
1149 2. N is not an ancestor of any node in 'common'
1149 2. N is not an ancestor of any node in 'common'
1150
1150
1151 The list is sorted by revision number, meaning it is
1151 The list is sorted by revision number, meaning it is
1152 topologically sorted.
1152 topologically sorted.
1153
1153
1154 'heads' and 'common' are both lists of node IDs. If heads is
1154 'heads' and 'common' are both lists of node IDs. If heads is
1155 not supplied, uses all of the revlog's heads. If common is not
1155 not supplied, uses all of the revlog's heads. If common is not
1156 supplied, uses nullid."""
1156 supplied, uses nullid."""
1157 if common is None:
1157 if common is None:
1158 common = [self.nullid]
1158 common = [self.nullid]
1159 if heads is None:
1159 if heads is None:
1160 heads = self.heads()
1160 heads = self.heads()
1161
1161
1162 common = [self.rev(n) for n in common]
1162 common = [self.rev(n) for n in common]
1163 heads = [self.rev(n) for n in heads]
1163 heads = [self.rev(n) for n in heads]
1164
1164
1165 inc = self.incrementalmissingrevs(common=common)
1165 inc = self.incrementalmissingrevs(common=common)
1166 return [self.node(r) for r in inc.missingancestors(heads)]
1166 return [self.node(r) for r in inc.missingancestors(heads)]
1167
1167
1168 def nodesbetween(self, roots=None, heads=None):
1168 def nodesbetween(self, roots=None, heads=None):
1169 """Return a topological path from 'roots' to 'heads'.
1169 """Return a topological path from 'roots' to 'heads'.
1170
1170
1171 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1171 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1172 topologically sorted list of all nodes N that satisfy both of
1172 topologically sorted list of all nodes N that satisfy both of
1173 these constraints:
1173 these constraints:
1174
1174
1175 1. N is a descendant of some node in 'roots'
1175 1. N is a descendant of some node in 'roots'
1176 2. N is an ancestor of some node in 'heads'
1176 2. N is an ancestor of some node in 'heads'
1177
1177
1178 Every node is considered to be both a descendant and an ancestor
1178 Every node is considered to be both a descendant and an ancestor
1179 of itself, so every reachable node in 'roots' and 'heads' will be
1179 of itself, so every reachable node in 'roots' and 'heads' will be
1180 included in 'nodes'.
1180 included in 'nodes'.
1181
1181
1182 'outroots' is the list of reachable nodes in 'roots', i.e., the
1182 'outroots' is the list of reachable nodes in 'roots', i.e., the
1183 subset of 'roots' that is returned in 'nodes'. Likewise,
1183 subset of 'roots' that is returned in 'nodes'. Likewise,
1184 'outheads' is the subset of 'heads' that is also in 'nodes'.
1184 'outheads' is the subset of 'heads' that is also in 'nodes'.
1185
1185
1186 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1186 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1187 unspecified, uses nullid as the only root. If 'heads' is
1187 unspecified, uses nullid as the only root. If 'heads' is
1188 unspecified, uses list of all of the revlog's heads."""
1188 unspecified, uses list of all of the revlog's heads."""
1189 nonodes = ([], [], [])
1189 nonodes = ([], [], [])
1190 if roots is not None:
1190 if roots is not None:
1191 roots = list(roots)
1191 roots = list(roots)
1192 if not roots:
1192 if not roots:
1193 return nonodes
1193 return nonodes
1194 lowestrev = min([self.rev(n) for n in roots])
1194 lowestrev = min([self.rev(n) for n in roots])
1195 else:
1195 else:
1196 roots = [self.nullid] # Everybody's a descendant of nullid
1196 roots = [self.nullid] # Everybody's a descendant of nullid
1197 lowestrev = nullrev
1197 lowestrev = nullrev
1198 if (lowestrev == nullrev) and (heads is None):
1198 if (lowestrev == nullrev) and (heads is None):
1199 # We want _all_ the nodes!
1199 # We want _all_ the nodes!
1200 return (
1200 return (
1201 [self.node(r) for r in self],
1201 [self.node(r) for r in self],
1202 [self.nullid],
1202 [self.nullid],
1203 list(self.heads()),
1203 list(self.heads()),
1204 )
1204 )
1205 if heads is None:
1205 if heads is None:
1206 # All nodes are ancestors, so the latest ancestor is the last
1206 # All nodes are ancestors, so the latest ancestor is the last
1207 # node.
1207 # node.
1208 highestrev = len(self) - 1
1208 highestrev = len(self) - 1
1209 # Set ancestors to None to signal that every node is an ancestor.
1209 # Set ancestors to None to signal that every node is an ancestor.
1210 ancestors = None
1210 ancestors = None
1211 # Set heads to an empty dictionary for later discovery of heads
1211 # Set heads to an empty dictionary for later discovery of heads
1212 heads = {}
1212 heads = {}
1213 else:
1213 else:
1214 heads = list(heads)
1214 heads = list(heads)
1215 if not heads:
1215 if not heads:
1216 return nonodes
1216 return nonodes
1217 ancestors = set()
1217 ancestors = set()
1218 # Turn heads into a dictionary so we can remove 'fake' heads.
1218 # Turn heads into a dictionary so we can remove 'fake' heads.
1219 # Also, later we will be using it to filter out the heads we can't
1219 # Also, later we will be using it to filter out the heads we can't
1220 # find from roots.
1220 # find from roots.
1221 heads = dict.fromkeys(heads, False)
1221 heads = dict.fromkeys(heads, False)
1222 # Start at the top and keep marking parents until we're done.
1222 # Start at the top and keep marking parents until we're done.
1223 nodestotag = set(heads)
1223 nodestotag = set(heads)
1224 # Remember where the top was so we can use it as a limit later.
1224 # Remember where the top was so we can use it as a limit later.
1225 highestrev = max([self.rev(n) for n in nodestotag])
1225 highestrev = max([self.rev(n) for n in nodestotag])
1226 while nodestotag:
1226 while nodestotag:
1227 # grab a node to tag
1227 # grab a node to tag
1228 n = nodestotag.pop()
1228 n = nodestotag.pop()
1229 # Never tag nullid
1229 # Never tag nullid
1230 if n == self.nullid:
1230 if n == self.nullid:
1231 continue
1231 continue
1232 # A node's revision number represents its place in a
1232 # A node's revision number represents its place in a
1233 # topologically sorted list of nodes.
1233 # topologically sorted list of nodes.
1234 r = self.rev(n)
1234 r = self.rev(n)
1235 if r >= lowestrev:
1235 if r >= lowestrev:
1236 if n not in ancestors:
1236 if n not in ancestors:
1237 # If we are possibly a descendant of one of the roots
1237 # If we are possibly a descendant of one of the roots
1238 # and we haven't already been marked as an ancestor
1238 # and we haven't already been marked as an ancestor
1239 ancestors.add(n) # Mark as ancestor
1239 ancestors.add(n) # Mark as ancestor
1240 # Add non-nullid parents to list of nodes to tag.
1240 # Add non-nullid parents to list of nodes to tag.
1241 nodestotag.update(
1241 nodestotag.update(
1242 [p for p in self.parents(n) if p != self.nullid]
1242 [p for p in self.parents(n) if p != self.nullid]
1243 )
1243 )
1244 elif n in heads: # We've seen it before, is it a fake head?
1244 elif n in heads: # We've seen it before, is it a fake head?
1245 # So it is, real heads should not be the ancestors of
1245 # So it is, real heads should not be the ancestors of
1246 # any other heads.
1246 # any other heads.
1247 heads.pop(n)
1247 heads.pop(n)
1248 if not ancestors:
1248 if not ancestors:
1249 return nonodes
1249 return nonodes
1250 # Now that we have our set of ancestors, we want to remove any
1250 # Now that we have our set of ancestors, we want to remove any
1251 # roots that are not ancestors.
1251 # roots that are not ancestors.
1252
1252
1253 # If one of the roots was nullid, everything is included anyway.
1253 # If one of the roots was nullid, everything is included anyway.
1254 if lowestrev > nullrev:
1254 if lowestrev > nullrev:
1255 # But, since we weren't, let's recompute the lowest rev to not
1255 # But, since we weren't, let's recompute the lowest rev to not
1256 # include roots that aren't ancestors.
1256 # include roots that aren't ancestors.
1257
1257
1258 # Filter out roots that aren't ancestors of heads
1258 # Filter out roots that aren't ancestors of heads
1259 roots = [root for root in roots if root in ancestors]
1259 roots = [root for root in roots if root in ancestors]
1260 # Recompute the lowest revision
1260 # Recompute the lowest revision
1261 if roots:
1261 if roots:
1262 lowestrev = min([self.rev(root) for root in roots])
1262 lowestrev = min([self.rev(root) for root in roots])
1263 else:
1263 else:
1264 # No more roots? Return empty list
1264 # No more roots? Return empty list
1265 return nonodes
1265 return nonodes
1266 else:
1266 else:
1267 # We are descending from nullid, and don't need to care about
1267 # We are descending from nullid, and don't need to care about
1268 # any other roots.
1268 # any other roots.
1269 lowestrev = nullrev
1269 lowestrev = nullrev
1270 roots = [self.nullid]
1270 roots = [self.nullid]
1271 # Transform our roots list into a set.
1271 # Transform our roots list into a set.
1272 descendants = set(roots)
1272 descendants = set(roots)
1273 # Also, keep the original roots so we can filter out roots that aren't
1273 # Also, keep the original roots so we can filter out roots that aren't
1274 # 'real' roots (i.e. are descended from other roots).
1274 # 'real' roots (i.e. are descended from other roots).
1275 roots = descendants.copy()
1275 roots = descendants.copy()
1276 # Our topologically sorted list of output nodes.
1276 # Our topologically sorted list of output nodes.
1277 orderedout = []
1277 orderedout = []
1278 # Don't start at nullid since we don't want nullid in our output list,
1278 # Don't start at nullid since we don't want nullid in our output list,
1279 # and if nullid shows up in descendants, empty parents will look like
1279 # and if nullid shows up in descendants, empty parents will look like
1280 # they're descendants.
1280 # they're descendants.
1281 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1281 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1282 n = self.node(r)
1282 n = self.node(r)
1283 isdescendant = False
1283 isdescendant = False
1284 if lowestrev == nullrev: # Everybody is a descendant of nullid
1284 if lowestrev == nullrev: # Everybody is a descendant of nullid
1285 isdescendant = True
1285 isdescendant = True
1286 elif n in descendants:
1286 elif n in descendants:
1287 # n is already a descendant
1287 # n is already a descendant
1288 isdescendant = True
1288 isdescendant = True
1289 # This check only needs to be done here because all the roots
1289 # This check only needs to be done here because all the roots
1290 # will start being marked is descendants before the loop.
1290 # will start being marked is descendants before the loop.
1291 if n in roots:
1291 if n in roots:
1292 # If n was a root, check if it's a 'real' root.
1292 # If n was a root, check if it's a 'real' root.
1293 p = tuple(self.parents(n))
1293 p = tuple(self.parents(n))
1294 # If any of its parents are descendants, it's not a root.
1294 # If any of its parents are descendants, it's not a root.
1295 if (p[0] in descendants) or (p[1] in descendants):
1295 if (p[0] in descendants) or (p[1] in descendants):
1296 roots.remove(n)
1296 roots.remove(n)
1297 else:
1297 else:
1298 p = tuple(self.parents(n))
1298 p = tuple(self.parents(n))
1299 # A node is a descendant if either of its parents are
1299 # A node is a descendant if either of its parents are
1300 # descendants. (We seeded the dependents list with the roots
1300 # descendants. (We seeded the dependents list with the roots
1301 # up there, remember?)
1301 # up there, remember?)
1302 if (p[0] in descendants) or (p[1] in descendants):
1302 if (p[0] in descendants) or (p[1] in descendants):
1303 descendants.add(n)
1303 descendants.add(n)
1304 isdescendant = True
1304 isdescendant = True
1305 if isdescendant and ((ancestors is None) or (n in ancestors)):
1305 if isdescendant and ((ancestors is None) or (n in ancestors)):
1306 # Only include nodes that are both descendants and ancestors.
1306 # Only include nodes that are both descendants and ancestors.
1307 orderedout.append(n)
1307 orderedout.append(n)
1308 if (ancestors is not None) and (n in heads):
1308 if (ancestors is not None) and (n in heads):
1309 # We're trying to figure out which heads are reachable
1309 # We're trying to figure out which heads are reachable
1310 # from roots.
1310 # from roots.
1311 # Mark this head as having been reached
1311 # Mark this head as having been reached
1312 heads[n] = True
1312 heads[n] = True
1313 elif ancestors is None:
1313 elif ancestors is None:
1314 # Otherwise, we're trying to discover the heads.
1314 # Otherwise, we're trying to discover the heads.
1315 # Assume this is a head because if it isn't, the next step
1315 # Assume this is a head because if it isn't, the next step
1316 # will eventually remove it.
1316 # will eventually remove it.
1317 heads[n] = True
1317 heads[n] = True
1318 # But, obviously its parents aren't.
1318 # But, obviously its parents aren't.
1319 for p in self.parents(n):
1319 for p in self.parents(n):
1320 heads.pop(p, None)
1320 heads.pop(p, None)
1321 heads = [head for head, flag in heads.items() if flag]
1321 heads = [head for head, flag in heads.items() if flag]
1322 roots = list(roots)
1322 roots = list(roots)
1323 assert orderedout
1323 assert orderedout
1324 assert roots
1324 assert roots
1325 assert heads
1325 assert heads
1326 return (orderedout, roots, heads)
1326 return (orderedout, roots, heads)
1327
1327
1328 def headrevs(self, revs=None):
1328 def headrevs(self, revs=None):
1329 if revs is None:
1329 if revs is None:
1330 try:
1330 try:
1331 return self.index.headrevs()
1331 return self.index.headrevs()
1332 except AttributeError:
1332 except AttributeError:
1333 return self._headrevs()
1333 return self._headrevs()
1334 if rustdagop is not None and self.index.rust_ext_compat:
1334 if rustdagop is not None and self.index.rust_ext_compat:
1335 return rustdagop.headrevs(self.index, revs)
1335 return rustdagop.headrevs(self.index, revs)
1336 return dagop.headrevs(revs, self._uncheckedparentrevs)
1336 return dagop.headrevs(revs, self._uncheckedparentrevs)
1337
1337
1338 def computephases(self, roots):
1338 def computephases(self, roots):
1339 return self.index.computephasesmapsets(roots)
1339 return self.index.computephasesmapsets(roots)
1340
1340
1341 def _headrevs(self):
1341 def _headrevs(self):
1342 count = len(self)
1342 count = len(self)
1343 if not count:
1343 if not count:
1344 return [nullrev]
1344 return [nullrev]
1345 # we won't iter over filtered rev so nobody is a head at start
1345 # we won't iter over filtered rev so nobody is a head at start
1346 ishead = [0] * (count + 1)
1346 ishead = [0] * (count + 1)
1347 index = self.index
1347 index = self.index
1348 for r in self:
1348 for r in self:
1349 ishead[r] = 1 # I may be an head
1349 ishead[r] = 1 # I may be an head
1350 e = index[r]
1350 e = index[r]
1351 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1351 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1352 return [r for r, val in enumerate(ishead) if val]
1352 return [r for r, val in enumerate(ishead) if val]
1353
1353
1354 def heads(self, start=None, stop=None):
1354 def heads(self, start=None, stop=None):
1355 """return the list of all nodes that have no children
1355 """return the list of all nodes that have no children
1356
1356
1357 if start is specified, only heads that are descendants of
1357 if start is specified, only heads that are descendants of
1358 start will be returned
1358 start will be returned
1359 if stop is specified, it will consider all the revs from stop
1359 if stop is specified, it will consider all the revs from stop
1360 as if they had no children
1360 as if they had no children
1361 """
1361 """
1362 if start is None and stop is None:
1362 if start is None and stop is None:
1363 if not len(self):
1363 if not len(self):
1364 return [self.nullid]
1364 return [self.nullid]
1365 return [self.node(r) for r in self.headrevs()]
1365 return [self.node(r) for r in self.headrevs()]
1366
1366
1367 if start is None:
1367 if start is None:
1368 start = nullrev
1368 start = nullrev
1369 else:
1369 else:
1370 start = self.rev(start)
1370 start = self.rev(start)
1371
1371
1372 stoprevs = {self.rev(n) for n in stop or []}
1372 stoprevs = {self.rev(n) for n in stop or []}
1373
1373
1374 revs = dagop.headrevssubset(
1374 revs = dagop.headrevssubset(
1375 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1375 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1376 )
1376 )
1377
1377
1378 return [self.node(rev) for rev in revs]
1378 return [self.node(rev) for rev in revs]
1379
1379
1380 def children(self, node):
1380 def children(self, node):
1381 """find the children of a given node"""
1381 """find the children of a given node"""
1382 c = []
1382 c = []
1383 p = self.rev(node)
1383 p = self.rev(node)
1384 for r in self.revs(start=p + 1):
1384 for r in self.revs(start=p + 1):
1385 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1385 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1386 if prevs:
1386 if prevs:
1387 for pr in prevs:
1387 for pr in prevs:
1388 if pr == p:
1388 if pr == p:
1389 c.append(self.node(r))
1389 c.append(self.node(r))
1390 elif p == nullrev:
1390 elif p == nullrev:
1391 c.append(self.node(r))
1391 c.append(self.node(r))
1392 return c
1392 return c
1393
1393
1394 def commonancestorsheads(self, a, b):
1394 def commonancestorsheads(self, a, b):
1395 """calculate all the heads of the common ancestors of nodes a and b"""
1395 """calculate all the heads of the common ancestors of nodes a and b"""
1396 a, b = self.rev(a), self.rev(b)
1396 a, b = self.rev(a), self.rev(b)
1397 ancs = self._commonancestorsheads(a, b)
1397 ancs = self._commonancestorsheads(a, b)
1398 return pycompat.maplist(self.node, ancs)
1398 return pycompat.maplist(self.node, ancs)
1399
1399
1400 def _commonancestorsheads(self, *revs):
1400 def _commonancestorsheads(self, *revs):
1401 """calculate all the heads of the common ancestors of revs"""
1401 """calculate all the heads of the common ancestors of revs"""
1402 try:
1402 try:
1403 ancs = self.index.commonancestorsheads(*revs)
1403 ancs = self.index.commonancestorsheads(*revs)
1404 except (AttributeError, OverflowError): # C implementation failed
1404 except (AttributeError, OverflowError): # C implementation failed
1405 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1405 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1406 return ancs
1406 return ancs
1407
1407
1408 def isancestor(self, a, b):
1408 def isancestor(self, a, b):
1409 """return True if node a is an ancestor of node b
1409 """return True if node a is an ancestor of node b
1410
1410
1411 A revision is considered an ancestor of itself."""
1411 A revision is considered an ancestor of itself."""
1412 a, b = self.rev(a), self.rev(b)
1412 a, b = self.rev(a), self.rev(b)
1413 return self.isancestorrev(a, b)
1413 return self.isancestorrev(a, b)
1414
1414
1415 def isancestorrev(self, a, b):
1415 def isancestorrev(self, a, b):
1416 """return True if revision a is an ancestor of revision b
1416 """return True if revision a is an ancestor of revision b
1417
1417
1418 A revision is considered an ancestor of itself.
1418 A revision is considered an ancestor of itself.
1419
1419
1420 The implementation of this is trivial but the use of
1420 The implementation of this is trivial but the use of
1421 reachableroots is not."""
1421 reachableroots is not."""
1422 if a == nullrev:
1422 if a == nullrev:
1423 return True
1423 return True
1424 elif a == b:
1424 elif a == b:
1425 return True
1425 return True
1426 elif a > b:
1426 elif a > b:
1427 return False
1427 return False
1428 return bool(self.reachableroots(a, [b], [a], includepath=False))
1428 return bool(self.reachableroots(a, [b], [a], includepath=False))
1429
1429
1430 def reachableroots(self, minroot, heads, roots, includepath=False):
1430 def reachableroots(self, minroot, heads, roots, includepath=False):
1431 """return (heads(::(<roots> and <roots>::<heads>)))
1431 """return (heads(::(<roots> and <roots>::<heads>)))
1432
1432
1433 If includepath is True, return (<roots>::<heads>)."""
1433 If includepath is True, return (<roots>::<heads>)."""
1434 try:
1434 try:
1435 return self.index.reachableroots2(
1435 return self.index.reachableroots2(
1436 minroot, heads, roots, includepath
1436 minroot, heads, roots, includepath
1437 )
1437 )
1438 except AttributeError:
1438 except AttributeError:
1439 return dagop._reachablerootspure(
1439 return dagop._reachablerootspure(
1440 self.parentrevs, minroot, roots, heads, includepath
1440 self.parentrevs, minroot, roots, heads, includepath
1441 )
1441 )
1442
1442
1443 def ancestor(self, a, b):
1443 def ancestor(self, a, b):
1444 """calculate the "best" common ancestor of nodes a and b"""
1444 """calculate the "best" common ancestor of nodes a and b"""
1445
1445
1446 a, b = self.rev(a), self.rev(b)
1446 a, b = self.rev(a), self.rev(b)
1447 try:
1447 try:
1448 ancs = self.index.ancestors(a, b)
1448 ancs = self.index.ancestors(a, b)
1449 except (AttributeError, OverflowError):
1449 except (AttributeError, OverflowError):
1450 ancs = ancestor.ancestors(self.parentrevs, a, b)
1450 ancs = ancestor.ancestors(self.parentrevs, a, b)
1451 if ancs:
1451 if ancs:
1452 # choose a consistent winner when there's a tie
1452 # choose a consistent winner when there's a tie
1453 return min(map(self.node, ancs))
1453 return min(map(self.node, ancs))
1454 return self.nullid
1454 return self.nullid
1455
1455
1456 def _match(self, id):
1456 def _match(self, id):
1457 if isinstance(id, int):
1457 if isinstance(id, int):
1458 # rev
1458 # rev
1459 return self.node(id)
1459 return self.node(id)
1460 if len(id) == self.nodeconstants.nodelen:
1460 if len(id) == self.nodeconstants.nodelen:
1461 # possibly a binary node
1461 # possibly a binary node
1462 # odds of a binary node being all hex in ASCII are 1 in 10**25
1462 # odds of a binary node being all hex in ASCII are 1 in 10**25
1463 try:
1463 try:
1464 node = id
1464 node = id
1465 self.rev(node) # quick search the index
1465 self.rev(node) # quick search the index
1466 return node
1466 return node
1467 except error.LookupError:
1467 except error.LookupError:
1468 pass # may be partial hex id
1468 pass # may be partial hex id
1469 try:
1469 try:
1470 # str(rev)
1470 # str(rev)
1471 rev = int(id)
1471 rev = int(id)
1472 if b"%d" % rev != id:
1472 if b"%d" % rev != id:
1473 raise ValueError
1473 raise ValueError
1474 if rev < 0:
1474 if rev < 0:
1475 rev = len(self) + rev
1475 rev = len(self) + rev
1476 if rev < 0 or rev >= len(self):
1476 if rev < 0 or rev >= len(self):
1477 raise ValueError
1477 raise ValueError
1478 return self.node(rev)
1478 return self.node(rev)
1479 except (ValueError, OverflowError):
1479 except (ValueError, OverflowError):
1480 pass
1480 pass
1481 if len(id) == 2 * self.nodeconstants.nodelen:
1481 if len(id) == 2 * self.nodeconstants.nodelen:
1482 try:
1482 try:
1483 # a full hex nodeid?
1483 # a full hex nodeid?
1484 node = bin(id)
1484 node = bin(id)
1485 self.rev(node)
1485 self.rev(node)
1486 return node
1486 return node
1487 except (binascii.Error, error.LookupError):
1487 except (binascii.Error, error.LookupError):
1488 pass
1488 pass
1489
1489
1490 def _partialmatch(self, id):
1490 def _partialmatch(self, id):
1491 # we don't care wdirfilenodeids as they should be always full hash
1491 # we don't care wdirfilenodeids as they should be always full hash
1492 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1492 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1493 ambiguous = False
1493 ambiguous = False
1494 try:
1494 try:
1495 partial = self.index.partialmatch(id)
1495 partial = self.index.partialmatch(id)
1496 if partial and self.hasnode(partial):
1496 if partial and self.hasnode(partial):
1497 if maybewdir:
1497 if maybewdir:
1498 # single 'ff...' match in radix tree, ambiguous with wdir
1498 # single 'ff...' match in radix tree, ambiguous with wdir
1499 ambiguous = True
1499 ambiguous = True
1500 else:
1500 else:
1501 return partial
1501 return partial
1502 elif maybewdir:
1502 elif maybewdir:
1503 # no 'ff...' match in radix tree, wdir identified
1503 # no 'ff...' match in radix tree, wdir identified
1504 raise error.WdirUnsupported
1504 raise error.WdirUnsupported
1505 else:
1505 else:
1506 return None
1506 return None
1507 except error.RevlogError:
1507 except error.RevlogError:
1508 # parsers.c radix tree lookup gave multiple matches
1508 # parsers.c radix tree lookup gave multiple matches
1509 # fast path: for unfiltered changelog, radix tree is accurate
1509 # fast path: for unfiltered changelog, radix tree is accurate
1510 if not getattr(self, 'filteredrevs', None):
1510 if not getattr(self, 'filteredrevs', None):
1511 ambiguous = True
1511 ambiguous = True
1512 # fall through to slow path that filters hidden revisions
1512 # fall through to slow path that filters hidden revisions
1513 except (AttributeError, ValueError):
1513 except (AttributeError, ValueError):
1514 # we are pure python, or key is not hex
1514 # we are pure python, or key is not hex
1515 pass
1515 pass
1516 if ambiguous:
1516 if ambiguous:
1517 raise error.AmbiguousPrefixLookupError(
1517 raise error.AmbiguousPrefixLookupError(
1518 id, self.display_id, _(b'ambiguous identifier')
1518 id, self.display_id, _(b'ambiguous identifier')
1519 )
1519 )
1520
1520
1521 if id in self._pcache:
1521 if id in self._pcache:
1522 return self._pcache[id]
1522 return self._pcache[id]
1523
1523
1524 if len(id) <= 40:
1524 if len(id) <= 40:
1525 # hex(node)[:...]
1525 # hex(node)[:...]
1526 l = len(id) // 2 * 2 # grab an even number of digits
1526 l = len(id) // 2 * 2 # grab an even number of digits
1527 try:
1527 try:
1528 # we're dropping the last digit, so let's check that it's hex,
1528 # we're dropping the last digit, so let's check that it's hex,
1529 # to avoid the expensive computation below if it's not
1529 # to avoid the expensive computation below if it's not
1530 if len(id) % 2 > 0:
1530 if len(id) % 2 > 0:
1531 if not (id[-1] in hexdigits):
1531 if not (id[-1] in hexdigits):
1532 return None
1532 return None
1533 prefix = bin(id[:l])
1533 prefix = bin(id[:l])
1534 except binascii.Error:
1534 except binascii.Error:
1535 pass
1535 pass
1536 else:
1536 else:
1537 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1537 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1538 nl = [
1538 nl = [
1539 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1539 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1540 ]
1540 ]
1541 if self.nodeconstants.nullhex.startswith(id):
1541 if self.nodeconstants.nullhex.startswith(id):
1542 nl.append(self.nullid)
1542 nl.append(self.nullid)
1543 if len(nl) > 0:
1543 if len(nl) > 0:
1544 if len(nl) == 1 and not maybewdir:
1544 if len(nl) == 1 and not maybewdir:
1545 self._pcache[id] = nl[0]
1545 self._pcache[id] = nl[0]
1546 return nl[0]
1546 return nl[0]
1547 raise error.AmbiguousPrefixLookupError(
1547 raise error.AmbiguousPrefixLookupError(
1548 id, self.display_id, _(b'ambiguous identifier')
1548 id, self.display_id, _(b'ambiguous identifier')
1549 )
1549 )
1550 if maybewdir:
1550 if maybewdir:
1551 raise error.WdirUnsupported
1551 raise error.WdirUnsupported
1552 return None
1552 return None
1553
1553
1554 def lookup(self, id):
1554 def lookup(self, id):
1555 """locate a node based on:
1555 """locate a node based on:
1556 - revision number or str(revision number)
1556 - revision number or str(revision number)
1557 - nodeid or subset of hex nodeid
1557 - nodeid or subset of hex nodeid
1558 """
1558 """
1559 n = self._match(id)
1559 n = self._match(id)
1560 if n is not None:
1560 if n is not None:
1561 return n
1561 return n
1562 n = self._partialmatch(id)
1562 n = self._partialmatch(id)
1563 if n:
1563 if n:
1564 return n
1564 return n
1565
1565
1566 raise error.LookupError(id, self.display_id, _(b'no match found'))
1566 raise error.LookupError(id, self.display_id, _(b'no match found'))
1567
1567
1568 def shortest(self, node, minlength=1):
1568 def shortest(self, node, minlength=1):
1569 """Find the shortest unambiguous prefix that matches node."""
1569 """Find the shortest unambiguous prefix that matches node."""
1570
1570
1571 def isvalid(prefix):
1571 def isvalid(prefix):
1572 try:
1572 try:
1573 matchednode = self._partialmatch(prefix)
1573 matchednode = self._partialmatch(prefix)
1574 except error.AmbiguousPrefixLookupError:
1574 except error.AmbiguousPrefixLookupError:
1575 return False
1575 return False
1576 except error.WdirUnsupported:
1576 except error.WdirUnsupported:
1577 # single 'ff...' match
1577 # single 'ff...' match
1578 return True
1578 return True
1579 if matchednode is None:
1579 if matchednode is None:
1580 raise error.LookupError(node, self.display_id, _(b'no node'))
1580 raise error.LookupError(node, self.display_id, _(b'no node'))
1581 return True
1581 return True
1582
1582
1583 def maybewdir(prefix):
1583 def maybewdir(prefix):
1584 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1584 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1585
1585
1586 hexnode = hex(node)
1586 hexnode = hex(node)
1587
1587
1588 def disambiguate(hexnode, minlength):
1588 def disambiguate(hexnode, minlength):
1589 """Disambiguate against wdirid."""
1589 """Disambiguate against wdirid."""
1590 for length in range(minlength, len(hexnode) + 1):
1590 for length in range(minlength, len(hexnode) + 1):
1591 prefix = hexnode[:length]
1591 prefix = hexnode[:length]
1592 if not maybewdir(prefix):
1592 if not maybewdir(prefix):
1593 return prefix
1593 return prefix
1594
1594
1595 if not getattr(self, 'filteredrevs', None):
1595 if not getattr(self, 'filteredrevs', None):
1596 try:
1596 try:
1597 length = max(self.index.shortest(node), minlength)
1597 length = max(self.index.shortest(node), minlength)
1598 return disambiguate(hexnode, length)
1598 return disambiguate(hexnode, length)
1599 except error.RevlogError:
1599 except error.RevlogError:
1600 if node != self.nodeconstants.wdirid:
1600 if node != self.nodeconstants.wdirid:
1601 raise error.LookupError(
1601 raise error.LookupError(
1602 node, self.display_id, _(b'no node')
1602 node, self.display_id, _(b'no node')
1603 )
1603 )
1604 except AttributeError:
1604 except AttributeError:
1605 # Fall through to pure code
1605 # Fall through to pure code
1606 pass
1606 pass
1607
1607
1608 if node == self.nodeconstants.wdirid:
1608 if node == self.nodeconstants.wdirid:
1609 for length in range(minlength, len(hexnode) + 1):
1609 for length in range(minlength, len(hexnode) + 1):
1610 prefix = hexnode[:length]
1610 prefix = hexnode[:length]
1611 if isvalid(prefix):
1611 if isvalid(prefix):
1612 return prefix
1612 return prefix
1613
1613
1614 for length in range(minlength, len(hexnode) + 1):
1614 for length in range(minlength, len(hexnode) + 1):
1615 prefix = hexnode[:length]
1615 prefix = hexnode[:length]
1616 if isvalid(prefix):
1616 if isvalid(prefix):
1617 return disambiguate(hexnode, length)
1617 return disambiguate(hexnode, length)
1618
1618
1619 def cmp(self, node, text):
1619 def cmp(self, node, text):
1620 """compare text with a given file revision
1620 """compare text with a given file revision
1621
1621
1622 returns True if text is different than what is stored.
1622 returns True if text is different than what is stored.
1623 """
1623 """
1624 p1, p2 = self.parents(node)
1624 p1, p2 = self.parents(node)
1625 return storageutil.hashrevisionsha1(text, p1, p2) != node
1625 return storageutil.hashrevisionsha1(text, p1, p2) != node
1626
1626
1627 def _getsegmentforrevs(self, startrev, endrev, df=None):
1627 def _getsegmentforrevs(self, startrev, endrev, df=None):
1628 """Obtain a segment of raw data corresponding to a range of revisions.
1628 """Obtain a segment of raw data corresponding to a range of revisions.
1629
1629
1630 Accepts the start and end revisions and an optional already-open
1630 Accepts the start and end revisions and an optional already-open
1631 file handle to be used for reading. If the file handle is read, its
1631 file handle to be used for reading. If the file handle is read, its
1632 seek position will not be preserved.
1632 seek position will not be preserved.
1633
1633
1634 Requests for data may be satisfied by a cache.
1634 Requests for data may be satisfied by a cache.
1635
1635
1636 Returns a 2-tuple of (offset, data) for the requested range of
1636 Returns a 2-tuple of (offset, data) for the requested range of
1637 revisions. Offset is the integer offset from the beginning of the
1637 revisions. Offset is the integer offset from the beginning of the
1638 revlog and data is a str or buffer of the raw byte data.
1638 revlog and data is a str or buffer of the raw byte data.
1639
1639
1640 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1640 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1641 to determine where each revision's data begins and ends.
1641 to determine where each revision's data begins and ends.
1642 """
1642 """
1643 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1643 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1644 # (functions are expensive).
1644 # (functions are expensive).
1645 index = self.index
1645 index = self.index
1646 istart = index[startrev]
1646 istart = index[startrev]
1647 start = int(istart[0] >> 16)
1647 start = int(istart[0] >> 16)
1648 if startrev == endrev:
1648 if startrev == endrev:
1649 end = start + istart[1]
1649 end = start + istart[1]
1650 else:
1650 else:
1651 iend = index[endrev]
1651 iend = index[endrev]
1652 end = int(iend[0] >> 16) + iend[1]
1652 end = int(iend[0] >> 16) + iend[1]
1653
1653
1654 if self._inline:
1654 if self._inline:
1655 start += (startrev + 1) * self.index.entry_size
1655 start += (startrev + 1) * self.index.entry_size
1656 end += (endrev + 1) * self.index.entry_size
1656 end += (endrev + 1) * self.index.entry_size
1657 length = end - start
1657 length = end - start
1658
1658
1659 return start, self._segmentfile.read_chunk(start, length, df)
1659 return start, self._segmentfile.read_chunk(start, length, df)
1660
1660
1661 def _chunk(self, rev, df=None):
1661 def _chunk(self, rev, df=None):
1662 """Obtain a single decompressed chunk for a revision.
1662 """Obtain a single decompressed chunk for a revision.
1663
1663
1664 Accepts an integer revision and an optional already-open file handle
1664 Accepts an integer revision and an optional already-open file handle
1665 to be used for reading. If used, the seek position of the file will not
1665 to be used for reading. If used, the seek position of the file will not
1666 be preserved.
1666 be preserved.
1667
1667
1668 Returns a str holding uncompressed data for the requested revision.
1668 Returns a str holding uncompressed data for the requested revision.
1669 """
1669 """
1670 compression_mode = self.index[rev][10]
1670 compression_mode = self.index[rev][10]
1671 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1671 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1672 if compression_mode == COMP_MODE_PLAIN:
1672 if compression_mode == COMP_MODE_PLAIN:
1673 return data
1673 return data
1674 elif compression_mode == COMP_MODE_DEFAULT:
1674 elif compression_mode == COMP_MODE_DEFAULT:
1675 return self._decompressor(data)
1675 return self._decompressor(data)
1676 elif compression_mode == COMP_MODE_INLINE:
1676 elif compression_mode == COMP_MODE_INLINE:
1677 return self.decompress(data)
1677 return self.decompress(data)
1678 else:
1678 else:
1679 msg = b'unknown compression mode %d'
1679 msg = b'unknown compression mode %d'
1680 msg %= compression_mode
1680 msg %= compression_mode
1681 raise error.RevlogError(msg)
1681 raise error.RevlogError(msg)
1682
1682
1683 def _chunks(self, revs, df=None, targetsize=None):
1683 def _chunks(self, revs, df=None, targetsize=None):
1684 """Obtain decompressed chunks for the specified revisions.
1684 """Obtain decompressed chunks for the specified revisions.
1685
1685
1686 Accepts an iterable of numeric revisions that are assumed to be in
1686 Accepts an iterable of numeric revisions that are assumed to be in
1687 ascending order. Also accepts an optional already-open file handle
1687 ascending order. Also accepts an optional already-open file handle
1688 to be used for reading. If used, the seek position of the file will
1688 to be used for reading. If used, the seek position of the file will
1689 not be preserved.
1689 not be preserved.
1690
1690
1691 This function is similar to calling ``self._chunk()`` multiple times,
1691 This function is similar to calling ``self._chunk()`` multiple times,
1692 but is faster.
1692 but is faster.
1693
1693
1694 Returns a list with decompressed data for each requested revision.
1694 Returns a list with decompressed data for each requested revision.
1695 """
1695 """
1696 if not revs:
1696 if not revs:
1697 return []
1697 return []
1698 start = self.start
1698 start = self.start
1699 length = self.length
1699 length = self.length
1700 inline = self._inline
1700 inline = self._inline
1701 iosize = self.index.entry_size
1701 iosize = self.index.entry_size
1702 buffer = util.buffer
1702 buffer = util.buffer
1703
1703
1704 l = []
1704 l = []
1705 ladd = l.append
1705 ladd = l.append
1706
1706
1707 if not self._withsparseread:
1707 if not self._withsparseread:
1708 slicedchunks = (revs,)
1708 slicedchunks = (revs,)
1709 else:
1709 else:
1710 slicedchunks = deltautil.slicechunk(
1710 slicedchunks = deltautil.slicechunk(
1711 self, revs, targetsize=targetsize
1711 self, revs, targetsize=targetsize
1712 )
1712 )
1713
1713
1714 for revschunk in slicedchunks:
1714 for revschunk in slicedchunks:
1715 firstrev = revschunk[0]
1715 firstrev = revschunk[0]
1716 # Skip trailing revisions with empty diff
1716 # Skip trailing revisions with empty diff
1717 for lastrev in revschunk[::-1]:
1717 for lastrev in revschunk[::-1]:
1718 if length(lastrev) != 0:
1718 if length(lastrev) != 0:
1719 break
1719 break
1720
1720
1721 try:
1721 try:
1722 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1722 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1723 except OverflowError:
1723 except OverflowError:
1724 # issue4215 - we can't cache a run of chunks greater than
1724 # issue4215 - we can't cache a run of chunks greater than
1725 # 2G on Windows
1725 # 2G on Windows
1726 return [self._chunk(rev, df=df) for rev in revschunk]
1726 return [self._chunk(rev, df=df) for rev in revschunk]
1727
1727
1728 decomp = self.decompress
1728 decomp = self.decompress
1729 # self._decompressor might be None, but will not be used in that case
1729 # self._decompressor might be None, but will not be used in that case
1730 def_decomp = self._decompressor
1730 def_decomp = self._decompressor
1731 for rev in revschunk:
1731 for rev in revschunk:
1732 chunkstart = start(rev)
1732 chunkstart = start(rev)
1733 if inline:
1733 if inline:
1734 chunkstart += (rev + 1) * iosize
1734 chunkstart += (rev + 1) * iosize
1735 chunklength = length(rev)
1735 chunklength = length(rev)
1736 comp_mode = self.index[rev][10]
1736 comp_mode = self.index[rev][10]
1737 c = buffer(data, chunkstart - offset, chunklength)
1737 c = buffer(data, chunkstart - offset, chunklength)
1738 if comp_mode == COMP_MODE_PLAIN:
1738 if comp_mode == COMP_MODE_PLAIN:
1739 ladd(c)
1739 ladd(c)
1740 elif comp_mode == COMP_MODE_INLINE:
1740 elif comp_mode == COMP_MODE_INLINE:
1741 ladd(decomp(c))
1741 ladd(decomp(c))
1742 elif comp_mode == COMP_MODE_DEFAULT:
1742 elif comp_mode == COMP_MODE_DEFAULT:
1743 ladd(def_decomp(c))
1743 ladd(def_decomp(c))
1744 else:
1744 else:
1745 msg = b'unknown compression mode %d'
1745 msg = b'unknown compression mode %d'
1746 msg %= comp_mode
1746 msg %= comp_mode
1747 raise error.RevlogError(msg)
1747 raise error.RevlogError(msg)
1748
1748
1749 return l
1749 return l
1750
1750
1751 def deltaparent(self, rev):
1751 def deltaparent(self, rev):
1752 """return deltaparent of the given revision"""
1752 """return deltaparent of the given revision"""
1753 base = self.index[rev][3]
1753 base = self.index[rev][3]
1754 if base == rev:
1754 if base == rev:
1755 return nullrev
1755 return nullrev
1756 elif self._generaldelta:
1756 elif self._generaldelta:
1757 return base
1757 return base
1758 else:
1758 else:
1759 return rev - 1
1759 return rev - 1
1760
1760
1761 def issnapshot(self, rev):
1761 def issnapshot(self, rev):
1762 """tells whether rev is a snapshot"""
1762 """tells whether rev is a snapshot"""
1763 if not self._sparserevlog:
1763 if not self._sparserevlog:
1764 return self.deltaparent(rev) == nullrev
1764 return self.deltaparent(rev) == nullrev
1765 elif util.safehasattr(self.index, b'issnapshot'):
1765 elif util.safehasattr(self.index, b'issnapshot'):
1766 # directly assign the method to cache the testing and access
1766 # directly assign the method to cache the testing and access
1767 self.issnapshot = self.index.issnapshot
1767 self.issnapshot = self.index.issnapshot
1768 return self.issnapshot(rev)
1768 return self.issnapshot(rev)
1769 if rev == nullrev:
1769 if rev == nullrev:
1770 return True
1770 return True
1771 entry = self.index[rev]
1771 entry = self.index[rev]
1772 base = entry[3]
1772 base = entry[3]
1773 if base == rev:
1773 if base == rev:
1774 return True
1774 return True
1775 if base == nullrev:
1775 if base == nullrev:
1776 return True
1776 return True
1777 p1 = entry[5]
1777 p1 = entry[5]
1778 while self.length(p1) == 0:
1779 b = self.deltaparent(p1)
1780 if b == p1:
1781 break
1782 p1 = b
1778 p2 = entry[6]
1783 p2 = entry[6]
1784 while self.length(p2) == 0:
1785 b = self.deltaparent(p2)
1786 if b == p2:
1787 break
1788 p2 = b
1779 if base == p1 or base == p2:
1789 if base == p1 or base == p2:
1780 return False
1790 return False
1781 return self.issnapshot(base)
1791 return self.issnapshot(base)
1782
1792
1783 def snapshotdepth(self, rev):
1793 def snapshotdepth(self, rev):
1784 """number of snapshot in the chain before this one"""
1794 """number of snapshot in the chain before this one"""
1785 if not self.issnapshot(rev):
1795 if not self.issnapshot(rev):
1786 raise error.ProgrammingError(b'revision %d not a snapshot')
1796 raise error.ProgrammingError(b'revision %d not a snapshot')
1787 return len(self._deltachain(rev)[0]) - 1
1797 return len(self._deltachain(rev)[0]) - 1
1788
1798
1789 def revdiff(self, rev1, rev2):
1799 def revdiff(self, rev1, rev2):
1790 """return or calculate a delta between two revisions
1800 """return or calculate a delta between two revisions
1791
1801
1792 The delta calculated is in binary form and is intended to be written to
1802 The delta calculated is in binary form and is intended to be written to
1793 revlog data directly. So this function needs raw revision data.
1803 revlog data directly. So this function needs raw revision data.
1794 """
1804 """
1795 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1805 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1796 return bytes(self._chunk(rev2))
1806 return bytes(self._chunk(rev2))
1797
1807
1798 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1808 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1799
1809
1800 def revision(self, nodeorrev, _df=None):
1810 def revision(self, nodeorrev, _df=None):
1801 """return an uncompressed revision of a given node or revision
1811 """return an uncompressed revision of a given node or revision
1802 number.
1812 number.
1803
1813
1804 _df - an existing file handle to read from. (internal-only)
1814 _df - an existing file handle to read from. (internal-only)
1805 """
1815 """
1806 return self._revisiondata(nodeorrev, _df)
1816 return self._revisiondata(nodeorrev, _df)
1807
1817
1808 def sidedata(self, nodeorrev, _df=None):
1818 def sidedata(self, nodeorrev, _df=None):
1809 """a map of extra data related to the changeset but not part of the hash
1819 """a map of extra data related to the changeset but not part of the hash
1810
1820
1811 This function currently return a dictionary. However, more advanced
1821 This function currently return a dictionary. However, more advanced
1812 mapping object will likely be used in the future for a more
1822 mapping object will likely be used in the future for a more
1813 efficient/lazy code.
1823 efficient/lazy code.
1814 """
1824 """
1815 # deal with <nodeorrev> argument type
1825 # deal with <nodeorrev> argument type
1816 if isinstance(nodeorrev, int):
1826 if isinstance(nodeorrev, int):
1817 rev = nodeorrev
1827 rev = nodeorrev
1818 else:
1828 else:
1819 rev = self.rev(nodeorrev)
1829 rev = self.rev(nodeorrev)
1820 return self._sidedata(rev)
1830 return self._sidedata(rev)
1821
1831
1822 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1832 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1823 # deal with <nodeorrev> argument type
1833 # deal with <nodeorrev> argument type
1824 if isinstance(nodeorrev, int):
1834 if isinstance(nodeorrev, int):
1825 rev = nodeorrev
1835 rev = nodeorrev
1826 node = self.node(rev)
1836 node = self.node(rev)
1827 else:
1837 else:
1828 node = nodeorrev
1838 node = nodeorrev
1829 rev = None
1839 rev = None
1830
1840
1831 # fast path the special `nullid` rev
1841 # fast path the special `nullid` rev
1832 if node == self.nullid:
1842 if node == self.nullid:
1833 return b""
1843 return b""
1834
1844
1835 # ``rawtext`` is the text as stored inside the revlog. Might be the
1845 # ``rawtext`` is the text as stored inside the revlog. Might be the
1836 # revision or might need to be processed to retrieve the revision.
1846 # revision or might need to be processed to retrieve the revision.
1837 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1847 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1838
1848
1839 if raw and validated:
1849 if raw and validated:
1840 # if we don't want to process the raw text and that raw
1850 # if we don't want to process the raw text and that raw
1841 # text is cached, we can exit early.
1851 # text is cached, we can exit early.
1842 return rawtext
1852 return rawtext
1843 if rev is None:
1853 if rev is None:
1844 rev = self.rev(node)
1854 rev = self.rev(node)
1845 # the revlog's flag for this revision
1855 # the revlog's flag for this revision
1846 # (usually alter its state or content)
1856 # (usually alter its state or content)
1847 flags = self.flags(rev)
1857 flags = self.flags(rev)
1848
1858
1849 if validated and flags == REVIDX_DEFAULT_FLAGS:
1859 if validated and flags == REVIDX_DEFAULT_FLAGS:
1850 # no extra flags set, no flag processor runs, text = rawtext
1860 # no extra flags set, no flag processor runs, text = rawtext
1851 return rawtext
1861 return rawtext
1852
1862
1853 if raw:
1863 if raw:
1854 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1864 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1855 text = rawtext
1865 text = rawtext
1856 else:
1866 else:
1857 r = flagutil.processflagsread(self, rawtext, flags)
1867 r = flagutil.processflagsread(self, rawtext, flags)
1858 text, validatehash = r
1868 text, validatehash = r
1859 if validatehash:
1869 if validatehash:
1860 self.checkhash(text, node, rev=rev)
1870 self.checkhash(text, node, rev=rev)
1861 if not validated:
1871 if not validated:
1862 self._revisioncache = (node, rev, rawtext)
1872 self._revisioncache = (node, rev, rawtext)
1863
1873
1864 return text
1874 return text
1865
1875
1866 def _rawtext(self, node, rev, _df=None):
1876 def _rawtext(self, node, rev, _df=None):
1867 """return the possibly unvalidated rawtext for a revision
1877 """return the possibly unvalidated rawtext for a revision
1868
1878
1869 returns (rev, rawtext, validated)
1879 returns (rev, rawtext, validated)
1870 """
1880 """
1871
1881
1872 # revision in the cache (could be useful to apply delta)
1882 # revision in the cache (could be useful to apply delta)
1873 cachedrev = None
1883 cachedrev = None
1874 # An intermediate text to apply deltas to
1884 # An intermediate text to apply deltas to
1875 basetext = None
1885 basetext = None
1876
1886
1877 # Check if we have the entry in cache
1887 # Check if we have the entry in cache
1878 # The cache entry looks like (node, rev, rawtext)
1888 # The cache entry looks like (node, rev, rawtext)
1879 if self._revisioncache:
1889 if self._revisioncache:
1880 if self._revisioncache[0] == node:
1890 if self._revisioncache[0] == node:
1881 return (rev, self._revisioncache[2], True)
1891 return (rev, self._revisioncache[2], True)
1882 cachedrev = self._revisioncache[1]
1892 cachedrev = self._revisioncache[1]
1883
1893
1884 if rev is None:
1894 if rev is None:
1885 rev = self.rev(node)
1895 rev = self.rev(node)
1886
1896
1887 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1897 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1888 if stopped:
1898 if stopped:
1889 basetext = self._revisioncache[2]
1899 basetext = self._revisioncache[2]
1890
1900
1891 # drop cache to save memory, the caller is expected to
1901 # drop cache to save memory, the caller is expected to
1892 # update self._revisioncache after validating the text
1902 # update self._revisioncache after validating the text
1893 self._revisioncache = None
1903 self._revisioncache = None
1894
1904
1895 targetsize = None
1905 targetsize = None
1896 rawsize = self.index[rev][2]
1906 rawsize = self.index[rev][2]
1897 if 0 <= rawsize:
1907 if 0 <= rawsize:
1898 targetsize = 4 * rawsize
1908 targetsize = 4 * rawsize
1899
1909
1900 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1910 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1901 if basetext is None:
1911 if basetext is None:
1902 basetext = bytes(bins[0])
1912 basetext = bytes(bins[0])
1903 bins = bins[1:]
1913 bins = bins[1:]
1904
1914
1905 rawtext = mdiff.patches(basetext, bins)
1915 rawtext = mdiff.patches(basetext, bins)
1906 del basetext # let us have a chance to free memory early
1916 del basetext # let us have a chance to free memory early
1907 return (rev, rawtext, False)
1917 return (rev, rawtext, False)
1908
1918
1909 def _sidedata(self, rev):
1919 def _sidedata(self, rev):
1910 """Return the sidedata for a given revision number."""
1920 """Return the sidedata for a given revision number."""
1911 index_entry = self.index[rev]
1921 index_entry = self.index[rev]
1912 sidedata_offset = index_entry[8]
1922 sidedata_offset = index_entry[8]
1913 sidedata_size = index_entry[9]
1923 sidedata_size = index_entry[9]
1914
1924
1915 if self._inline:
1925 if self._inline:
1916 sidedata_offset += self.index.entry_size * (1 + rev)
1926 sidedata_offset += self.index.entry_size * (1 + rev)
1917 if sidedata_size == 0:
1927 if sidedata_size == 0:
1918 return {}
1928 return {}
1919
1929
1920 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
1930 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
1921 filename = self._sidedatafile
1931 filename = self._sidedatafile
1922 end = self._docket.sidedata_end
1932 end = self._docket.sidedata_end
1923 offset = sidedata_offset
1933 offset = sidedata_offset
1924 length = sidedata_size
1934 length = sidedata_size
1925 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1935 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1926 raise error.RevlogError(m)
1936 raise error.RevlogError(m)
1927
1937
1928 comp_segment = self._segmentfile_sidedata.read_chunk(
1938 comp_segment = self._segmentfile_sidedata.read_chunk(
1929 sidedata_offset, sidedata_size
1939 sidedata_offset, sidedata_size
1930 )
1940 )
1931
1941
1932 comp = self.index[rev][11]
1942 comp = self.index[rev][11]
1933 if comp == COMP_MODE_PLAIN:
1943 if comp == COMP_MODE_PLAIN:
1934 segment = comp_segment
1944 segment = comp_segment
1935 elif comp == COMP_MODE_DEFAULT:
1945 elif comp == COMP_MODE_DEFAULT:
1936 segment = self._decompressor(comp_segment)
1946 segment = self._decompressor(comp_segment)
1937 elif comp == COMP_MODE_INLINE:
1947 elif comp == COMP_MODE_INLINE:
1938 segment = self.decompress(comp_segment)
1948 segment = self.decompress(comp_segment)
1939 else:
1949 else:
1940 msg = b'unknown compression mode %d'
1950 msg = b'unknown compression mode %d'
1941 msg %= comp
1951 msg %= comp
1942 raise error.RevlogError(msg)
1952 raise error.RevlogError(msg)
1943
1953
1944 sidedata = sidedatautil.deserialize_sidedata(segment)
1954 sidedata = sidedatautil.deserialize_sidedata(segment)
1945 return sidedata
1955 return sidedata
1946
1956
1947 def rawdata(self, nodeorrev, _df=None):
1957 def rawdata(self, nodeorrev, _df=None):
1948 """return an uncompressed raw data of a given node or revision number.
1958 """return an uncompressed raw data of a given node or revision number.
1949
1959
1950 _df - an existing file handle to read from. (internal-only)
1960 _df - an existing file handle to read from. (internal-only)
1951 """
1961 """
1952 return self._revisiondata(nodeorrev, _df, raw=True)
1962 return self._revisiondata(nodeorrev, _df, raw=True)
1953
1963
1954 def hash(self, text, p1, p2):
1964 def hash(self, text, p1, p2):
1955 """Compute a node hash.
1965 """Compute a node hash.
1956
1966
1957 Available as a function so that subclasses can replace the hash
1967 Available as a function so that subclasses can replace the hash
1958 as needed.
1968 as needed.
1959 """
1969 """
1960 return storageutil.hashrevisionsha1(text, p1, p2)
1970 return storageutil.hashrevisionsha1(text, p1, p2)
1961
1971
1962 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1972 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1963 """Check node hash integrity.
1973 """Check node hash integrity.
1964
1974
1965 Available as a function so that subclasses can extend hash mismatch
1975 Available as a function so that subclasses can extend hash mismatch
1966 behaviors as needed.
1976 behaviors as needed.
1967 """
1977 """
1968 try:
1978 try:
1969 if p1 is None and p2 is None:
1979 if p1 is None and p2 is None:
1970 p1, p2 = self.parents(node)
1980 p1, p2 = self.parents(node)
1971 if node != self.hash(text, p1, p2):
1981 if node != self.hash(text, p1, p2):
1972 # Clear the revision cache on hash failure. The revision cache
1982 # Clear the revision cache on hash failure. The revision cache
1973 # only stores the raw revision and clearing the cache does have
1983 # only stores the raw revision and clearing the cache does have
1974 # the side-effect that we won't have a cache hit when the raw
1984 # the side-effect that we won't have a cache hit when the raw
1975 # revision data is accessed. But this case should be rare and
1985 # revision data is accessed. But this case should be rare and
1976 # it is extra work to teach the cache about the hash
1986 # it is extra work to teach the cache about the hash
1977 # verification state.
1987 # verification state.
1978 if self._revisioncache and self._revisioncache[0] == node:
1988 if self._revisioncache and self._revisioncache[0] == node:
1979 self._revisioncache = None
1989 self._revisioncache = None
1980
1990
1981 revornode = rev
1991 revornode = rev
1982 if revornode is None:
1992 if revornode is None:
1983 revornode = templatefilters.short(hex(node))
1993 revornode = templatefilters.short(hex(node))
1984 raise error.RevlogError(
1994 raise error.RevlogError(
1985 _(b"integrity check failed on %s:%s")
1995 _(b"integrity check failed on %s:%s")
1986 % (self.display_id, pycompat.bytestr(revornode))
1996 % (self.display_id, pycompat.bytestr(revornode))
1987 )
1997 )
1988 except error.RevlogError:
1998 except error.RevlogError:
1989 if self._censorable and storageutil.iscensoredtext(text):
1999 if self._censorable and storageutil.iscensoredtext(text):
1990 raise error.CensoredNodeError(self.display_id, node, text)
2000 raise error.CensoredNodeError(self.display_id, node, text)
1991 raise
2001 raise
1992
2002
1993 def _enforceinlinesize(self, tr):
2003 def _enforceinlinesize(self, tr):
1994 """Check if the revlog is too big for inline and convert if so.
2004 """Check if the revlog is too big for inline and convert if so.
1995
2005
1996 This should be called after revisions are added to the revlog. If the
2006 This should be called after revisions are added to the revlog. If the
1997 revlog has grown too large to be an inline revlog, it will convert it
2007 revlog has grown too large to be an inline revlog, it will convert it
1998 to use multiple index and data files.
2008 to use multiple index and data files.
1999 """
2009 """
2000 tiprev = len(self) - 1
2010 tiprev = len(self) - 1
2001 total_size = self.start(tiprev) + self.length(tiprev)
2011 total_size = self.start(tiprev) + self.length(tiprev)
2002 if not self._inline or total_size < _maxinline:
2012 if not self._inline or total_size < _maxinline:
2003 return
2013 return
2004
2014
2005 troffset = tr.findoffset(self._indexfile)
2015 troffset = tr.findoffset(self._indexfile)
2006 if troffset is None:
2016 if troffset is None:
2007 raise error.RevlogError(
2017 raise error.RevlogError(
2008 _(b"%s not found in the transaction") % self._indexfile
2018 _(b"%s not found in the transaction") % self._indexfile
2009 )
2019 )
2010 trindex = None
2020 trindex = None
2011 tr.add(self._datafile, 0)
2021 tr.add(self._datafile, 0)
2012
2022
2013 existing_handles = False
2023 existing_handles = False
2014 if self._writinghandles is not None:
2024 if self._writinghandles is not None:
2015 existing_handles = True
2025 existing_handles = True
2016 fp = self._writinghandles[0]
2026 fp = self._writinghandles[0]
2017 fp.flush()
2027 fp.flush()
2018 fp.close()
2028 fp.close()
2019 # We can't use the cached file handle after close(). So prevent
2029 # We can't use the cached file handle after close(). So prevent
2020 # its usage.
2030 # its usage.
2021 self._writinghandles = None
2031 self._writinghandles = None
2022 self._segmentfile.writing_handle = None
2032 self._segmentfile.writing_handle = None
2023 # No need to deal with sidedata writing handle as it is only
2033 # No need to deal with sidedata writing handle as it is only
2024 # relevant with revlog-v2 which is never inline, not reaching
2034 # relevant with revlog-v2 which is never inline, not reaching
2025 # this code
2035 # this code
2026
2036
2027 new_dfh = self._datafp(b'w+')
2037 new_dfh = self._datafp(b'w+')
2028 new_dfh.truncate(0) # drop any potentially existing data
2038 new_dfh.truncate(0) # drop any potentially existing data
2029 try:
2039 try:
2030 with self._indexfp() as read_ifh:
2040 with self._indexfp() as read_ifh:
2031 for r in self:
2041 for r in self:
2032 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2042 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2033 if (
2043 if (
2034 trindex is None
2044 trindex is None
2035 and troffset
2045 and troffset
2036 <= self.start(r) + r * self.index.entry_size
2046 <= self.start(r) + r * self.index.entry_size
2037 ):
2047 ):
2038 trindex = r
2048 trindex = r
2039 new_dfh.flush()
2049 new_dfh.flush()
2040
2050
2041 if trindex is None:
2051 if trindex is None:
2042 trindex = 0
2052 trindex = 0
2043
2053
2044 with self.__index_new_fp() as fp:
2054 with self.__index_new_fp() as fp:
2045 self._format_flags &= ~FLAG_INLINE_DATA
2055 self._format_flags &= ~FLAG_INLINE_DATA
2046 self._inline = False
2056 self._inline = False
2047 for i in self:
2057 for i in self:
2048 e = self.index.entry_binary(i)
2058 e = self.index.entry_binary(i)
2049 if i == 0 and self._docket is None:
2059 if i == 0 and self._docket is None:
2050 header = self._format_flags | self._format_version
2060 header = self._format_flags | self._format_version
2051 header = self.index.pack_header(header)
2061 header = self.index.pack_header(header)
2052 e = header + e
2062 e = header + e
2053 fp.write(e)
2063 fp.write(e)
2054 if self._docket is not None:
2064 if self._docket is not None:
2055 self._docket.index_end = fp.tell()
2065 self._docket.index_end = fp.tell()
2056
2066
2057 # There is a small transactional race here. If the rename of
2067 # There is a small transactional race here. If the rename of
2058 # the index fails, we should remove the datafile. It is more
2068 # the index fails, we should remove the datafile. It is more
2059 # important to ensure that the data file is not truncated
2069 # important to ensure that the data file is not truncated
2060 # when the index is replaced as otherwise data is lost.
2070 # when the index is replaced as otherwise data is lost.
2061 tr.replace(self._datafile, self.start(trindex))
2071 tr.replace(self._datafile, self.start(trindex))
2062
2072
2063 # the temp file replace the real index when we exit the context
2073 # the temp file replace the real index when we exit the context
2064 # manager
2074 # manager
2065
2075
2066 tr.replace(self._indexfile, trindex * self.index.entry_size)
2076 tr.replace(self._indexfile, trindex * self.index.entry_size)
2067 nodemaputil.setup_persistent_nodemap(tr, self)
2077 nodemaputil.setup_persistent_nodemap(tr, self)
2068 self._segmentfile = randomaccessfile.randomaccessfile(
2078 self._segmentfile = randomaccessfile.randomaccessfile(
2069 self.opener,
2079 self.opener,
2070 self._datafile,
2080 self._datafile,
2071 self._chunkcachesize,
2081 self._chunkcachesize,
2072 )
2082 )
2073
2083
2074 if existing_handles:
2084 if existing_handles:
2075 # switched from inline to conventional reopen the index
2085 # switched from inline to conventional reopen the index
2076 ifh = self.__index_write_fp()
2086 ifh = self.__index_write_fp()
2077 self._writinghandles = (ifh, new_dfh, None)
2087 self._writinghandles = (ifh, new_dfh, None)
2078 self._segmentfile.writing_handle = new_dfh
2088 self._segmentfile.writing_handle = new_dfh
2079 new_dfh = None
2089 new_dfh = None
2080 # No need to deal with sidedata writing handle as it is only
2090 # No need to deal with sidedata writing handle as it is only
2081 # relevant with revlog-v2 which is never inline, not reaching
2091 # relevant with revlog-v2 which is never inline, not reaching
2082 # this code
2092 # this code
2083 finally:
2093 finally:
2084 if new_dfh is not None:
2094 if new_dfh is not None:
2085 new_dfh.close()
2095 new_dfh.close()
2086
2096
2087 def _nodeduplicatecallback(self, transaction, node):
2097 def _nodeduplicatecallback(self, transaction, node):
2088 """called when trying to add a node already stored."""
2098 """called when trying to add a node already stored."""
2089
2099
2090 @contextlib.contextmanager
2100 @contextlib.contextmanager
2091 def reading(self):
2101 def reading(self):
2092 """Context manager that keeps data and sidedata files open for reading"""
2102 """Context manager that keeps data and sidedata files open for reading"""
2093 with self._segmentfile.reading():
2103 with self._segmentfile.reading():
2094 with self._segmentfile_sidedata.reading():
2104 with self._segmentfile_sidedata.reading():
2095 yield
2105 yield
2096
2106
2097 @contextlib.contextmanager
2107 @contextlib.contextmanager
2098 def _writing(self, transaction):
2108 def _writing(self, transaction):
2099 if self._trypending:
2109 if self._trypending:
2100 msg = b'try to write in a `trypending` revlog: %s'
2110 msg = b'try to write in a `trypending` revlog: %s'
2101 msg %= self.display_id
2111 msg %= self.display_id
2102 raise error.ProgrammingError(msg)
2112 raise error.ProgrammingError(msg)
2103 if self._writinghandles is not None:
2113 if self._writinghandles is not None:
2104 yield
2114 yield
2105 else:
2115 else:
2106 ifh = dfh = sdfh = None
2116 ifh = dfh = sdfh = None
2107 try:
2117 try:
2108 r = len(self)
2118 r = len(self)
2109 # opening the data file.
2119 # opening the data file.
2110 dsize = 0
2120 dsize = 0
2111 if r:
2121 if r:
2112 dsize = self.end(r - 1)
2122 dsize = self.end(r - 1)
2113 dfh = None
2123 dfh = None
2114 if not self._inline:
2124 if not self._inline:
2115 try:
2125 try:
2116 dfh = self._datafp(b"r+")
2126 dfh = self._datafp(b"r+")
2117 if self._docket is None:
2127 if self._docket is None:
2118 dfh.seek(0, os.SEEK_END)
2128 dfh.seek(0, os.SEEK_END)
2119 else:
2129 else:
2120 dfh.seek(self._docket.data_end, os.SEEK_SET)
2130 dfh.seek(self._docket.data_end, os.SEEK_SET)
2121 except FileNotFoundError:
2131 except FileNotFoundError:
2122 dfh = self._datafp(b"w+")
2132 dfh = self._datafp(b"w+")
2123 transaction.add(self._datafile, dsize)
2133 transaction.add(self._datafile, dsize)
2124 if self._sidedatafile is not None:
2134 if self._sidedatafile is not None:
2125 # revlog-v2 does not inline, help Pytype
2135 # revlog-v2 does not inline, help Pytype
2126 assert dfh is not None
2136 assert dfh is not None
2127 try:
2137 try:
2128 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2138 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2129 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2139 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2130 except FileNotFoundError:
2140 except FileNotFoundError:
2131 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2141 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2132 transaction.add(
2142 transaction.add(
2133 self._sidedatafile, self._docket.sidedata_end
2143 self._sidedatafile, self._docket.sidedata_end
2134 )
2144 )
2135
2145
2136 # opening the index file.
2146 # opening the index file.
2137 isize = r * self.index.entry_size
2147 isize = r * self.index.entry_size
2138 ifh = self.__index_write_fp()
2148 ifh = self.__index_write_fp()
2139 if self._inline:
2149 if self._inline:
2140 transaction.add(self._indexfile, dsize + isize)
2150 transaction.add(self._indexfile, dsize + isize)
2141 else:
2151 else:
2142 transaction.add(self._indexfile, isize)
2152 transaction.add(self._indexfile, isize)
2143 # exposing all file handle for writing.
2153 # exposing all file handle for writing.
2144 self._writinghandles = (ifh, dfh, sdfh)
2154 self._writinghandles = (ifh, dfh, sdfh)
2145 self._segmentfile.writing_handle = ifh if self._inline else dfh
2155 self._segmentfile.writing_handle = ifh if self._inline else dfh
2146 self._segmentfile_sidedata.writing_handle = sdfh
2156 self._segmentfile_sidedata.writing_handle = sdfh
2147 yield
2157 yield
2148 if self._docket is not None:
2158 if self._docket is not None:
2149 self._write_docket(transaction)
2159 self._write_docket(transaction)
2150 finally:
2160 finally:
2151 self._writinghandles = None
2161 self._writinghandles = None
2152 self._segmentfile.writing_handle = None
2162 self._segmentfile.writing_handle = None
2153 self._segmentfile_sidedata.writing_handle = None
2163 self._segmentfile_sidedata.writing_handle = None
2154 if dfh is not None:
2164 if dfh is not None:
2155 dfh.close()
2165 dfh.close()
2156 if sdfh is not None:
2166 if sdfh is not None:
2157 sdfh.close()
2167 sdfh.close()
2158 # closing the index file last to avoid exposing referent to
2168 # closing the index file last to avoid exposing referent to
2159 # potential unflushed data content.
2169 # potential unflushed data content.
2160 if ifh is not None:
2170 if ifh is not None:
2161 ifh.close()
2171 ifh.close()
2162
2172
2163 def _write_docket(self, transaction):
2173 def _write_docket(self, transaction):
2164 """write the current docket on disk
2174 """write the current docket on disk
2165
2175
2166 Exist as a method to help changelog to implement transaction logic
2176 Exist as a method to help changelog to implement transaction logic
2167
2177
2168 We could also imagine using the same transaction logic for all revlog
2178 We could also imagine using the same transaction logic for all revlog
2169 since docket are cheap."""
2179 since docket are cheap."""
2170 self._docket.write(transaction)
2180 self._docket.write(transaction)
2171
2181
2172 def addrevision(
2182 def addrevision(
2173 self,
2183 self,
2174 text,
2184 text,
2175 transaction,
2185 transaction,
2176 link,
2186 link,
2177 p1,
2187 p1,
2178 p2,
2188 p2,
2179 cachedelta=None,
2189 cachedelta=None,
2180 node=None,
2190 node=None,
2181 flags=REVIDX_DEFAULT_FLAGS,
2191 flags=REVIDX_DEFAULT_FLAGS,
2182 deltacomputer=None,
2192 deltacomputer=None,
2183 sidedata=None,
2193 sidedata=None,
2184 ):
2194 ):
2185 """add a revision to the log
2195 """add a revision to the log
2186
2196
2187 text - the revision data to add
2197 text - the revision data to add
2188 transaction - the transaction object used for rollback
2198 transaction - the transaction object used for rollback
2189 link - the linkrev data to add
2199 link - the linkrev data to add
2190 p1, p2 - the parent nodeids of the revision
2200 p1, p2 - the parent nodeids of the revision
2191 cachedelta - an optional precomputed delta
2201 cachedelta - an optional precomputed delta
2192 node - nodeid of revision; typically node is not specified, and it is
2202 node - nodeid of revision; typically node is not specified, and it is
2193 computed by default as hash(text, p1, p2), however subclasses might
2203 computed by default as hash(text, p1, p2), however subclasses might
2194 use different hashing method (and override checkhash() in such case)
2204 use different hashing method (and override checkhash() in such case)
2195 flags - the known flags to set on the revision
2205 flags - the known flags to set on the revision
2196 deltacomputer - an optional deltacomputer instance shared between
2206 deltacomputer - an optional deltacomputer instance shared between
2197 multiple calls
2207 multiple calls
2198 """
2208 """
2199 if link == nullrev:
2209 if link == nullrev:
2200 raise error.RevlogError(
2210 raise error.RevlogError(
2201 _(b"attempted to add linkrev -1 to %s") % self.display_id
2211 _(b"attempted to add linkrev -1 to %s") % self.display_id
2202 )
2212 )
2203
2213
2204 if sidedata is None:
2214 if sidedata is None:
2205 sidedata = {}
2215 sidedata = {}
2206 elif sidedata and not self.hassidedata:
2216 elif sidedata and not self.hassidedata:
2207 raise error.ProgrammingError(
2217 raise error.ProgrammingError(
2208 _(b"trying to add sidedata to a revlog who don't support them")
2218 _(b"trying to add sidedata to a revlog who don't support them")
2209 )
2219 )
2210
2220
2211 if flags:
2221 if flags:
2212 node = node or self.hash(text, p1, p2)
2222 node = node or self.hash(text, p1, p2)
2213
2223
2214 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2224 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2215
2225
2216 # If the flag processor modifies the revision data, ignore any provided
2226 # If the flag processor modifies the revision data, ignore any provided
2217 # cachedelta.
2227 # cachedelta.
2218 if rawtext != text:
2228 if rawtext != text:
2219 cachedelta = None
2229 cachedelta = None
2220
2230
2221 if len(rawtext) > _maxentrysize:
2231 if len(rawtext) > _maxentrysize:
2222 raise error.RevlogError(
2232 raise error.RevlogError(
2223 _(
2233 _(
2224 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2234 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2225 )
2235 )
2226 % (self.display_id, len(rawtext))
2236 % (self.display_id, len(rawtext))
2227 )
2237 )
2228
2238
2229 node = node or self.hash(rawtext, p1, p2)
2239 node = node or self.hash(rawtext, p1, p2)
2230 rev = self.index.get_rev(node)
2240 rev = self.index.get_rev(node)
2231 if rev is not None:
2241 if rev is not None:
2232 return rev
2242 return rev
2233
2243
2234 if validatehash:
2244 if validatehash:
2235 self.checkhash(rawtext, node, p1=p1, p2=p2)
2245 self.checkhash(rawtext, node, p1=p1, p2=p2)
2236
2246
2237 return self.addrawrevision(
2247 return self.addrawrevision(
2238 rawtext,
2248 rawtext,
2239 transaction,
2249 transaction,
2240 link,
2250 link,
2241 p1,
2251 p1,
2242 p2,
2252 p2,
2243 node,
2253 node,
2244 flags,
2254 flags,
2245 cachedelta=cachedelta,
2255 cachedelta=cachedelta,
2246 deltacomputer=deltacomputer,
2256 deltacomputer=deltacomputer,
2247 sidedata=sidedata,
2257 sidedata=sidedata,
2248 )
2258 )
2249
2259
2250 def addrawrevision(
2260 def addrawrevision(
2251 self,
2261 self,
2252 rawtext,
2262 rawtext,
2253 transaction,
2263 transaction,
2254 link,
2264 link,
2255 p1,
2265 p1,
2256 p2,
2266 p2,
2257 node,
2267 node,
2258 flags,
2268 flags,
2259 cachedelta=None,
2269 cachedelta=None,
2260 deltacomputer=None,
2270 deltacomputer=None,
2261 sidedata=None,
2271 sidedata=None,
2262 ):
2272 ):
2263 """add a raw revision with known flags, node and parents
2273 """add a raw revision with known flags, node and parents
2264 useful when reusing a revision not stored in this revlog (ex: received
2274 useful when reusing a revision not stored in this revlog (ex: received
2265 over wire, or read from an external bundle).
2275 over wire, or read from an external bundle).
2266 """
2276 """
2267 with self._writing(transaction):
2277 with self._writing(transaction):
2268 return self._addrevision(
2278 return self._addrevision(
2269 node,
2279 node,
2270 rawtext,
2280 rawtext,
2271 transaction,
2281 transaction,
2272 link,
2282 link,
2273 p1,
2283 p1,
2274 p2,
2284 p2,
2275 flags,
2285 flags,
2276 cachedelta,
2286 cachedelta,
2277 deltacomputer=deltacomputer,
2287 deltacomputer=deltacomputer,
2278 sidedata=sidedata,
2288 sidedata=sidedata,
2279 )
2289 )
2280
2290
2281 def compress(self, data):
2291 def compress(self, data):
2282 """Generate a possibly-compressed representation of data."""
2292 """Generate a possibly-compressed representation of data."""
2283 if not data:
2293 if not data:
2284 return b'', data
2294 return b'', data
2285
2295
2286 compressed = self._compressor.compress(data)
2296 compressed = self._compressor.compress(data)
2287
2297
2288 if compressed:
2298 if compressed:
2289 # The revlog compressor added the header in the returned data.
2299 # The revlog compressor added the header in the returned data.
2290 return b'', compressed
2300 return b'', compressed
2291
2301
2292 if data[0:1] == b'\0':
2302 if data[0:1] == b'\0':
2293 return b'', data
2303 return b'', data
2294 return b'u', data
2304 return b'u', data
2295
2305
2296 def decompress(self, data):
2306 def decompress(self, data):
2297 """Decompress a revlog chunk.
2307 """Decompress a revlog chunk.
2298
2308
2299 The chunk is expected to begin with a header identifying the
2309 The chunk is expected to begin with a header identifying the
2300 format type so it can be routed to an appropriate decompressor.
2310 format type so it can be routed to an appropriate decompressor.
2301 """
2311 """
2302 if not data:
2312 if not data:
2303 return data
2313 return data
2304
2314
2305 # Revlogs are read much more frequently than they are written and many
2315 # Revlogs are read much more frequently than they are written and many
2306 # chunks only take microseconds to decompress, so performance is
2316 # chunks only take microseconds to decompress, so performance is
2307 # important here.
2317 # important here.
2308 #
2318 #
2309 # We can make a few assumptions about revlogs:
2319 # We can make a few assumptions about revlogs:
2310 #
2320 #
2311 # 1) the majority of chunks will be compressed (as opposed to inline
2321 # 1) the majority of chunks will be compressed (as opposed to inline
2312 # raw data).
2322 # raw data).
2313 # 2) decompressing *any* data will likely by at least 10x slower than
2323 # 2) decompressing *any* data will likely by at least 10x slower than
2314 # returning raw inline data.
2324 # returning raw inline data.
2315 # 3) we want to prioritize common and officially supported compression
2325 # 3) we want to prioritize common and officially supported compression
2316 # engines
2326 # engines
2317 #
2327 #
2318 # It follows that we want to optimize for "decompress compressed data
2328 # It follows that we want to optimize for "decompress compressed data
2319 # when encoded with common and officially supported compression engines"
2329 # when encoded with common and officially supported compression engines"
2320 # case over "raw data" and "data encoded by less common or non-official
2330 # case over "raw data" and "data encoded by less common or non-official
2321 # compression engines." That is why we have the inline lookup first
2331 # compression engines." That is why we have the inline lookup first
2322 # followed by the compengines lookup.
2332 # followed by the compengines lookup.
2323 #
2333 #
2324 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2334 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2325 # compressed chunks. And this matters for changelog and manifest reads.
2335 # compressed chunks. And this matters for changelog and manifest reads.
2326 t = data[0:1]
2336 t = data[0:1]
2327
2337
2328 if t == b'x':
2338 if t == b'x':
2329 try:
2339 try:
2330 return _zlibdecompress(data)
2340 return _zlibdecompress(data)
2331 except zlib.error as e:
2341 except zlib.error as e:
2332 raise error.RevlogError(
2342 raise error.RevlogError(
2333 _(b'revlog decompress error: %s')
2343 _(b'revlog decompress error: %s')
2334 % stringutil.forcebytestr(e)
2344 % stringutil.forcebytestr(e)
2335 )
2345 )
2336 # '\0' is more common than 'u' so it goes first.
2346 # '\0' is more common than 'u' so it goes first.
2337 elif t == b'\0':
2347 elif t == b'\0':
2338 return data
2348 return data
2339 elif t == b'u':
2349 elif t == b'u':
2340 return util.buffer(data, 1)
2350 return util.buffer(data, 1)
2341
2351
2342 compressor = self._get_decompressor(t)
2352 compressor = self._get_decompressor(t)
2343
2353
2344 return compressor.decompress(data)
2354 return compressor.decompress(data)
2345
2355
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.

        Returns the revision number of the newly appended revision. Must be
        called inside a `_writing` context (enforced below).
        """
        # the null and working-directory identifiers are reserved sentinels
        # and must never be stored as actual revisions
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        # pick the handle the delta computer should read existing chunks
        # from: the index file for inline revlogs, the data file otherwise
        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        # wrapped in a list so buildtext() can fill it in lazily by mutation
        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            write_debug = None
            if self._debug_delta:
                write_debug = transaction._report
            deltacomputer = deltautil.deltacomputer(
                self, write_debug=write_debug
            )

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        # choose the delta base (or full snapshot) for this revision
        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            # docket-based formats may record the compression out of band
            default_comp = self._docket.default_compression_header
            r = deltautil.delta_compression(default_comp, deltainfo)
            compression_mode, deltainfo = r

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.hassidedata:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self.compress(serialized_sidedata)
            # only keep the compressed form when it is unambiguous (no 'u'
            # marker, no leading NUL) and actually smaller than the original
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        # "rank" (number of ancestors, self included) is only tracked by
        # the changelog-v2 format
        rank = RANK_UNKNOWN
        if self._format_version == CHANGELOGV2:
            if (p1r, p2r) == (nullrev, nullrev):
                rank = 1
            elif p1r != nullrev and p2r == nullrev:
                rank = 1 + self.fast_rank(p1r)
            elif p1r == nullrev and p2r != nullrev:
                rank = 1 + self.fast_rank(p2r)
            else:  # merge node
                if rustdagop is not None and self.index.rust_ext_compat:
                    rank = rustdagop.rank(self.index, p1r, p2r)
                else:
                    # rank(merge) = rank(max parent) + revisions reachable
                    # from the other parent only
                    pmin, pmax = sorted((p1r, p2r))
                    rank = 1 + self.fast_rank(pmax)
                    rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))

        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
            rank=rank,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            # docket-less formats embed the version header in the first entry
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        # buildtext() may have reconstructed the full text as a side effect
        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr
2542
2552
2543 def _get_data_offset(self, prev):
2553 def _get_data_offset(self, prev):
2544 """Returns the current offset in the (in-transaction) data file.
2554 """Returns the current offset in the (in-transaction) data file.
2545 Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket
2555 Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket
2546 file to store that information: since sidedata can be rewritten to the
2556 file to store that information: since sidedata can be rewritten to the
2547 end of the data file within a transaction, you can have cases where, for
2557 end of the data file within a transaction, you can have cases where, for
2548 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2558 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2549 to `n - 1`'s sidedata being written after `n`'s data.
2559 to `n - 1`'s sidedata being written after `n`'s data.
2550
2560
2551 TODO cache this in a docket file before getting out of experimental."""
2561 TODO cache this in a docket file before getting out of experimental."""
2552 if self._docket is None:
2562 if self._docket is None:
2553 return self.end(prev)
2563 return self.end(prev)
2554 else:
2564 else:
2555 return self._docket.data_end
2565 return self._docket.data_end
2556
2566
    def _writeentry(
        self, transaction, entry, data, link, offset, sidedata, sidedata_offset
    ):
        """write one index entry plus its data/sidedata payload to disk

        ``entry`` is the packed binary index record, ``data`` a
        (prefix, payload) pair produced by the delta computer, ``offset`` the
        data-file position this revision starts at, and ``sidedata`` the
        (possibly empty) serialized sidedata with its own offset. Registers
        the pre-write file sizes with ``transaction`` for rollback and, for
        docket-based formats, records the new end offsets afterwards. Must be
        called inside a `_writing` context.
        """
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if self._docket is None:
            # docket-less format: always append at the physical end of file
            ifh.seek(0, os.SEEK_END)
        else:
            # docket format: the logical end may differ from the file size
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            # only docket-based formats have a sidedata file
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            # register pre-write sizes so rollback truncates correctly
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            # inline revlog: data chunks live in the index file itself
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            # inline implies pre-v2 format, which never carries sidedata
            assert not sidedata
        # may split the revlog out of inline mode once it grows too large
        self._enforceinlinesize(transaction)
        if self._docket is not None:
            # revlog-v2 always has 3 writing handles, help Pytype
            wh1 = self._writinghandles[0]
            wh2 = self._writinghandles[1]
            wh3 = self._writinghandles[2]
            assert wh1 is not None
            assert wh2 is not None
            assert wh3 is not None
            # record the new logical end offsets in the docket
            self._docket.index_end = wh1.tell()
            self._docket.data_end = wh2.tell()
            self._docket.sidedata_end = wh3.tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)
2621
2631
2622 def addgroup(
2632 def addgroup(
2623 self,
2633 self,
2624 deltas,
2634 deltas,
2625 linkmapper,
2635 linkmapper,
2626 transaction,
2636 transaction,
2627 alwayscache=False,
2637 alwayscache=False,
2628 addrevisioncb=None,
2638 addrevisioncb=None,
2629 duplicaterevisioncb=None,
2639 duplicaterevisioncb=None,
2630 ):
2640 ):
2631 """
2641 """
2632 add a delta group
2642 add a delta group
2633
2643
2634 given a set of deltas, add them to the revision log. the
2644 given a set of deltas, add them to the revision log. the
2635 first delta is against its parent, which should be in our
2645 first delta is against its parent, which should be in our
2636 log, the rest are against the previous delta.
2646 log, the rest are against the previous delta.
2637
2647
2638 If ``addrevisioncb`` is defined, it will be called with arguments of
2648 If ``addrevisioncb`` is defined, it will be called with arguments of
2639 this revlog and the node that was added.
2649 this revlog and the node that was added.
2640 """
2650 """
2641
2651
2642 if self._adding_group:
2652 if self._adding_group:
2643 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2653 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2644
2654
2645 self._adding_group = True
2655 self._adding_group = True
2646 empty = True
2656 empty = True
2647 try:
2657 try:
2648 with self._writing(transaction):
2658 with self._writing(transaction):
2649 write_debug = None
2659 write_debug = None
2650 if self._debug_delta:
2660 if self._debug_delta:
2651 write_debug = transaction._report
2661 write_debug = transaction._report
2652 deltacomputer = deltautil.deltacomputer(
2662 deltacomputer = deltautil.deltacomputer(
2653 self,
2663 self,
2654 write_debug=write_debug,
2664 write_debug=write_debug,
2655 )
2665 )
2656 # loop through our set of deltas
2666 # loop through our set of deltas
2657 for data in deltas:
2667 for data in deltas:
2658 (
2668 (
2659 node,
2669 node,
2660 p1,
2670 p1,
2661 p2,
2671 p2,
2662 linknode,
2672 linknode,
2663 deltabase,
2673 deltabase,
2664 delta,
2674 delta,
2665 flags,
2675 flags,
2666 sidedata,
2676 sidedata,
2667 ) = data
2677 ) = data
2668 link = linkmapper(linknode)
2678 link = linkmapper(linknode)
2669 flags = flags or REVIDX_DEFAULT_FLAGS
2679 flags = flags or REVIDX_DEFAULT_FLAGS
2670
2680
2671 rev = self.index.get_rev(node)
2681 rev = self.index.get_rev(node)
2672 if rev is not None:
2682 if rev is not None:
2673 # this can happen if two branches make the same change
2683 # this can happen if two branches make the same change
2674 self._nodeduplicatecallback(transaction, rev)
2684 self._nodeduplicatecallback(transaction, rev)
2675 if duplicaterevisioncb:
2685 if duplicaterevisioncb:
2676 duplicaterevisioncb(self, rev)
2686 duplicaterevisioncb(self, rev)
2677 empty = False
2687 empty = False
2678 continue
2688 continue
2679
2689
2680 for p in (p1, p2):
2690 for p in (p1, p2):
2681 if not self.index.has_node(p):
2691 if not self.index.has_node(p):
2682 raise error.LookupError(
2692 raise error.LookupError(
2683 p, self.radix, _(b'unknown parent')
2693 p, self.radix, _(b'unknown parent')
2684 )
2694 )
2685
2695
2686 if not self.index.has_node(deltabase):
2696 if not self.index.has_node(deltabase):
2687 raise error.LookupError(
2697 raise error.LookupError(
2688 deltabase, self.display_id, _(b'unknown delta base')
2698 deltabase, self.display_id, _(b'unknown delta base')
2689 )
2699 )
2690
2700
2691 baserev = self.rev(deltabase)
2701 baserev = self.rev(deltabase)
2692
2702
2693 if baserev != nullrev and self.iscensored(baserev):
2703 if baserev != nullrev and self.iscensored(baserev):
2694 # if base is censored, delta must be full replacement in a
2704 # if base is censored, delta must be full replacement in a
2695 # single patch operation
2705 # single patch operation
2696 hlen = struct.calcsize(b">lll")
2706 hlen = struct.calcsize(b">lll")
2697 oldlen = self.rawsize(baserev)
2707 oldlen = self.rawsize(baserev)
2698 newlen = len(delta) - hlen
2708 newlen = len(delta) - hlen
2699 if delta[:hlen] != mdiff.replacediffheader(
2709 if delta[:hlen] != mdiff.replacediffheader(
2700 oldlen, newlen
2710 oldlen, newlen
2701 ):
2711 ):
2702 raise error.CensoredBaseError(
2712 raise error.CensoredBaseError(
2703 self.display_id, self.node(baserev)
2713 self.display_id, self.node(baserev)
2704 )
2714 )
2705
2715
2706 if not flags and self._peek_iscensored(baserev, delta):
2716 if not flags and self._peek_iscensored(baserev, delta):
2707 flags |= REVIDX_ISCENSORED
2717 flags |= REVIDX_ISCENSORED
2708
2718
2709 # We assume consumers of addrevisioncb will want to retrieve
2719 # We assume consumers of addrevisioncb will want to retrieve
2710 # the added revision, which will require a call to
2720 # the added revision, which will require a call to
2711 # revision(). revision() will fast path if there is a cache
2721 # revision(). revision() will fast path if there is a cache
2712 # hit. So, we tell _addrevision() to always cache in this case.
2722 # hit. So, we tell _addrevision() to always cache in this case.
2713 # We're only using addgroup() in the context of changegroup
2723 # We're only using addgroup() in the context of changegroup
2714 # generation so the revision data can always be handled as raw
2724 # generation so the revision data can always be handled as raw
2715 # by the flagprocessor.
2725 # by the flagprocessor.
2716 rev = self._addrevision(
2726 rev = self._addrevision(
2717 node,
2727 node,
2718 None,
2728 None,
2719 transaction,
2729 transaction,
2720 link,
2730 link,
2721 p1,
2731 p1,
2722 p2,
2732 p2,
2723 flags,
2733 flags,
2724 (baserev, delta),
2734 (baserev, delta),
2725 alwayscache=alwayscache,
2735 alwayscache=alwayscache,
2726 deltacomputer=deltacomputer,
2736 deltacomputer=deltacomputer,
2727 sidedata=sidedata,
2737 sidedata=sidedata,
2728 )
2738 )
2729
2739
2730 if addrevisioncb:
2740 if addrevisioncb:
2731 addrevisioncb(self, rev)
2741 addrevisioncb(self, rev)
2732 empty = False
2742 empty = False
2733 finally:
2743 finally:
2734 self._adding_group = False
2744 self._adding_group = False
2735 return not empty
2745 return not empty
2736
2746
2737 def iscensored(self, rev):
2747 def iscensored(self, rev):
2738 """Check if a file revision is censored."""
2748 """Check if a file revision is censored."""
2739 if not self._censorable:
2749 if not self._censorable:
2740 return False
2750 return False
2741
2751
2742 return self.flags(rev) & REVIDX_ISCENSORED
2752 return self.flags(rev) & REVIDX_ISCENSORED
2743
2753
2744 def _peek_iscensored(self, baserev, delta):
2754 def _peek_iscensored(self, baserev, delta):
2745 """Quickly check if a delta produces a censored revision."""
2755 """Quickly check if a delta produces a censored revision."""
2746 if not self._censorable:
2756 if not self._censorable:
2747 return False
2757 return False
2748
2758
2749 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2759 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2750
2760
2751 def getstrippoint(self, minlink):
2761 def getstrippoint(self, minlink):
2752 """find the minimum rev that must be stripped to strip the linkrev
2762 """find the minimum rev that must be stripped to strip the linkrev
2753
2763
2754 Returns a tuple containing the minimum rev and a set of all revs that
2764 Returns a tuple containing the minimum rev and a set of all revs that
2755 have linkrevs that will be broken by this strip.
2765 have linkrevs that will be broken by this strip.
2756 """
2766 """
2757 return storageutil.resolvestripinfo(
2767 return storageutil.resolvestripinfo(
2758 minlink,
2768 minlink,
2759 len(self) - 1,
2769 len(self) - 1,
2760 self.headrevs(),
2770 self.headrevs(),
2761 self.linkrev,
2771 self.linkrev,
2762 self.parentrevs,
2772 self.parentrevs,
2763 )
2773 )
2764
2774
2765 def strip(self, minlink, transaction):
2775 def strip(self, minlink, transaction):
2766 """truncate the revlog on the first revision with a linkrev >= minlink
2776 """truncate the revlog on the first revision with a linkrev >= minlink
2767
2777
2768 This function is called when we're stripping revision minlink and
2778 This function is called when we're stripping revision minlink and
2769 its descendants from the repository.
2779 its descendants from the repository.
2770
2780
2771 We have to remove all revisions with linkrev >= minlink, because
2781 We have to remove all revisions with linkrev >= minlink, because
2772 the equivalent changelog revisions will be renumbered after the
2782 the equivalent changelog revisions will be renumbered after the
2773 strip.
2783 strip.
2774
2784
2775 So we truncate the revlog on the first of these revisions, and
2785 So we truncate the revlog on the first of these revisions, and
2776 trust that the caller has saved the revisions that shouldn't be
2786 trust that the caller has saved the revisions that shouldn't be
2777 removed and that it'll re-add them after this truncation.
2787 removed and that it'll re-add them after this truncation.
2778 """
2788 """
2779 if len(self) == 0:
2789 if len(self) == 0:
2780 return
2790 return
2781
2791
2782 rev, _ = self.getstrippoint(minlink)
2792 rev, _ = self.getstrippoint(minlink)
2783 if rev == len(self):
2793 if rev == len(self):
2784 return
2794 return
2785
2795
2786 # first truncate the files on disk
2796 # first truncate the files on disk
2787 data_end = self.start(rev)
2797 data_end = self.start(rev)
2788 if not self._inline:
2798 if not self._inline:
2789 transaction.add(self._datafile, data_end)
2799 transaction.add(self._datafile, data_end)
2790 end = rev * self.index.entry_size
2800 end = rev * self.index.entry_size
2791 else:
2801 else:
2792 end = data_end + (rev * self.index.entry_size)
2802 end = data_end + (rev * self.index.entry_size)
2793
2803
2794 if self._sidedatafile:
2804 if self._sidedatafile:
2795 sidedata_end = self.sidedata_cut_off(rev)
2805 sidedata_end = self.sidedata_cut_off(rev)
2796 transaction.add(self._sidedatafile, sidedata_end)
2806 transaction.add(self._sidedatafile, sidedata_end)
2797
2807
2798 transaction.add(self._indexfile, end)
2808 transaction.add(self._indexfile, end)
2799 if self._docket is not None:
2809 if self._docket is not None:
2800 # XXX we could, leverage the docket while stripping. However it is
2810 # XXX we could, leverage the docket while stripping. However it is
2801 # not powerfull enough at the time of this comment
2811 # not powerfull enough at the time of this comment
2802 self._docket.index_end = end
2812 self._docket.index_end = end
2803 self._docket.data_end = data_end
2813 self._docket.data_end = data_end
2804 self._docket.sidedata_end = sidedata_end
2814 self._docket.sidedata_end = sidedata_end
2805 self._docket.write(transaction, stripping=True)
2815 self._docket.write(transaction, stripping=True)
2806
2816
2807 # then reset internal state in memory to forget those revisions
2817 # then reset internal state in memory to forget those revisions
2808 self._revisioncache = None
2818 self._revisioncache = None
2809 self._chaininfocache = util.lrucachedict(500)
2819 self._chaininfocache = util.lrucachedict(500)
2810 self._segmentfile.clear_cache()
2820 self._segmentfile.clear_cache()
2811 self._segmentfile_sidedata.clear_cache()
2821 self._segmentfile_sidedata.clear_cache()
2812
2822
2813 del self.index[rev:-1]
2823 del self.index[rev:-1]
2814
2824
2815 def checksize(self):
2825 def checksize(self):
2816 """Check size of index and data files
2826 """Check size of index and data files
2817
2827
2818 return a (dd, di) tuple.
2828 return a (dd, di) tuple.
2819 - dd: extra bytes for the "data" file
2829 - dd: extra bytes for the "data" file
2820 - di: extra bytes for the "index" file
2830 - di: extra bytes for the "index" file
2821
2831
2822 A healthy revlog will return (0, 0).
2832 A healthy revlog will return (0, 0).
2823 """
2833 """
2824 expected = 0
2834 expected = 0
2825 if len(self):
2835 if len(self):
2826 expected = max(0, self.end(len(self) - 1))
2836 expected = max(0, self.end(len(self) - 1))
2827
2837
2828 try:
2838 try:
2829 with self._datafp() as f:
2839 with self._datafp() as f:
2830 f.seek(0, io.SEEK_END)
2840 f.seek(0, io.SEEK_END)
2831 actual = f.tell()
2841 actual = f.tell()
2832 dd = actual - expected
2842 dd = actual - expected
2833 except FileNotFoundError:
2843 except FileNotFoundError:
2834 dd = 0
2844 dd = 0
2835
2845
2836 try:
2846 try:
2837 f = self.opener(self._indexfile)
2847 f = self.opener(self._indexfile)
2838 f.seek(0, io.SEEK_END)
2848 f.seek(0, io.SEEK_END)
2839 actual = f.tell()
2849 actual = f.tell()
2840 f.close()
2850 f.close()
2841 s = self.index.entry_size
2851 s = self.index.entry_size
2842 i = max(0, actual // s)
2852 i = max(0, actual // s)
2843 di = actual - (i * s)
2853 di = actual - (i * s)
2844 if self._inline:
2854 if self._inline:
2845 databytes = 0
2855 databytes = 0
2846 for r in self:
2856 for r in self:
2847 databytes += max(0, self.length(r))
2857 databytes += max(0, self.length(r))
2848 dd = 0
2858 dd = 0
2849 di = actual - len(self) * s - databytes
2859 di = actual - len(self) * s - databytes
2850 except FileNotFoundError:
2860 except FileNotFoundError:
2851 di = 0
2861 di = 0
2852
2862
2853 return (dd, di)
2863 return (dd, di)
2854
2864
2855 def files(self):
2865 def files(self):
2856 res = [self._indexfile]
2866 res = [self._indexfile]
2857 if self._docket_file is None:
2867 if self._docket_file is None:
2858 if not self._inline:
2868 if not self._inline:
2859 res.append(self._datafile)
2869 res.append(self._datafile)
2860 else:
2870 else:
2861 res.append(self._docket_file)
2871 res.append(self._docket_file)
2862 res.extend(self._docket.old_index_filepaths(include_empty=False))
2872 res.extend(self._docket.old_index_filepaths(include_empty=False))
2863 if self._docket.data_end:
2873 if self._docket.data_end:
2864 res.append(self._datafile)
2874 res.append(self._datafile)
2865 res.extend(self._docket.old_data_filepaths(include_empty=False))
2875 res.extend(self._docket.old_data_filepaths(include_empty=False))
2866 if self._docket.sidedata_end:
2876 if self._docket.sidedata_end:
2867 res.append(self._sidedatafile)
2877 res.append(self._sidedatafile)
2868 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
2878 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
2869 return res
2879 return res
2870
2880
2871 def emitrevisions(
2881 def emitrevisions(
2872 self,
2882 self,
2873 nodes,
2883 nodes,
2874 nodesorder=None,
2884 nodesorder=None,
2875 revisiondata=False,
2885 revisiondata=False,
2876 assumehaveparentrevisions=False,
2886 assumehaveparentrevisions=False,
2877 deltamode=repository.CG_DELTAMODE_STD,
2887 deltamode=repository.CG_DELTAMODE_STD,
2878 sidedata_helpers=None,
2888 sidedata_helpers=None,
2879 ):
2889 ):
2880 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2890 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2881 raise error.ProgrammingError(
2891 raise error.ProgrammingError(
2882 b'unhandled value for nodesorder: %s' % nodesorder
2892 b'unhandled value for nodesorder: %s' % nodesorder
2883 )
2893 )
2884
2894
2885 if nodesorder is None and not self._generaldelta:
2895 if nodesorder is None and not self._generaldelta:
2886 nodesorder = b'storage'
2896 nodesorder = b'storage'
2887
2897
2888 if (
2898 if (
2889 not self._storedeltachains
2899 not self._storedeltachains
2890 and deltamode != repository.CG_DELTAMODE_PREV
2900 and deltamode != repository.CG_DELTAMODE_PREV
2891 ):
2901 ):
2892 deltamode = repository.CG_DELTAMODE_FULL
2902 deltamode = repository.CG_DELTAMODE_FULL
2893
2903
2894 return storageutil.emitrevisions(
2904 return storageutil.emitrevisions(
2895 self,
2905 self,
2896 nodes,
2906 nodes,
2897 nodesorder,
2907 nodesorder,
2898 revlogrevisiondelta,
2908 revlogrevisiondelta,
2899 deltaparentfn=self.deltaparent,
2909 deltaparentfn=self.deltaparent,
2900 candeltafn=self.candelta,
2910 candeltafn=self.candelta,
2901 rawsizefn=self.rawsize,
2911 rawsizefn=self.rawsize,
2902 revdifffn=self.revdiff,
2912 revdifffn=self.revdiff,
2903 flagsfn=self.flags,
2913 flagsfn=self.flags,
2904 deltamode=deltamode,
2914 deltamode=deltamode,
2905 revisiondata=revisiondata,
2915 revisiondata=revisiondata,
2906 assumehaveparentrevisions=assumehaveparentrevisions,
2916 assumehaveparentrevisions=assumehaveparentrevisions,
2907 sidedata_helpers=sidedata_helpers,
2917 sidedata_helpers=sidedata_helpers,
2908 )
2918 )
2909
2919
2910 DELTAREUSEALWAYS = b'always'
2920 DELTAREUSEALWAYS = b'always'
2911 DELTAREUSESAMEREVS = b'samerevs'
2921 DELTAREUSESAMEREVS = b'samerevs'
2912 DELTAREUSENEVER = b'never'
2922 DELTAREUSENEVER = b'never'
2913
2923
2914 DELTAREUSEFULLADD = b'fulladd'
2924 DELTAREUSEFULLADD = b'fulladd'
2915
2925
2916 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2926 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2917
2927
2918 def clone(
2928 def clone(
2919 self,
2929 self,
2920 tr,
2930 tr,
2921 destrevlog,
2931 destrevlog,
2922 addrevisioncb=None,
2932 addrevisioncb=None,
2923 deltareuse=DELTAREUSESAMEREVS,
2933 deltareuse=DELTAREUSESAMEREVS,
2924 forcedeltabothparents=None,
2934 forcedeltabothparents=None,
2925 sidedata_helpers=None,
2935 sidedata_helpers=None,
2926 ):
2936 ):
2927 """Copy this revlog to another, possibly with format changes.
2937 """Copy this revlog to another, possibly with format changes.
2928
2938
2929 The destination revlog will contain the same revisions and nodes.
2939 The destination revlog will contain the same revisions and nodes.
2930 However, it may not be bit-for-bit identical due to e.g. delta encoding
2940 However, it may not be bit-for-bit identical due to e.g. delta encoding
2931 differences.
2941 differences.
2932
2942
2933 The ``deltareuse`` argument control how deltas from the existing revlog
2943 The ``deltareuse`` argument control how deltas from the existing revlog
2934 are preserved in the destination revlog. The argument can have the
2944 are preserved in the destination revlog. The argument can have the
2935 following values:
2945 following values:
2936
2946
2937 DELTAREUSEALWAYS
2947 DELTAREUSEALWAYS
2938 Deltas will always be reused (if possible), even if the destination
2948 Deltas will always be reused (if possible), even if the destination
2939 revlog would not select the same revisions for the delta. This is the
2949 revlog would not select the same revisions for the delta. This is the
2940 fastest mode of operation.
2950 fastest mode of operation.
2941 DELTAREUSESAMEREVS
2951 DELTAREUSESAMEREVS
2942 Deltas will be reused if the destination revlog would pick the same
2952 Deltas will be reused if the destination revlog would pick the same
2943 revisions for the delta. This mode strikes a balance between speed
2953 revisions for the delta. This mode strikes a balance between speed
2944 and optimization.
2954 and optimization.
2945 DELTAREUSENEVER
2955 DELTAREUSENEVER
2946 Deltas will never be reused. This is the slowest mode of execution.
2956 Deltas will never be reused. This is the slowest mode of execution.
2947 This mode can be used to recompute deltas (e.g. if the diff/delta
2957 This mode can be used to recompute deltas (e.g. if the diff/delta
2948 algorithm changes).
2958 algorithm changes).
2949 DELTAREUSEFULLADD
2959 DELTAREUSEFULLADD
2950 Revision will be re-added as if their were new content. This is
2960 Revision will be re-added as if their were new content. This is
2951 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
2961 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
2952 eg: large file detection and handling.
2962 eg: large file detection and handling.
2953
2963
2954 Delta computation can be slow, so the choice of delta reuse policy can
2964 Delta computation can be slow, so the choice of delta reuse policy can
2955 significantly affect run time.
2965 significantly affect run time.
2956
2966
2957 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2967 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2958 two extremes. Deltas will be reused if they are appropriate. But if the
2968 two extremes. Deltas will be reused if they are appropriate. But if the
2959 delta could choose a better revision, it will do so. This means if you
2969 delta could choose a better revision, it will do so. This means if you
2960 are converting a non-generaldelta revlog to a generaldelta revlog,
2970 are converting a non-generaldelta revlog to a generaldelta revlog,
2961 deltas will be recomputed if the delta's parent isn't a parent of the
2971 deltas will be recomputed if the delta's parent isn't a parent of the
2962 revision.
2972 revision.
2963
2973
2964 In addition to the delta policy, the ``forcedeltabothparents``
2974 In addition to the delta policy, the ``forcedeltabothparents``
2965 argument controls whether to force compute deltas against both parents
2975 argument controls whether to force compute deltas against both parents
2966 for merges. By default, the current default is used.
2976 for merges. By default, the current default is used.
2967
2977
2968 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2978 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2969 `sidedata_helpers`.
2979 `sidedata_helpers`.
2970 """
2980 """
2971 if deltareuse not in self.DELTAREUSEALL:
2981 if deltareuse not in self.DELTAREUSEALL:
2972 raise ValueError(
2982 raise ValueError(
2973 _(b'value for deltareuse invalid: %s') % deltareuse
2983 _(b'value for deltareuse invalid: %s') % deltareuse
2974 )
2984 )
2975
2985
2976 if len(destrevlog):
2986 if len(destrevlog):
2977 raise ValueError(_(b'destination revlog is not empty'))
2987 raise ValueError(_(b'destination revlog is not empty'))
2978
2988
2979 if getattr(self, 'filteredrevs', None):
2989 if getattr(self, 'filteredrevs', None):
2980 raise ValueError(_(b'source revlog has filtered revisions'))
2990 raise ValueError(_(b'source revlog has filtered revisions'))
2981 if getattr(destrevlog, 'filteredrevs', None):
2991 if getattr(destrevlog, 'filteredrevs', None):
2982 raise ValueError(_(b'destination revlog has filtered revisions'))
2992 raise ValueError(_(b'destination revlog has filtered revisions'))
2983
2993
2984 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2994 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2985 # if possible.
2995 # if possible.
2986 oldlazydelta = destrevlog._lazydelta
2996 oldlazydelta = destrevlog._lazydelta
2987 oldlazydeltabase = destrevlog._lazydeltabase
2997 oldlazydeltabase = destrevlog._lazydeltabase
2988 oldamd = destrevlog._deltabothparents
2998 oldamd = destrevlog._deltabothparents
2989
2999
2990 try:
3000 try:
2991 if deltareuse == self.DELTAREUSEALWAYS:
3001 if deltareuse == self.DELTAREUSEALWAYS:
2992 destrevlog._lazydeltabase = True
3002 destrevlog._lazydeltabase = True
2993 destrevlog._lazydelta = True
3003 destrevlog._lazydelta = True
2994 elif deltareuse == self.DELTAREUSESAMEREVS:
3004 elif deltareuse == self.DELTAREUSESAMEREVS:
2995 destrevlog._lazydeltabase = False
3005 destrevlog._lazydeltabase = False
2996 destrevlog._lazydelta = True
3006 destrevlog._lazydelta = True
2997 elif deltareuse == self.DELTAREUSENEVER:
3007 elif deltareuse == self.DELTAREUSENEVER:
2998 destrevlog._lazydeltabase = False
3008 destrevlog._lazydeltabase = False
2999 destrevlog._lazydelta = False
3009 destrevlog._lazydelta = False
3000
3010
3001 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3011 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3002
3012
3003 self._clone(
3013 self._clone(
3004 tr,
3014 tr,
3005 destrevlog,
3015 destrevlog,
3006 addrevisioncb,
3016 addrevisioncb,
3007 deltareuse,
3017 deltareuse,
3008 forcedeltabothparents,
3018 forcedeltabothparents,
3009 sidedata_helpers,
3019 sidedata_helpers,
3010 )
3020 )
3011
3021
3012 finally:
3022 finally:
3013 destrevlog._lazydelta = oldlazydelta
3023 destrevlog._lazydelta = oldlazydelta
3014 destrevlog._lazydeltabase = oldlazydeltabase
3024 destrevlog._lazydeltabase = oldlazydeltabase
3015 destrevlog._deltabothparents = oldamd
3025 destrevlog._deltabothparents = oldamd
3016
3026
3017 def _clone(
3027 def _clone(
3018 self,
3028 self,
3019 tr,
3029 tr,
3020 destrevlog,
3030 destrevlog,
3021 addrevisioncb,
3031 addrevisioncb,
3022 deltareuse,
3032 deltareuse,
3023 forcedeltabothparents,
3033 forcedeltabothparents,
3024 sidedata_helpers,
3034 sidedata_helpers,
3025 ):
3035 ):
3026 """perform the core duty of `revlog.clone` after parameter processing"""
3036 """perform the core duty of `revlog.clone` after parameter processing"""
3027 write_debug = None
3037 write_debug = None
3028 if self._debug_delta:
3038 if self._debug_delta:
3029 write_debug = tr._report
3039 write_debug = tr._report
3030 deltacomputer = deltautil.deltacomputer(
3040 deltacomputer = deltautil.deltacomputer(
3031 destrevlog,
3041 destrevlog,
3032 write_debug=write_debug,
3042 write_debug=write_debug,
3033 )
3043 )
3034 index = self.index
3044 index = self.index
3035 for rev in self:
3045 for rev in self:
3036 entry = index[rev]
3046 entry = index[rev]
3037
3047
3038 # Some classes override linkrev to take filtered revs into
3048 # Some classes override linkrev to take filtered revs into
3039 # account. Use raw entry from index.
3049 # account. Use raw entry from index.
3040 flags = entry[0] & 0xFFFF
3050 flags = entry[0] & 0xFFFF
3041 linkrev = entry[4]
3051 linkrev = entry[4]
3042 p1 = index[entry[5]][7]
3052 p1 = index[entry[5]][7]
3043 p2 = index[entry[6]][7]
3053 p2 = index[entry[6]][7]
3044 node = entry[7]
3054 node = entry[7]
3045
3055
3046 # (Possibly) reuse the delta from the revlog if allowed and
3056 # (Possibly) reuse the delta from the revlog if allowed and
3047 # the revlog chunk is a delta.
3057 # the revlog chunk is a delta.
3048 cachedelta = None
3058 cachedelta = None
3049 rawtext = None
3059 rawtext = None
3050 if deltareuse == self.DELTAREUSEFULLADD:
3060 if deltareuse == self.DELTAREUSEFULLADD:
3051 text = self._revisiondata(rev)
3061 text = self._revisiondata(rev)
3052 sidedata = self.sidedata(rev)
3062 sidedata = self.sidedata(rev)
3053
3063
3054 if sidedata_helpers is not None:
3064 if sidedata_helpers is not None:
3055 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3065 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3056 self, sidedata_helpers, sidedata, rev
3066 self, sidedata_helpers, sidedata, rev
3057 )
3067 )
3058 flags = flags | new_flags[0] & ~new_flags[1]
3068 flags = flags | new_flags[0] & ~new_flags[1]
3059
3069
3060 destrevlog.addrevision(
3070 destrevlog.addrevision(
3061 text,
3071 text,
3062 tr,
3072 tr,
3063 linkrev,
3073 linkrev,
3064 p1,
3074 p1,
3065 p2,
3075 p2,
3066 cachedelta=cachedelta,
3076 cachedelta=cachedelta,
3067 node=node,
3077 node=node,
3068 flags=flags,
3078 flags=flags,
3069 deltacomputer=deltacomputer,
3079 deltacomputer=deltacomputer,
3070 sidedata=sidedata,
3080 sidedata=sidedata,
3071 )
3081 )
3072 else:
3082 else:
3073 if destrevlog._lazydelta:
3083 if destrevlog._lazydelta:
3074 dp = self.deltaparent(rev)
3084 dp = self.deltaparent(rev)
3075 if dp != nullrev:
3085 if dp != nullrev:
3076 cachedelta = (dp, bytes(self._chunk(rev)))
3086 cachedelta = (dp, bytes(self._chunk(rev)))
3077
3087
3078 sidedata = None
3088 sidedata = None
3079 if not cachedelta:
3089 if not cachedelta:
3080 rawtext = self._revisiondata(rev)
3090 rawtext = self._revisiondata(rev)
3081 sidedata = self.sidedata(rev)
3091 sidedata = self.sidedata(rev)
3082 if sidedata is None:
3092 if sidedata is None:
3083 sidedata = self.sidedata(rev)
3093 sidedata = self.sidedata(rev)
3084
3094
3085 if sidedata_helpers is not None:
3095 if sidedata_helpers is not None:
3086 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3096 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3087 self, sidedata_helpers, sidedata, rev
3097 self, sidedata_helpers, sidedata, rev
3088 )
3098 )
3089 flags = flags | new_flags[0] & ~new_flags[1]
3099 flags = flags | new_flags[0] & ~new_flags[1]
3090
3100
3091 with destrevlog._writing(tr):
3101 with destrevlog._writing(tr):
3092 destrevlog._addrevision(
3102 destrevlog._addrevision(
3093 node,
3103 node,
3094 rawtext,
3104 rawtext,
3095 tr,
3105 tr,
3096 linkrev,
3106 linkrev,
3097 p1,
3107 p1,
3098 p2,
3108 p2,
3099 flags,
3109 flags,
3100 cachedelta,
3110 cachedelta,
3101 deltacomputer=deltacomputer,
3111 deltacomputer=deltacomputer,
3102 sidedata=sidedata,
3112 sidedata=sidedata,
3103 )
3113 )
3104
3114
3105 if addrevisioncb:
3115 if addrevisioncb:
3106 addrevisioncb(self, rev, node)
3116 addrevisioncb(self, rev, node)
3107
3117
3108 def censorrevision(self, tr, censornode, tombstone=b''):
3118 def censorrevision(self, tr, censornode, tombstone=b''):
3109 if self._format_version == REVLOGV0:
3119 if self._format_version == REVLOGV0:
3110 raise error.RevlogError(
3120 raise error.RevlogError(
3111 _(b'cannot censor with version %d revlogs')
3121 _(b'cannot censor with version %d revlogs')
3112 % self._format_version
3122 % self._format_version
3113 )
3123 )
3114 elif self._format_version == REVLOGV1:
3124 elif self._format_version == REVLOGV1:
3115 rewrite.v1_censor(self, tr, censornode, tombstone)
3125 rewrite.v1_censor(self, tr, censornode, tombstone)
3116 else:
3126 else:
3117 rewrite.v2_censor(self, tr, censornode, tombstone)
3127 rewrite.v2_censor(self, tr, censornode, tombstone)
3118
3128
3119 def verifyintegrity(self, state):
3129 def verifyintegrity(self, state):
3120 """Verifies the integrity of the revlog.
3130 """Verifies the integrity of the revlog.
3121
3131
3122 Yields ``revlogproblem`` instances describing problems that are
3132 Yields ``revlogproblem`` instances describing problems that are
3123 found.
3133 found.
3124 """
3134 """
3125 dd, di = self.checksize()
3135 dd, di = self.checksize()
3126 if dd:
3136 if dd:
3127 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3137 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3128 if di:
3138 if di:
3129 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3139 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3130
3140
3131 version = self._format_version
3141 version = self._format_version
3132
3142
3133 # The verifier tells us what version revlog we should be.
3143 # The verifier tells us what version revlog we should be.
3134 if version != state[b'expectedversion']:
3144 if version != state[b'expectedversion']:
3135 yield revlogproblem(
3145 yield revlogproblem(
3136 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3146 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3137 % (self.display_id, version, state[b'expectedversion'])
3147 % (self.display_id, version, state[b'expectedversion'])
3138 )
3148 )
3139
3149
3140 state[b'skipread'] = set()
3150 state[b'skipread'] = set()
3141 state[b'safe_renamed'] = set()
3151 state[b'safe_renamed'] = set()
3142
3152
3143 for rev in self:
3153 for rev in self:
3144 node = self.node(rev)
3154 node = self.node(rev)
3145
3155
3146 # Verify contents. 4 cases to care about:
3156 # Verify contents. 4 cases to care about:
3147 #
3157 #
3148 # common: the most common case
3158 # common: the most common case
3149 # rename: with a rename
3159 # rename: with a rename
3150 # meta: file content starts with b'\1\n', the metadata
3160 # meta: file content starts with b'\1\n', the metadata
3151 # header defined in filelog.py, but without a rename
3161 # header defined in filelog.py, but without a rename
3152 # ext: content stored externally
3162 # ext: content stored externally
3153 #
3163 #
3154 # More formally, their differences are shown below:
3164 # More formally, their differences are shown below:
3155 #
3165 #
3156 # | common | rename | meta | ext
3166 # | common | rename | meta | ext
3157 # -------------------------------------------------------
3167 # -------------------------------------------------------
3158 # flags() | 0 | 0 | 0 | not 0
3168 # flags() | 0 | 0 | 0 | not 0
3159 # renamed() | False | True | False | ?
3169 # renamed() | False | True | False | ?
3160 # rawtext[0:2]=='\1\n'| False | True | True | ?
3170 # rawtext[0:2]=='\1\n'| False | True | True | ?
3161 #
3171 #
3162 # "rawtext" means the raw text stored in revlog data, which
3172 # "rawtext" means the raw text stored in revlog data, which
3163 # could be retrieved by "rawdata(rev)". "text"
3173 # could be retrieved by "rawdata(rev)". "text"
3164 # mentioned below is "revision(rev)".
3174 # mentioned below is "revision(rev)".
3165 #
3175 #
3166 # There are 3 different lengths stored physically:
3176 # There are 3 different lengths stored physically:
3167 # 1. L1: rawsize, stored in revlog index
3177 # 1. L1: rawsize, stored in revlog index
3168 # 2. L2: len(rawtext), stored in revlog data
3178 # 2. L2: len(rawtext), stored in revlog data
3169 # 3. L3: len(text), stored in revlog data if flags==0, or
3179 # 3. L3: len(text), stored in revlog data if flags==0, or
3170 # possibly somewhere else if flags!=0
3180 # possibly somewhere else if flags!=0
3171 #
3181 #
3172 # L1 should be equal to L2. L3 could be different from them.
3182 # L1 should be equal to L2. L3 could be different from them.
3173 # "text" may or may not affect commit hash depending on flag
3183 # "text" may or may not affect commit hash depending on flag
3174 # processors (see flagutil.addflagprocessor).
3184 # processors (see flagutil.addflagprocessor).
3175 #
3185 #
3176 # | common | rename | meta | ext
3186 # | common | rename | meta | ext
3177 # -------------------------------------------------
3187 # -------------------------------------------------
3178 # rawsize() | L1 | L1 | L1 | L1
3188 # rawsize() | L1 | L1 | L1 | L1
3179 # size() | L1 | L2-LM | L1(*) | L1 (?)
3189 # size() | L1 | L2-LM | L1(*) | L1 (?)
3180 # len(rawtext) | L2 | L2 | L2 | L2
3190 # len(rawtext) | L2 | L2 | L2 | L2
3181 # len(text) | L2 | L2 | L2 | L3
3191 # len(text) | L2 | L2 | L2 | L3
3182 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3192 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3183 #
3193 #
3184 # LM: length of metadata, depending on rawtext
3194 # LM: length of metadata, depending on rawtext
3185 # (*): not ideal, see comment in filelog.size
3195 # (*): not ideal, see comment in filelog.size
3186 # (?): could be "- len(meta)" if the resolved content has
3196 # (?): could be "- len(meta)" if the resolved content has
3187 # rename metadata
3197 # rename metadata
3188 #
3198 #
3189 # Checks needed to be done:
3199 # Checks needed to be done:
3190 # 1. length check: L1 == L2, in all cases.
3200 # 1. length check: L1 == L2, in all cases.
3191 # 2. hash check: depending on flag processor, we may need to
3201 # 2. hash check: depending on flag processor, we may need to
3192 # use either "text" (external), or "rawtext" (in revlog).
3202 # use either "text" (external), or "rawtext" (in revlog).
3193
3203
3194 try:
3204 try:
3195 skipflags = state.get(b'skipflags', 0)
3205 skipflags = state.get(b'skipflags', 0)
3196 if skipflags:
3206 if skipflags:
3197 skipflags &= self.flags(rev)
3207 skipflags &= self.flags(rev)
3198
3208
3199 _verify_revision(self, skipflags, state, node)
3209 _verify_revision(self, skipflags, state, node)
3200
3210
3201 l1 = self.rawsize(rev)
3211 l1 = self.rawsize(rev)
3202 l2 = len(self.rawdata(node))
3212 l2 = len(self.rawdata(node))
3203
3213
3204 if l1 != l2:
3214 if l1 != l2:
3205 yield revlogproblem(
3215 yield revlogproblem(
3206 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3216 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3207 node=node,
3217 node=node,
3208 )
3218 )
3209
3219
3210 except error.CensoredNodeError:
3220 except error.CensoredNodeError:
3211 if state[b'erroroncensored']:
3221 if state[b'erroroncensored']:
3212 yield revlogproblem(
3222 yield revlogproblem(
3213 error=_(b'censored file data'), node=node
3223 error=_(b'censored file data'), node=node
3214 )
3224 )
3215 state[b'skipread'].add(node)
3225 state[b'skipread'].add(node)
3216 except Exception as e:
3226 except Exception as e:
3217 yield revlogproblem(
3227 yield revlogproblem(
3218 error=_(b'unpacking %s: %s')
3228 error=_(b'unpacking %s: %s')
3219 % (short(node), stringutil.forcebytestr(e)),
3229 % (short(node), stringutil.forcebytestr(e)),
3220 node=node,
3230 node=node,
3221 )
3231 )
3222 state[b'skipread'].add(node)
3232 state[b'skipread'].add(node)
3223
3233
3224 def storageinfo(
3234 def storageinfo(
3225 self,
3235 self,
3226 exclusivefiles=False,
3236 exclusivefiles=False,
3227 sharedfiles=False,
3237 sharedfiles=False,
3228 revisionscount=False,
3238 revisionscount=False,
3229 trackedsize=False,
3239 trackedsize=False,
3230 storedsize=False,
3240 storedsize=False,
3231 ):
3241 ):
3232 d = {}
3242 d = {}
3233
3243
3234 if exclusivefiles:
3244 if exclusivefiles:
3235 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3245 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3236 if not self._inline:
3246 if not self._inline:
3237 d[b'exclusivefiles'].append((self.opener, self._datafile))
3247 d[b'exclusivefiles'].append((self.opener, self._datafile))
3238
3248
3239 if sharedfiles:
3249 if sharedfiles:
3240 d[b'sharedfiles'] = []
3250 d[b'sharedfiles'] = []
3241
3251
3242 if revisionscount:
3252 if revisionscount:
3243 d[b'revisionscount'] = len(self)
3253 d[b'revisionscount'] = len(self)
3244
3254
3245 if trackedsize:
3255 if trackedsize:
3246 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3256 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3247
3257
3248 if storedsize:
3258 if storedsize:
3249 d[b'storedsize'] = sum(
3259 d[b'storedsize'] = sum(
3250 self.opener.stat(path).st_size for path in self.files()
3260 self.opener.stat(path).st_size for path in self.files()
3251 )
3261 )
3252
3262
3253 return d
3263 return d
3254
3264
3255 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3265 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3256 if not self.hassidedata:
3266 if not self.hassidedata:
3257 return
3267 return
3258 # revlog formats with sidedata support does not support inline
3268 # revlog formats with sidedata support does not support inline
3259 assert not self._inline
3269 assert not self._inline
3260 if not helpers[1] and not helpers[2]:
3270 if not helpers[1] and not helpers[2]:
3261 # Nothing to generate or remove
3271 # Nothing to generate or remove
3262 return
3272 return
3263
3273
3264 new_entries = []
3274 new_entries = []
3265 # append the new sidedata
3275 # append the new sidedata
3266 with self._writing(transaction):
3276 with self._writing(transaction):
3267 ifh, dfh, sdfh = self._writinghandles
3277 ifh, dfh, sdfh = self._writinghandles
3268 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3278 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3269
3279
3270 current_offset = sdfh.tell()
3280 current_offset = sdfh.tell()
3271 for rev in range(startrev, endrev + 1):
3281 for rev in range(startrev, endrev + 1):
3272 entry = self.index[rev]
3282 entry = self.index[rev]
3273 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3283 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3274 store=self,
3284 store=self,
3275 sidedata_helpers=helpers,
3285 sidedata_helpers=helpers,
3276 sidedata={},
3286 sidedata={},
3277 rev=rev,
3287 rev=rev,
3278 )
3288 )
3279
3289
3280 serialized_sidedata = sidedatautil.serialize_sidedata(
3290 serialized_sidedata = sidedatautil.serialize_sidedata(
3281 new_sidedata
3291 new_sidedata
3282 )
3292 )
3283
3293
3284 sidedata_compression_mode = COMP_MODE_INLINE
3294 sidedata_compression_mode = COMP_MODE_INLINE
3285 if serialized_sidedata and self.hassidedata:
3295 if serialized_sidedata and self.hassidedata:
3286 sidedata_compression_mode = COMP_MODE_PLAIN
3296 sidedata_compression_mode = COMP_MODE_PLAIN
3287 h, comp_sidedata = self.compress(serialized_sidedata)
3297 h, comp_sidedata = self.compress(serialized_sidedata)
3288 if (
3298 if (
3289 h != b'u'
3299 h != b'u'
3290 and comp_sidedata[0] != b'\0'
3300 and comp_sidedata[0] != b'\0'
3291 and len(comp_sidedata) < len(serialized_sidedata)
3301 and len(comp_sidedata) < len(serialized_sidedata)
3292 ):
3302 ):
3293 assert not h
3303 assert not h
3294 if (
3304 if (
3295 comp_sidedata[0]
3305 comp_sidedata[0]
3296 == self._docket.default_compression_header
3306 == self._docket.default_compression_header
3297 ):
3307 ):
3298 sidedata_compression_mode = COMP_MODE_DEFAULT
3308 sidedata_compression_mode = COMP_MODE_DEFAULT
3299 serialized_sidedata = comp_sidedata
3309 serialized_sidedata = comp_sidedata
3300 else:
3310 else:
3301 sidedata_compression_mode = COMP_MODE_INLINE
3311 sidedata_compression_mode = COMP_MODE_INLINE
3302 serialized_sidedata = comp_sidedata
3312 serialized_sidedata = comp_sidedata
3303 if entry[8] != 0 or entry[9] != 0:
3313 if entry[8] != 0 or entry[9] != 0:
3304 # rewriting entries that already have sidedata is not
3314 # rewriting entries that already have sidedata is not
3305 # supported yet, because it introduces garbage data in the
3315 # supported yet, because it introduces garbage data in the
3306 # revlog.
3316 # revlog.
3307 msg = b"rewriting existing sidedata is not supported yet"
3317 msg = b"rewriting existing sidedata is not supported yet"
3308 raise error.Abort(msg)
3318 raise error.Abort(msg)
3309
3319
3310 # Apply (potential) flags to add and to remove after running
3320 # Apply (potential) flags to add and to remove after running
3311 # the sidedata helpers
3321 # the sidedata helpers
3312 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3322 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3313 entry_update = (
3323 entry_update = (
3314 current_offset,
3324 current_offset,
3315 len(serialized_sidedata),
3325 len(serialized_sidedata),
3316 new_offset_flags,
3326 new_offset_flags,
3317 sidedata_compression_mode,
3327 sidedata_compression_mode,
3318 )
3328 )
3319
3329
3320 # the sidedata computation might have move the file cursors around
3330 # the sidedata computation might have move the file cursors around
3321 sdfh.seek(current_offset, os.SEEK_SET)
3331 sdfh.seek(current_offset, os.SEEK_SET)
3322 sdfh.write(serialized_sidedata)
3332 sdfh.write(serialized_sidedata)
3323 new_entries.append(entry_update)
3333 new_entries.append(entry_update)
3324 current_offset += len(serialized_sidedata)
3334 current_offset += len(serialized_sidedata)
3325 self._docket.sidedata_end = sdfh.tell()
3335 self._docket.sidedata_end = sdfh.tell()
3326
3336
3327 # rewrite the new index entries
3337 # rewrite the new index entries
3328 ifh.seek(startrev * self.index.entry_size)
3338 ifh.seek(startrev * self.index.entry_size)
3329 for i, e in enumerate(new_entries):
3339 for i, e in enumerate(new_entries):
3330 rev = startrev + i
3340 rev = startrev + i
3331 self.index.replace_sidedata_info(rev, *e)
3341 self.index.replace_sidedata_info(rev, *e)
3332 packed = self.index.entry_binary(rev)
3342 packed = self.index.entry_binary(rev)
3333 if rev == 0 and self._docket is None:
3343 if rev == 0 and self._docket is None:
3334 header = self._format_flags | self._format_version
3344 header = self._format_flags | self._format_version
3335 header = self.index.pack_header(header)
3345 header = self.index.pack_header(header)
3336 packed = header + packed
3346 packed = header + packed
3337 ifh.write(packed)
3347 ifh.write(packed)
General Comments 0
You need to be logged in to leave comments. Login now