delta-find: use sets instead of list in the snapshot cache...
marmoute, r50574:b670eb3d (default)
/*
 parsers.c - efficient content parsing

 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others

 This software may be used and distributed according to the terms of
 the GNU General Public License, incorporated herein by reference.
*/

#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <structmember.h>

#include "bitmanipulation.h"
#include "charencode.h"
#include "compat.h"
#include "revlog.h"
#include "util.h"

typedef struct indexObjectStruct indexObject;

typedef struct {
	int children[16];
} nodetreenode;

typedef struct {
	int abi_version;
	Py_ssize_t (*index_length)(const indexObject *);
	const char *(*index_node)(indexObject *, Py_ssize_t);
	int (*fast_rank)(indexObject *, Py_ssize_t);
	int (*index_parents)(PyObject *, int, int *);
} Revlog_CAPI;

/*
 * A base-16 trie for fast node->rev mapping.
 *
 * Positive value is index of the next node in the trie
 * Negative value is a leaf: -(rev + 2)
 * Zero is empty
 */
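/*
 * For example, a child slot holding rev 5 stores -(5 + 2) == -7, and a
 * leaf is decoded back with rev = -value - 2.  The bias of 2 ensures that
 * even rev 0 encodes to a negative value (-2), so leaves can never collide
 * with the "empty" marker 0 or with the positive indexes that point at
 * further nodetreenodes.
 */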
typedef struct {
	indexObject *index;
	nodetreenode *nodes;
	Py_ssize_t nodelen;
	size_t length;   /* # nodes in use */
	size_t capacity; /* # nodes allocated */
	int depth;       /* maximum depth of tree */
	int splits;      /* # splits performed */
} nodetree;

typedef struct {
	PyObject_HEAD /* ; */
	nodetree nt;
} nodetreeObject;

/*
 * This class has two behaviors.
 *
 * When used in a list-like way (with integer keys), we decode an
 * entry in a RevlogNG index file on demand. We have limited support for
 * integer-keyed insert and delete, only at elements right before the
 * end.
 *
 * With string keys, we lazily perform a reverse mapping from node to
 * rev, using a base-16 trie.
 */
struct indexObjectStruct {
	PyObject_HEAD
	/* Type-specific fields go here. */
	PyObject *data;         /* raw bytes of index */
	Py_ssize_t nodelen;     /* digest size of the hash, 20 for SHA-1 */
	PyObject *nullentry;    /* fast path for references to null */
	Py_buffer buf;          /* buffer of data */
	const char **offsets;   /* populated on demand */
	Py_ssize_t length;      /* current on-disk number of elements */
	unsigned new_length;    /* number of added elements */
	unsigned added_length;  /* space reserved for added elements */
	char *added;            /* populated on demand */
	PyObject *headrevs;     /* cache, invalidated on changes */
	PyObject *filteredrevs; /* filtered revs set */
	nodetree nt;            /* base-16 trie */
	int ntinitialized;      /* 0 or 1 */
	int ntrev;              /* last rev scanned */
	int ntlookups;          /* # lookups */
	int ntmisses;           /* # lookups that miss the cache */
	int inlined;
	long entry_size; /* size of index headers. Differs in v1 v.s. v2 format
	                  */
	long rust_ext_compat; /* compatibility with being used in rust
	                         extensions */
	long format_version;  /* format version selector (format_*) */
};

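/*
 * Total number of revisions exposed by the index: entries already on
 * disk ("length") plus entries appended in memory since the index was
 * loaded ("new_length", stored in the "added" buffer).
 */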
static Py_ssize_t index_length(const indexObject *self)
{
	return self->length + self->new_length;
}

static const char nullid[32] = {0};
static const Py_ssize_t nullrev = -1;

static Py_ssize_t inline_scan(indexObject *self, const char **offsets);

static int index_find_node(indexObject *self, const char *node);

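/*
 * Format string shared by index_get (Py_BuildValue) and index_append
 * (PyArg_ParseTuple) for an index entry tuple: offset_flags, comp_len,
 * uncomp_len, base_rev, link_rev, parent_1, parent_2, node id (with
 * length), sidedata_offset, sidedata_comp_len, data compression mode,
 * sidedata compression mode, rank.  'K' (unsigned long long) is needed
 * for the 64-bit fields when long is only 32 bits wide; otherwise 'k'
 * (unsigned long) suffices.
 */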
#if LONG_MAX == 0x7fffffffL
static const char *const tuple_format = "Kiiiiiiy#KiBBi";
#else
static const char *const tuple_format = "kiiiiiiy#kiBBi";
#endif

/* A RevlogNG v1 index entry is 64 bytes long. */
static const long v1_entry_size = 64;

/* A Revlogv2 index entry is 96 bytes long. */
static const long v2_entry_size = 96;

/* A Changelogv2 index entry is 96 bytes long. */
static const long cl2_entry_size = 96;

/* Internal format version.
 * Must match their counterparts in revlogutils/constants.py */
static const long format_v1 = 1;        /* constants.py: REVLOGV1 */
static const long format_v2 = 0xDEAD;   /* constants.py: REVLOGV2 */
static const long format_cl2 = 0xD34D;  /* constants.py: CHANGELOGV2 */

static const long entry_v1_offset_high = 0;
static const long entry_v1_offset_offset_flags = 4;
static const long entry_v1_offset_comp_len = 8;
static const long entry_v1_offset_uncomp_len = 12;
static const long entry_v1_offset_base_rev = 16;
static const long entry_v1_offset_link_rev = 20;
static const long entry_v1_offset_parent_1 = 24;
static const long entry_v1_offset_parent_2 = 28;
static const long entry_v1_offset_node_id = 32;

static const long entry_v2_offset_high = 0;
static const long entry_v2_offset_offset_flags = 4;
static const long entry_v2_offset_comp_len = 8;
static const long entry_v2_offset_uncomp_len = 12;
static const long entry_v2_offset_base_rev = 16;
static const long entry_v2_offset_link_rev = 20;
static const long entry_v2_offset_parent_1 = 24;
static const long entry_v2_offset_parent_2 = 28;
static const long entry_v2_offset_node_id = 32;
static const long entry_v2_offset_sidedata_offset = 64;
static const long entry_v2_offset_sidedata_comp_len = 72;
static const long entry_v2_offset_all_comp_mode = 76;
/* next free offset: 77 */

static const long entry_cl2_offset_high = 0;
static const long entry_cl2_offset_offset_flags = 4;
static const long entry_cl2_offset_comp_len = 8;
static const long entry_cl2_offset_uncomp_len = 12;
static const long entry_cl2_offset_parent_1 = 16;
static const long entry_cl2_offset_parent_2 = 20;
static const long entry_cl2_offset_node_id = 24;
static const long entry_cl2_offset_sidedata_offset = 56;
static const long entry_cl2_offset_sidedata_comp_len = 64;
static const long entry_cl2_offset_all_comp_mode = 68;
static const long entry_cl2_offset_rank = 69;
/* next free offset: 73 */

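/*
 * v2 and cl2 entries pack both compression modes into the single byte at
 * *_offset_all_comp_mode: bits 0-1 hold the data compression mode and
 * bits 2-3 hold the sidedata compression mode.  v1 entries have no such
 * byte; for them both modes are always reported as comp_mode_inline.
 */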
static const char comp_mode_inline = 2;
static const int rank_unknown = -1;

static void raise_revlog_error(void)
{
	PyObject *mod = NULL, *dict = NULL, *errclass = NULL;

	mod = PyImport_ImportModule("mercurial.error");
	if (mod == NULL) {
		goto cleanup;
	}

	dict = PyModule_GetDict(mod);
	if (dict == NULL) {
		goto cleanup;
	}
	Py_INCREF(dict);

	errclass = PyDict_GetItemString(dict, "RevlogError");
	if (errclass == NULL) {
		PyErr_SetString(PyExc_SystemError,
		                "could not find RevlogError");
		goto cleanup;
	}

	/* value of exception is ignored by callers */
	PyErr_SetString(errclass, "RevlogError");

cleanup:
	Py_XDECREF(dict);
	Py_XDECREF(mod);
}

/*
 * Return a pointer to the beginning of a RevlogNG record.
 */
static const char *index_deref(indexObject *self, Py_ssize_t pos)
{
	if (pos >= self->length)
		return self->added + (pos - self->length) * self->entry_size;

	if (self->inlined && pos > 0) {
		if (self->offsets == NULL) {
			Py_ssize_t ret;
			self->offsets =
			    PyMem_Malloc(self->length * sizeof(*self->offsets));
			if (self->offsets == NULL)
				return (const char *)PyErr_NoMemory();
			ret = inline_scan(self, self->offsets);
			if (ret == -1) {
				return NULL;
			};
		}
		return self->offsets[pos];
	}

	return (const char *)(self->buf.buf) + pos * self->entry_size;
}

/*
 * Get parents of the given rev.
 *
 * The specified rev must be valid and must not be nullrev. A returned
 * parent revision may be nullrev, but is guaranteed to be in valid range.
 */
static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
                                    int maxrev)
{
	const char *data = index_deref(self, rev);

	if (self->format_version == format_v1) {
		ps[0] = getbe32(data + entry_v1_offset_parent_1);
		ps[1] = getbe32(data + entry_v1_offset_parent_2);
	} else if (self->format_version == format_v2) {
		ps[0] = getbe32(data + entry_v2_offset_parent_1);
		ps[1] = getbe32(data + entry_v2_offset_parent_2);
	} else if (self->format_version == format_cl2) {
		ps[0] = getbe32(data + entry_cl2_offset_parent_1);
		ps[1] = getbe32(data + entry_cl2_offset_parent_2);
	} else {
		raise_revlog_error();
		return -1;
	}

	/* If index file is corrupted, ps[] may point to invalid revisions. So
	 * there is a risk of buffer overflow to trust them unconditionally. */
	if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
		PyErr_SetString(PyExc_ValueError, "parent out of range");
		return -1;
	}
	return 0;
}

/*
 * Get parents of the given rev.
 *
 * If the specified rev is out of range, IndexError will be raised. If the
 * revlog entry is corrupted, ValueError may be raised.
 *
 * Returns 0 on success or -1 on failure.
 */
static int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
{
	int tiprev;
	if (!op || !HgRevlogIndex_Check(op) || !ps) {
		PyErr_BadInternalCall();
		return -1;
	}
	tiprev = (int)index_length((indexObject *)op) - 1;
	if (rev < -1 || rev > tiprev) {
		PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
		return -1;
	} else if (rev == -1) {
		ps[0] = ps[1] = -1;
		return 0;
	} else {
		return index_get_parents((indexObject *)op, rev, ps, tiprev);
	}
}

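/*
 * The first eight bytes of an entry pack a 48-bit byte offset into the
 * data file together with 16 flag bits (see the RevlogNG layout below),
 * read here as two big-endian 32-bit words.  Shifting the packed value
 * right by 16 therefore yields the start offset of the revision.  For
 * the first on-disk entry those bytes double as the revlog version
 * header, so they are masked away: revision 0 always starts at offset 0.
 */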
static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
{
	const char *data;
	uint64_t offset;

	if (rev == nullrev)
		return 0;

	data = index_deref(self, rev);

	if (self->format_version == format_v1) {
		offset = getbe32(data + entry_v1_offset_offset_flags);
		if (rev == 0) {
			/* mask out version number for the first entry */
			offset &= 0xFFFF;
		} else {
			uint32_t offset_high =
			    getbe32(data + entry_v1_offset_high);
			offset |= ((uint64_t)offset_high) << 32;
		}
	} else if (self->format_version == format_v2) {
		offset = getbe32(data + entry_v2_offset_offset_flags);
		if (rev == 0) {
			/* mask out version number for the first entry */
			offset &= 0xFFFF;
		} else {
			uint32_t offset_high =
			    getbe32(data + entry_v2_offset_high);
			offset |= ((uint64_t)offset_high) << 32;
		}
	} else if (self->format_version == format_cl2) {
		uint32_t offset_high = getbe32(data + entry_cl2_offset_high);
		offset = getbe32(data + entry_cl2_offset_offset_flags);
		offset |= ((uint64_t)offset_high) << 32;
	} else {
		raise_revlog_error();
		return -1;
	}

	return (int64_t)(offset >> 16);
}

static inline int index_get_length(indexObject *self, Py_ssize_t rev)
{
	const char *data;
	int tmp;

	if (rev == nullrev)
		return 0;

	data = index_deref(self, rev);

	if (self->format_version == format_v1) {
		tmp = (int)getbe32(data + entry_v1_offset_comp_len);
	} else if (self->format_version == format_v2) {
		tmp = (int)getbe32(data + entry_v2_offset_comp_len);
	} else if (self->format_version == format_cl2) {
		tmp = (int)getbe32(data + entry_cl2_offset_comp_len);
	} else {
		raise_revlog_error();
		return -1;
	}
	if (tmp < 0) {
		PyErr_Format(PyExc_OverflowError,
		             "revlog entry size out of bound (%d)", tmp);
		return -1;
	}
	return tmp;
}

/*
 * RevlogNG format (all in big endian, data may be inlined):
 * 6 bytes: offset
 * 2 bytes: flags
 * 4 bytes: compressed length
 * 4 bytes: uncompressed length
 * 4 bytes: base revision
 * 4 bytes: link revision
 * 4 bytes: parent 1 revision
 * 4 bytes: parent 2 revision
 * 32 bytes: nodeid (only 20 bytes used with SHA-1)
 */
static PyObject *index_get(indexObject *self, Py_ssize_t pos)
{
	uint64_t offset_flags, sidedata_offset;
	int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
	    sidedata_comp_len, rank = rank_unknown;
	char data_comp_mode, sidedata_comp_mode;
	const char *c_node_id;
	const char *data;
	Py_ssize_t length = index_length(self);

	if (pos == nullrev) {
		Py_INCREF(self->nullentry);
		return self->nullentry;
	}

	if (pos < 0 || pos >= length) {
		PyErr_SetString(PyExc_IndexError, "revlog index out of range");
		return NULL;
	}

	data = index_deref(self, pos);
	if (data == NULL)
		return NULL;

	if (self->format_version == format_v1) {
		offset_flags = getbe32(data + entry_v1_offset_offset_flags);
		/*
		 * The first entry on-disk needs the version number masked out,
		 * but this doesn't apply if entries are added to an empty
		 * index.
		 */
		if (self->length && pos == 0)
			offset_flags &= 0xFFFF;
		else {
			uint32_t offset_high =
			    getbe32(data + entry_v1_offset_high);
			offset_flags |= ((uint64_t)offset_high) << 32;
		}

		comp_len = getbe32(data + entry_v1_offset_comp_len);
		uncomp_len = getbe32(data + entry_v1_offset_uncomp_len);
		base_rev = getbe32(data + entry_v1_offset_base_rev);
		link_rev = getbe32(data + entry_v1_offset_link_rev);
		parent_1 = getbe32(data + entry_v1_offset_parent_1);
		parent_2 = getbe32(data + entry_v1_offset_parent_2);
		c_node_id = data + entry_v1_offset_node_id;

		sidedata_offset = 0;
		sidedata_comp_len = 0;
		data_comp_mode = comp_mode_inline;
		sidedata_comp_mode = comp_mode_inline;
	} else if (self->format_version == format_v2) {
		offset_flags = getbe32(data + entry_v2_offset_offset_flags);
		/*
		 * The first entry on-disk needs the version number masked out,
		 * but this doesn't apply if entries are added to an empty
		 * index.
		 */
		if (self->length && pos == 0)
			offset_flags &= 0xFFFF;
		else {
			uint32_t offset_high =
			    getbe32(data + entry_v2_offset_high);
			offset_flags |= ((uint64_t)offset_high) << 32;
		}

		comp_len = getbe32(data + entry_v2_offset_comp_len);
		uncomp_len = getbe32(data + entry_v2_offset_uncomp_len);
		base_rev = getbe32(data + entry_v2_offset_base_rev);
		link_rev = getbe32(data + entry_v2_offset_link_rev);
		parent_1 = getbe32(data + entry_v2_offset_parent_1);
		parent_2 = getbe32(data + entry_v2_offset_parent_2);
		c_node_id = data + entry_v2_offset_node_id;

		sidedata_offset =
		    getbe64(data + entry_v2_offset_sidedata_offset);
		sidedata_comp_len =
		    getbe32(data + entry_v2_offset_sidedata_comp_len);
		data_comp_mode = data[entry_v2_offset_all_comp_mode] & 3;
		sidedata_comp_mode =
		    ((data[entry_v2_offset_all_comp_mode] >> 2) & 3);
	} else if (self->format_version == format_cl2) {
		uint32_t offset_high = getbe32(data + entry_cl2_offset_high);
		offset_flags = getbe32(data + entry_cl2_offset_offset_flags);
		offset_flags |= ((uint64_t)offset_high) << 32;
		comp_len = getbe32(data + entry_cl2_offset_comp_len);
		uncomp_len = getbe32(data + entry_cl2_offset_uncomp_len);
		/* base_rev and link_rev are not stored in changelogv2, but are
		   still used by some functions shared with the other revlogs.
		   They are supposed to contain links to other revisions,
		   but they always point to themselves in the case of a changelog.
		*/
		base_rev = pos;
		link_rev = pos;
		parent_1 = getbe32(data + entry_cl2_offset_parent_1);
		parent_2 = getbe32(data + entry_cl2_offset_parent_2);
		c_node_id = data + entry_cl2_offset_node_id;
		sidedata_offset =
		    getbe64(data + entry_cl2_offset_sidedata_offset);
		sidedata_comp_len =
		    getbe32(data + entry_cl2_offset_sidedata_comp_len);
		data_comp_mode = data[entry_cl2_offset_all_comp_mode] & 3;
		sidedata_comp_mode =
		    ((data[entry_cl2_offset_all_comp_mode] >> 2) & 3);
		rank = getbe32(data + entry_cl2_offset_rank);
	} else {
		raise_revlog_error();
		return NULL;
	}

	return Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
	                     base_rev, link_rev, parent_1, parent_2, c_node_id,
	                     self->nodelen, sidedata_offset, sidedata_comp_len,
	                     data_comp_mode, sidedata_comp_mode, rank);
}
/*
 * Pack header information in binary
 */
static PyObject *index_pack_header(indexObject *self, PyObject *args)
{
	int header;
	char out[4];
	if (!PyArg_ParseTuple(args, "i", &header)) {
		return NULL;
	}
	if (self->format_version != format_v1) {
		PyErr_Format(PyExc_RuntimeError,
		             "version header should go in the docket, not the "
		             "index: %d",
		             header);
		return NULL;
	}
	putbe32(header, out);
	return PyBytes_FromStringAndSize(out, 4);
}
/*
 * Return the raw binary string representing a revision
 */
static PyObject *index_entry_binary(indexObject *self, PyObject *value)
{
	long rev;
	const char *data;
	Py_ssize_t length = index_length(self);

	if (!pylong_to_long(value, &rev)) {
		return NULL;
	}
	if (rev < 0 || rev >= length) {
		PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
		             rev);
		return NULL;
	};

	data = index_deref(self, rev);
	if (data == NULL)
		return NULL;
	if (rev == 0 && self->format_version == format_v1) {
		/* the header is eating the start of the first entry */
		return PyBytes_FromStringAndSize(data + 4,
		                                 self->entry_size - 4);
	}
	return PyBytes_FromStringAndSize(data, self->entry_size);
}

/*
 * Return the hash of node corresponding to the given rev.
 */
static const char *index_node(indexObject *self, Py_ssize_t pos)
{
	Py_ssize_t length = index_length(self);
	const char *data;
	const char *node_id;

	if (pos == nullrev)
		return nullid;

	if (pos >= length)
		return NULL;

	data = index_deref(self, pos);

	if (self->format_version == format_v1) {
		node_id = data + entry_v1_offset_node_id;
	} else if (self->format_version == format_v2) {
		node_id = data + entry_v2_offset_node_id;
	} else if (self->format_version == format_cl2) {
		node_id = data + entry_cl2_offset_node_id;
	} else {
		raise_revlog_error();
		return NULL;
	}

	return data ? node_id : NULL;
}

/*
 * Return the stored rank of a given revision if known, or rank_unknown
 * otherwise.
 *
 * The rank of a revision is the size of the sub-graph it defines as a head.
 * Equivalently, the rank of a revision `r` is the size of the set
 * `ancestors(r)`, `r` included.
 *
 * This method returns the rank retrieved from the revlog in constant time. It
 * makes no attempt at computing unknown values for versions of the revlog
 * which do not persist the rank.
 */
static int index_fast_rank(indexObject *self, Py_ssize_t pos)
{
	Py_ssize_t length = index_length(self);

	if (self->format_version != format_cl2 || pos >= length) {
		return rank_unknown;
	}

	if (pos == nullrev) {
		return 0; /* convention */
	}

	return getbe32(index_deref(self, pos) + entry_cl2_offset_rank);
}

/*
 * Return the hash of the node corresponding to the given rev. The
 * rev is assumed to be existing. If not, an exception is set.
 */
static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
{
	const char *node = index_node(self, pos);
	if (node == NULL) {
		PyErr_Format(PyExc_IndexError, "could not access rev %d",
		             (int)pos);
	}
	return node;
}

static int nt_insert(nodetree *self, const char *node, int rev);

static int node_check(Py_ssize_t nodelen, PyObject *obj, char **node)
{
	Py_ssize_t thisnodelen;
	if (PyBytes_AsStringAndSize(obj, node, &thisnodelen) == -1)
		return -1;
	if (nodelen == thisnodelen)
		return 0;
	PyErr_Format(PyExc_ValueError, "node len %zd != expected node len %zd",
	             thisnodelen, nodelen);
	return -1;
}

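/*
 * Append one entry to the in-memory "added" area.  The reserved space
 * grows geometrically when exhausted, doubling each time and starting
 * at 4096 entries, so repeated appends stay amortized O(1).
 */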
static PyObject *index_append(indexObject *self, PyObject *obj)
{
	uint64_t offset_flags, sidedata_offset;
	int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
	    sidedata_comp_len, rank;
	char data_comp_mode, sidedata_comp_mode;
	Py_ssize_t c_node_id_len;
	const char *c_node_id;
	char comp_field;
	char *data;

	if (!PyArg_ParseTuple(obj, tuple_format, &offset_flags, &comp_len,
	                      &uncomp_len, &base_rev, &link_rev, &parent_1,
	                      &parent_2, &c_node_id, &c_node_id_len,
	                      &sidedata_offset, &sidedata_comp_len,
	                      &data_comp_mode, &sidedata_comp_mode, &rank)) {
		PyErr_SetString(PyExc_TypeError, "12-tuple required");
		return NULL;
	}

	if (c_node_id_len != self->nodelen) {
		PyErr_SetString(PyExc_TypeError, "invalid node");
		return NULL;
	}
	if (self->format_version == format_v1) {

		if (data_comp_mode != comp_mode_inline) {
			PyErr_Format(PyExc_ValueError,
			             "invalid data compression mode: %i",
			             data_comp_mode);
			return NULL;
		}
		if (sidedata_comp_mode != comp_mode_inline) {
			PyErr_Format(PyExc_ValueError,
			             "invalid sidedata compression mode: %i",
			             sidedata_comp_mode);
			return NULL;
		}
	}

	if (self->new_length == self->added_length) {
		size_t new_added_length =
		    self->added_length ? self->added_length * 2 : 4096;
		void *new_added = PyMem_Realloc(
		    self->added, new_added_length * self->entry_size);
		if (!new_added)
			return PyErr_NoMemory();
		self->added = new_added;
		self->added_length = new_added_length;
	}
	rev = self->length + self->new_length;
	data = self->added + self->entry_size * self->new_length++;

	memset(data, 0, self->entry_size);

	if (self->format_version == format_v1) {
		putbe32(offset_flags >> 32, data + entry_v1_offset_high);
		putbe32(offset_flags & 0xffffffffU,
		        data + entry_v1_offset_offset_flags);
		putbe32(comp_len, data + entry_v1_offset_comp_len);
		putbe32(uncomp_len, data + entry_v1_offset_uncomp_len);
		putbe32(base_rev, data + entry_v1_offset_base_rev);
		putbe32(link_rev, data + entry_v1_offset_link_rev);
		putbe32(parent_1, data + entry_v1_offset_parent_1);
		putbe32(parent_2, data + entry_v1_offset_parent_2);
		memcpy(data + entry_v1_offset_node_id, c_node_id,
		       c_node_id_len);
	} else if (self->format_version == format_v2) {
		putbe32(offset_flags >> 32, data + entry_v2_offset_high);
		putbe32(offset_flags & 0xffffffffU,
		        data + entry_v2_offset_offset_flags);
		putbe32(comp_len, data + entry_v2_offset_comp_len);
		putbe32(uncomp_len, data + entry_v2_offset_uncomp_len);
		putbe32(base_rev, data + entry_v2_offset_base_rev);
		putbe32(link_rev, data + entry_v2_offset_link_rev);
		putbe32(parent_1, data + entry_v2_offset_parent_1);
		putbe32(parent_2, data + entry_v2_offset_parent_2);
		memcpy(data + entry_v2_offset_node_id, c_node_id,
		       c_node_id_len);
		putbe64(sidedata_offset,
		        data + entry_v2_offset_sidedata_offset);
		putbe32(sidedata_comp_len,
		        data + entry_v2_offset_sidedata_comp_len);
		comp_field = data_comp_mode & 3;
		comp_field = comp_field | (sidedata_comp_mode & 3) << 2;
		data[entry_v2_offset_all_comp_mode] = comp_field;
	} else if (self->format_version == format_cl2) {
		putbe32(offset_flags >> 32, data + entry_cl2_offset_high);
		putbe32(offset_flags & 0xffffffffU,
		        data + entry_cl2_offset_offset_flags);
		putbe32(comp_len, data + entry_cl2_offset_comp_len);
		putbe32(uncomp_len, data + entry_cl2_offset_uncomp_len);
		putbe32(parent_1, data + entry_cl2_offset_parent_1);
		putbe32(parent_2, data + entry_cl2_offset_parent_2);
		memcpy(data + entry_cl2_offset_node_id, c_node_id,
		       c_node_id_len);
		putbe64(sidedata_offset,
		        data + entry_cl2_offset_sidedata_offset);
		putbe32(sidedata_comp_len,
		        data + entry_cl2_offset_sidedata_comp_len);
		comp_field = data_comp_mode & 3;
		comp_field = comp_field | (sidedata_comp_mode & 3) << 2;
		data[entry_cl2_offset_all_comp_mode] = comp_field;
		putbe32(rank, data + entry_cl2_offset_rank);
	} else {
		raise_revlog_error();
		return NULL;
	}

	if (self->ntinitialized)
		nt_insert(&self->nt, c_node_id, rev);

	Py_CLEAR(self->headrevs);
	Py_RETURN_NONE;
}
737
737
738 /* Replace an existing index entry's sidedata offset and length with new ones.
738 /* Replace an existing index entry's sidedata offset and length with new ones.
739 This cannot be used outside of the context of sidedata rewriting,
739 This cannot be used outside of the context of sidedata rewriting,
740 inside the transaction that creates the given revision. */
740 inside the transaction that creates the given revision. */
741 static PyObject *index_replace_sidedata_info(indexObject *self, PyObject *args)
741 static PyObject *index_replace_sidedata_info(indexObject *self, PyObject *args)
742 {
742 {
743 uint64_t offset_flags, sidedata_offset;
743 uint64_t offset_flags, sidedata_offset;
744 Py_ssize_t rev;
744 Py_ssize_t rev;
745 int sidedata_comp_len;
745 int sidedata_comp_len;
746 char comp_mode;
746 char comp_mode;
747 char *data;
747 char *data;
748 #if LONG_MAX == 0x7fffffffL
748 #if LONG_MAX == 0x7fffffffL
749 const char *const sidedata_format = "nKiKB";
749 const char *const sidedata_format = "nKiKB";
750 #else
750 #else
751 const char *const sidedata_format = "nkikB";
751 const char *const sidedata_format = "nkikB";
752 #endif
752 #endif
753
753
754 if (self->entry_size == v1_entry_size || self->inlined) {
754 if (self->entry_size == v1_entry_size || self->inlined) {
755 /*
755 /*
756 There is a bug in the transaction handling when going from an
756 There is a bug in the transaction handling when going from an
757 inline revlog to a separate index and data file. Turn it off until
757 inline revlog to a separate index and data file. Turn it off until
758 it's fixed, since v2 revlogs sometimes get rewritten on exchange.
758 it's fixed, since v2 revlogs sometimes get rewritten on exchange.
759 See issue6485.
759 See issue6485.
760 */
760 */
761 raise_revlog_error();
761 raise_revlog_error();
762 return NULL;
762 return NULL;
763 }
763 }
764
764
765 if (!PyArg_ParseTuple(args, sidedata_format, &rev, &sidedata_offset,
765 if (!PyArg_ParseTuple(args, sidedata_format, &rev, &sidedata_offset,
766 &sidedata_comp_len, &offset_flags, &comp_mode))
766 &sidedata_comp_len, &offset_flags, &comp_mode))
767 return NULL;
767 return NULL;
768
768
769 if (rev < 0 || rev >= index_length(self)) {
769 if (rev < 0 || rev >= index_length(self)) {
770 PyErr_SetString(PyExc_IndexError, "revision outside index");
770 PyErr_SetString(PyExc_IndexError, "revision outside index");
771 return NULL;
771 return NULL;
772 }
772 }
773 if (rev < self->length) {
773 if (rev < self->length) {
774 PyErr_SetString(
774 PyErr_SetString(
775 PyExc_IndexError,
775 PyExc_IndexError,
776 "cannot rewrite entries outside of this transaction");
776 "cannot rewrite entries outside of this transaction");
777 return NULL;
777 return NULL;
778 }
778 }
779
779
780 /* Find the newly added node, offset from the "already on-disk" length
780 /* Find the newly added node, offset from the "already on-disk" length
781 */
781 */
782 data = self->added + self->entry_size * (rev - self->length);
782 data = self->added + self->entry_size * (rev - self->length);
783 if (self->format_version == format_v2) {
783 if (self->format_version == format_v2) {
784 putbe64(offset_flags, data + entry_v2_offset_high);
784 putbe64(offset_flags, data + entry_v2_offset_high);
785 putbe64(sidedata_offset,
785 putbe64(sidedata_offset,
786 data + entry_v2_offset_sidedata_offset);
786 data + entry_v2_offset_sidedata_offset);
787 putbe32(sidedata_comp_len,
787 putbe32(sidedata_comp_len,
788 data + entry_v2_offset_sidedata_comp_len);
788 data + entry_v2_offset_sidedata_comp_len);
789 data[entry_v2_offset_all_comp_mode] =
789 data[entry_v2_offset_all_comp_mode] =
790 (data[entry_v2_offset_all_comp_mode] & ~(3 << 2)) |
790 (data[entry_v2_offset_all_comp_mode] & ~(3 << 2)) |
791 ((comp_mode & 3) << 2);
791 ((comp_mode & 3) << 2);
792 } else if (self->format_version == format_cl2) {
792 } else if (self->format_version == format_cl2) {
793 putbe64(offset_flags, data + entry_cl2_offset_high);
793 putbe64(offset_flags, data + entry_cl2_offset_high);
794 putbe64(sidedata_offset,
794 putbe64(sidedata_offset,
795 data + entry_cl2_offset_sidedata_offset);
795 data + entry_cl2_offset_sidedata_offset);
796 putbe32(sidedata_comp_len,
796 putbe32(sidedata_comp_len,
797 data + entry_cl2_offset_sidedata_comp_len);
797 data + entry_cl2_offset_sidedata_comp_len);
798 data[entry_cl2_offset_all_comp_mode] =
798 data[entry_cl2_offset_all_comp_mode] =
799 (data[entry_cl2_offset_all_comp_mode] & ~(3 << 2)) |
799 (data[entry_cl2_offset_all_comp_mode] & ~(3 << 2)) |
800 ((comp_mode & 3) << 2);
800 ((comp_mode & 3) << 2);
801 } else {
801 } else {
802 raise_revlog_error();
802 raise_revlog_error();
803 return NULL;
803 return NULL;
804 }
804 }
805
805
806 Py_RETURN_NONE;
806 Py_RETURN_NONE;
807 }
807 }
808
808
809 static PyObject *index_stats(indexObject *self)
809 static PyObject *index_stats(indexObject *self)
810 {
810 {
811 PyObject *obj = PyDict_New();
811 PyObject *obj = PyDict_New();
812 PyObject *s = NULL;
812 PyObject *s = NULL;
813 PyObject *t = NULL;
813 PyObject *t = NULL;
814
814
815 if (obj == NULL)
815 if (obj == NULL)
816 return NULL;
816 return NULL;
817
817
818 #define istat(__n, __d) \
818 #define istat(__n, __d) \
819 do { \
819 do { \
820 s = PyBytes_FromString(__d); \
820 s = PyBytes_FromString(__d); \
821 t = PyLong_FromSsize_t(self->__n); \
821 t = PyLong_FromSsize_t(self->__n); \
822 if (!s || !t) \
822 if (!s || !t) \
823 goto bail; \
823 goto bail; \
824 if (PyDict_SetItem(obj, s, t) == -1) \
824 if (PyDict_SetItem(obj, s, t) == -1) \
825 goto bail; \
825 goto bail; \
826 Py_CLEAR(s); \
826 Py_CLEAR(s); \
827 Py_CLEAR(t); \
827 Py_CLEAR(t); \
828 } while (0)
828 } while (0)
829
829
830 if (self->added_length)
830 if (self->added_length)
831 istat(new_length, "index entries added");
831 istat(new_length, "index entries added");
832 istat(length, "revs in memory");
832 istat(length, "revs in memory");
833 istat(ntlookups, "node trie lookups");
833 istat(ntlookups, "node trie lookups");
834 istat(ntmisses, "node trie misses");
834 istat(ntmisses, "node trie misses");
835 istat(ntrev, "node trie last rev scanned");
835 istat(ntrev, "node trie last rev scanned");
836 if (self->ntinitialized) {
836 if (self->ntinitialized) {
837 istat(nt.capacity, "node trie capacity");
837 istat(nt.capacity, "node trie capacity");
838 istat(nt.depth, "node trie depth");
838 istat(nt.depth, "node trie depth");
839 istat(nt.length, "node trie count");
839 istat(nt.length, "node trie count");
840 istat(nt.splits, "node trie splits");
840 istat(nt.splits, "node trie splits");
841 }
841 }
842
842
843 #undef istat
843 #undef istat
844
844
845 return obj;
845 return obj;
846
846
847 bail:
847 bail:
848 Py_XDECREF(obj);
848 Py_XDECREF(obj);
849 Py_XDECREF(s);
849 Py_XDECREF(s);
850 Py_XDECREF(t);
850 Py_XDECREF(t);
851 return NULL;
851 return NULL;
852 }
852 }
853
853
854 /*
854 /*
855 * When we cache a list, we want to be sure the caller can't mutate
855 * When we cache a list, we want to be sure the caller can't mutate
856 * the cached copy.
856 * the cached copy.
857 */
857 */
858 static PyObject *list_copy(PyObject *list)
858 static PyObject *list_copy(PyObject *list)
859 {
859 {
860 Py_ssize_t len = PyList_GET_SIZE(list);
860 Py_ssize_t len = PyList_GET_SIZE(list);
861 PyObject *newlist = PyList_New(len);
861 PyObject *newlist = PyList_New(len);
862 Py_ssize_t i;
862 Py_ssize_t i;
863
863
864 if (newlist == NULL)
864 if (newlist == NULL)
865 return NULL;
865 return NULL;
866
866
867 for (i = 0; i < len; i++) {
867 for (i = 0; i < len; i++) {
868 PyObject *obj = PyList_GET_ITEM(list, i);
868 PyObject *obj = PyList_GET_ITEM(list, i);
869 Py_INCREF(obj);
869 Py_INCREF(obj);
870 PyList_SET_ITEM(newlist, i, obj);
870 PyList_SET_ITEM(newlist, i, obj);
871 }
871 }
872
872
873 return newlist;
873 return newlist;
874 }
874 }
875
875
876 static int check_filter(PyObject *filter, Py_ssize_t arg)
876 static int check_filter(PyObject *filter, Py_ssize_t arg)
877 {
877 {
878 if (filter) {
878 if (filter) {
879 PyObject *arglist, *result;
879 PyObject *arglist, *result;
880 int isfiltered;
880 int isfiltered;
881
881
882 arglist = Py_BuildValue("(n)", arg);
882 arglist = Py_BuildValue("(n)", arg);
883 if (!arglist) {
883 if (!arglist) {
884 return -1;
884 return -1;
885 }
885 }
886
886
887 result = PyObject_Call(filter, arglist, NULL);
887 result = PyObject_Call(filter, arglist, NULL);
888 Py_DECREF(arglist);
888 Py_DECREF(arglist);
889 if (!result) {
889 if (!result) {
890 return -1;
890 return -1;
891 }
891 }
892
892
893 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
893 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
894 * same as this function, so we can just return it directly.*/
894 * same as this function, so we can just return it directly.*/
895 isfiltered = PyObject_IsTrue(result);
895 isfiltered = PyObject_IsTrue(result);
896 Py_DECREF(result);
896 Py_DECREF(result);
897 return isfiltered;
897 return isfiltered;
898 } else {
898 } else {
899 return 0;
899 return 0;
900 }
900 }
901 }
901 }
902
902
903 static inline void set_phase_from_parents(char *phases, int parent_1,
903 static inline void set_phase_from_parents(char *phases, int parent_1,
904 int parent_2, Py_ssize_t i)
904 int parent_2, Py_ssize_t i)
905 {
905 {
906 if (parent_1 >= 0 && phases[parent_1] > phases[i])
906 if (parent_1 >= 0 && phases[parent_1] > phases[i])
907 phases[i] = phases[parent_1];
907 phases[i] = phases[parent_1];
908 if (parent_2 >= 0 && phases[parent_2] > phases[i])
908 if (parent_2 >= 0 && phases[parent_2] > phases[i])
909 phases[i] = phases[parent_2];
909 phases[i] = phases[parent_2];
910 }
910 }
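
/*
 * A hedged, standalone illustration of the rule enforced by the helper above:
 * a child can never carry a lower phase than its parents, so its slot is
 * raised to the maximum of the parents' phases (0 = public, 1 = draft,
 * 2 = secret).  The helper is repeated in simplified form, with an
 * illustrative name, only so this toy compiles on its own.
 */
#include <assert.h>

static void raise_phase(char *phases, int p1, int p2, int i)
{
        if (p1 >= 0 && phases[p1] > phases[i])
                phases[i] = phases[p1];
        if (p2 >= 0 && phases[p2] > phases[i])
                phases[i] = phases[p2];
}

int main(void)
{
        char phases[3] = {1 /* draft */, 2 /* secret */, 0 /* unset so far */};
        raise_phase(phases, 0, 1, 2); /* rev 2 has parents 0 and 1 */
        assert(phases[2] == 2);       /* the secret parent wins */
        return 0;
}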
911
911
912 static PyObject *reachableroots2(indexObject *self, PyObject *args)
912 static PyObject *reachableroots2(indexObject *self, PyObject *args)
913 {
913 {
914
914
915 /* Input */
915 /* Input */
916 long minroot;
916 long minroot;
917 PyObject *includepatharg = NULL;
917 PyObject *includepatharg = NULL;
918 int includepath = 0;
918 int includepath = 0;
919 /* heads and roots are lists */
919 /* heads and roots are lists */
920 PyObject *heads = NULL;
920 PyObject *heads = NULL;
921 PyObject *roots = NULL;
921 PyObject *roots = NULL;
922 PyObject *reachable = NULL;
922 PyObject *reachable = NULL;
923
923
924 PyObject *val;
924 PyObject *val;
925 Py_ssize_t len = index_length(self);
925 Py_ssize_t len = index_length(self);
926 long revnum;
926 long revnum;
927 Py_ssize_t k;
927 Py_ssize_t k;
928 Py_ssize_t i;
928 Py_ssize_t i;
929 Py_ssize_t l;
929 Py_ssize_t l;
930 int r;
930 int r;
931 int parents[2];
931 int parents[2];
932
932
933 /* Internal data structure:
933 /* Internal data structure:
934 * tovisit: array of length len+1 (all revs + nullrev), filled up to
934 * tovisit: array of length len+1 (all revs + nullrev), filled up to
935 * lentovisit
935 * lentovisit
936 *
936 *
937 * revstates: array of length len+1 (all revs + nullrev) */
937 * revstates: array of length len+1 (all revs + nullrev) */
938 int *tovisit = NULL;
938 int *tovisit = NULL;
939 long lentovisit = 0;
939 long lentovisit = 0;
940 enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
940 enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
941 char *revstates = NULL;
941 char *revstates = NULL;
942
942
943 /* Get arguments */
943 /* Get arguments */
944 if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
944 if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
945 &PyList_Type, &roots, &PyBool_Type,
945 &PyList_Type, &roots, &PyBool_Type,
946 &includepatharg))
946 &includepatharg))
947 goto bail;
947 goto bail;
948
948
949 if (includepatharg == Py_True)
949 if (includepatharg == Py_True)
950 includepath = 1;
950 includepath = 1;
951
951
952 /* Initialize return set */
952 /* Initialize return set */
953 reachable = PyList_New(0);
953 reachable = PyList_New(0);
954 if (reachable == NULL)
954 if (reachable == NULL)
955 goto bail;
955 goto bail;
956
956
957 /* Initialize internal datastructures */
957 /* Initialize internal datastructures */
958 tovisit = (int *)malloc((len + 1) * sizeof(int));
958 tovisit = (int *)malloc((len + 1) * sizeof(int));
959 if (tovisit == NULL) {
959 if (tovisit == NULL) {
960 PyErr_NoMemory();
960 PyErr_NoMemory();
961 goto bail;
961 goto bail;
962 }
962 }
963
963
964 revstates = (char *)calloc(len + 1, 1);
964 revstates = (char *)calloc(len + 1, 1);
965 if (revstates == NULL) {
965 if (revstates == NULL) {
966 PyErr_NoMemory();
966 PyErr_NoMemory();
967 goto bail;
967 goto bail;
968 }
968 }
969
969
970 l = PyList_GET_SIZE(roots);
970 l = PyList_GET_SIZE(roots);
971 for (i = 0; i < l; i++) {
971 for (i = 0; i < l; i++) {
972 revnum = PyLong_AsLong(PyList_GET_ITEM(roots, i));
972 revnum = PyLong_AsLong(PyList_GET_ITEM(roots, i));
973 if (revnum == -1 && PyErr_Occurred())
973 if (revnum == -1 && PyErr_Occurred())
974 goto bail;
974 goto bail;
975 /* If root is out of range, e.g. wdir(), it must be unreachable
975 /* If root is out of range, e.g. wdir(), it must be unreachable
976 * from heads. So we can just ignore it. */
976 * from heads. So we can just ignore it. */
977 if (revnum + 1 < 0 || revnum + 1 >= len + 1)
977 if (revnum + 1 < 0 || revnum + 1 >= len + 1)
978 continue;
978 continue;
979 revstates[revnum + 1] |= RS_ROOT;
979 revstates[revnum + 1] |= RS_ROOT;
980 }
980 }
981
981
982 /* Populate tovisit with all the heads */
982 /* Populate tovisit with all the heads */
983 l = PyList_GET_SIZE(heads);
983 l = PyList_GET_SIZE(heads);
984 for (i = 0; i < l; i++) {
984 for (i = 0; i < l; i++) {
985 revnum = PyLong_AsLong(PyList_GET_ITEM(heads, i));
985 revnum = PyLong_AsLong(PyList_GET_ITEM(heads, i));
986 if (revnum == -1 && PyErr_Occurred())
986 if (revnum == -1 && PyErr_Occurred())
987 goto bail;
987 goto bail;
988 if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
988 if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
989 PyErr_SetString(PyExc_IndexError, "head out of range");
989 PyErr_SetString(PyExc_IndexError, "head out of range");
990 goto bail;
990 goto bail;
991 }
991 }
992 if (!(revstates[revnum + 1] & RS_SEEN)) {
992 if (!(revstates[revnum + 1] & RS_SEEN)) {
993 tovisit[lentovisit++] = (int)revnum;
993 tovisit[lentovisit++] = (int)revnum;
994 revstates[revnum + 1] |= RS_SEEN;
994 revstates[revnum + 1] |= RS_SEEN;
995 }
995 }
996 }
996 }
997
997
998 /* Visit the tovisit list and find the reachable roots */
998 /* Visit the tovisit list and find the reachable roots */
999 k = 0;
999 k = 0;
1000 while (k < lentovisit) {
1000 while (k < lentovisit) {
1001 /* Add the node to reachable if it is a root */
1001 /* Add the node to reachable if it is a root */
1002 revnum = tovisit[k++];
1002 revnum = tovisit[k++];
1003 if (revstates[revnum + 1] & RS_ROOT) {
1003 if (revstates[revnum + 1] & RS_ROOT) {
1004 revstates[revnum + 1] |= RS_REACHABLE;
1004 revstates[revnum + 1] |= RS_REACHABLE;
1005 val = PyLong_FromLong(revnum);
1005 val = PyLong_FromLong(revnum);
1006 if (val == NULL)
1006 if (val == NULL)
1007 goto bail;
1007 goto bail;
1008 r = PyList_Append(reachable, val);
1008 r = PyList_Append(reachable, val);
1009 Py_DECREF(val);
1009 Py_DECREF(val);
1010 if (r < 0)
1010 if (r < 0)
1011 goto bail;
1011 goto bail;
1012 if (includepath == 0)
1012 if (includepath == 0)
1013 continue;
1013 continue;
1014 }
1014 }
1015
1015
1016 /* Add its parents to the list of nodes to visit */
1016 /* Add its parents to the list of nodes to visit */
1017 if (revnum == nullrev)
1017 if (revnum == nullrev)
1018 continue;
1018 continue;
1019 r = index_get_parents(self, revnum, parents, (int)len - 1);
1019 r = index_get_parents(self, revnum, parents, (int)len - 1);
1020 if (r < 0)
1020 if (r < 0)
1021 goto bail;
1021 goto bail;
1022 for (i = 0; i < 2; i++) {
1022 for (i = 0; i < 2; i++) {
1023 if (!(revstates[parents[i] + 1] & RS_SEEN) &&
1023 if (!(revstates[parents[i] + 1] & RS_SEEN) &&
1024 parents[i] >= minroot) {
1024 parents[i] >= minroot) {
1025 tovisit[lentovisit++] = parents[i];
1025 tovisit[lentovisit++] = parents[i];
1026 revstates[parents[i] + 1] |= RS_SEEN;
1026 revstates[parents[i] + 1] |= RS_SEEN;
1027 }
1027 }
1028 }
1028 }
1029 }
1029 }
1030
1030
1031 /* Find all the nodes in between the roots we found and the heads
1031 /* Find all the nodes in between the roots we found and the heads
1032 * and add them to the reachable set */
1032 * and add them to the reachable set */
1033 if (includepath == 1) {
1033 if (includepath == 1) {
1034 long minidx = minroot;
1034 long minidx = minroot;
1035 if (minidx < 0)
1035 if (minidx < 0)
1036 minidx = 0;
1036 minidx = 0;
1037 for (i = minidx; i < len; i++) {
1037 for (i = minidx; i < len; i++) {
1038 if (!(revstates[i + 1] & RS_SEEN))
1038 if (!(revstates[i + 1] & RS_SEEN))
1039 continue;
1039 continue;
1040 r = index_get_parents(self, i, parents, (int)len - 1);
1040 r = index_get_parents(self, i, parents, (int)len - 1);
1041 /* Corrupted index file, error is set from
1041 /* Corrupted index file, error is set from
1042 * index_get_parents */
1042 * index_get_parents */
1043 if (r < 0)
1043 if (r < 0)
1044 goto bail;
1044 goto bail;
1045 if (((revstates[parents[0] + 1] |
1045 if (((revstates[parents[0] + 1] |
1046 revstates[parents[1] + 1]) &
1046 revstates[parents[1] + 1]) &
1047 RS_REACHABLE) &&
1047 RS_REACHABLE) &&
1048 !(revstates[i + 1] & RS_REACHABLE)) {
1048 !(revstates[i + 1] & RS_REACHABLE)) {
1049 revstates[i + 1] |= RS_REACHABLE;
1049 revstates[i + 1] |= RS_REACHABLE;
1050 val = PyLong_FromSsize_t(i);
1050 val = PyLong_FromSsize_t(i);
1051 if (val == NULL)
1051 if (val == NULL)
1052 goto bail;
1052 goto bail;
1053 r = PyList_Append(reachable, val);
1053 r = PyList_Append(reachable, val);
1054 Py_DECREF(val);
1054 Py_DECREF(val);
1055 if (r < 0)
1055 if (r < 0)
1056 goto bail;
1056 goto bail;
1057 }
1057 }
1058 }
1058 }
1059 }
1059 }
1060
1060
1061 free(revstates);
1061 free(revstates);
1062 free(tovisit);
1062 free(tovisit);
1063 return reachable;
1063 return reachable;
1064 bail:
1064 bail:
1065 Py_XDECREF(reachable);
1065 Py_XDECREF(reachable);
1066 free(revstates);
1066 free(revstates);
1067 free(tovisit);
1067 free(tovisit);
1068 return NULL;
1068 return NULL;
1069 }
1069 }
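
/*
 * A hedged, standalone sketch of the walk above: the same RS_SEEN / RS_ROOT /
 * RS_REACHABLE flags, but over a hard-coded parents array instead of a revlog
 * index, and without the minroot and includepath refinements.  The toy graph
 * and all names here are illustrative only.
 */
#include <stdio.h>

int main(void)
{
        enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
        /* toy DAG: parents[rev] = {p1, p2}, -1 is the null revision */
        int parents[5][2] = {
            {-1, -1}, {0, -1}, {1, -1}, {1, -1}, {2, 3}};
        int tovisit[6];
        char revstates[6] = {0}; /* indexed by rev + 1 to make room for null */
        int lentovisit = 0, k;

        revstates[1 + 1] |= RS_ROOT; /* declare rev 1 a root */
        tovisit[lentovisit++] = 4;   /* start the walk from head rev 4 */
        revstates[4 + 1] |= RS_SEEN;

        for (k = 0; k < lentovisit; k++) {
                int rev = tovisit[k], i;
                if (revstates[rev + 1] & RS_ROOT) {
                        revstates[rev + 1] |= RS_REACHABLE;
                        printf("reachable root: %d\n", rev); /* prints rev 1 */
                        continue;
                }
                if (rev == -1)
                        continue;
                for (i = 0; i < 2; i++) {
                        int p = parents[rev][i];
                        if (!(revstates[p + 1] & RS_SEEN)) {
                                tovisit[lentovisit++] = p;
                                revstates[p + 1] |= RS_SEEN;
                        }
                }
        }
        return 0;
}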
1070
1070
1071 static int add_roots_get_min(indexObject *self, PyObject *roots, char *phases,
1071 static int add_roots_get_min(indexObject *self, PyObject *roots, char *phases,
1072 char phase)
1072 char phase)
1073 {
1073 {
1074 Py_ssize_t len = index_length(self);
1074 Py_ssize_t len = index_length(self);
1075 PyObject *item;
1075 PyObject *item;
1076 PyObject *iterator;
1076 PyObject *iterator;
1077 int rev, minrev = -1;
1077 int rev, minrev = -1;
1078 char *node;
1078 char *node;
1079
1079
1080 if (!PySet_Check(roots)) {
1080 if (!PySet_Check(roots)) {
1081 PyErr_SetString(PyExc_TypeError,
1081 PyErr_SetString(PyExc_TypeError,
1082 "roots must be a set of nodes");
1082 "roots must be a set of nodes");
1083 return -2;
1083 return -2;
1084 }
1084 }
1085 iterator = PyObject_GetIter(roots);
1085 iterator = PyObject_GetIter(roots);
1086 if (iterator == NULL)
1086 if (iterator == NULL)
1087 return -2;
1087 return -2;
1088 while ((item = PyIter_Next(iterator))) {
1088 while ((item = PyIter_Next(iterator))) {
1089 if (node_check(self->nodelen, item, &node) == -1)
1089 if (node_check(self->nodelen, item, &node) == -1)
1090 goto failed;
1090 goto failed;
1091 rev = index_find_node(self, node);
1091 rev = index_find_node(self, node);
1092 /* null is implicitly public, so negative is invalid */
1092 /* null is implicitly public, so negative is invalid */
1093 if (rev < 0 || rev >= len)
1093 if (rev < 0 || rev >= len)
1094 goto failed;
1094 goto failed;
1095 phases[rev] = phase;
1095 phases[rev] = phase;
1096 if (minrev == -1 || minrev > rev)
1096 if (minrev == -1 || minrev > rev)
1097 minrev = rev;
1097 minrev = rev;
1098 Py_DECREF(item);
1098 Py_DECREF(item);
1099 }
1099 }
1100 Py_DECREF(iterator);
1100 Py_DECREF(iterator);
1101 return minrev;
1101 return minrev;
1102 failed:
1102 failed:
1103 Py_DECREF(iterator);
1103 Py_DECREF(iterator);
1104 Py_DECREF(item);
1104 Py_DECREF(item);
1105 return -2;
1105 return -2;
1106 }
1106 }
1107
1107
1108 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
1108 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
1109 {
1109 {
1110 /* 0: public (untracked), 1: draft, 2: secret, 32: archive,
1110 /* 0: public (untracked), 1: draft, 2: secret, 32: archive,
1111 96: internal */
1111 96: internal */
1112 static const char trackedphases[] = {1, 2, 32, 96};
1112 static const char trackedphases[] = {1, 2, 32, 96};
1113 PyObject *roots = Py_None;
1113 PyObject *roots = Py_None;
1114 PyObject *phasesetsdict = NULL;
1114 PyObject *phasesetsdict = NULL;
1115 PyObject *phasesets[4] = {NULL, NULL, NULL, NULL};
1115 PyObject *phasesets[4] = {NULL, NULL, NULL, NULL};
1116 Py_ssize_t len = index_length(self);
1116 Py_ssize_t len = index_length(self);
1117 char *phases = NULL;
1117 char *phases = NULL;
1118 int minphaserev = -1, rev, i;
1118 int minphaserev = -1, rev, i;
1119 const int numphases = (int)(sizeof(phasesets) / sizeof(phasesets[0]));
1119 const int numphases = (int)(sizeof(phasesets) / sizeof(phasesets[0]));
1120
1120
1121 if (!PyArg_ParseTuple(args, "O", &roots))
1121 if (!PyArg_ParseTuple(args, "O", &roots))
1122 return NULL;
1122 return NULL;
1123 if (roots == NULL || !PyDict_Check(roots)) {
1123 if (roots == NULL || !PyDict_Check(roots)) {
1124 PyErr_SetString(PyExc_TypeError, "roots must be a dictionary");
1124 PyErr_SetString(PyExc_TypeError, "roots must be a dictionary");
1125 return NULL;
1125 return NULL;
1126 }
1126 }
1127
1127
1128 phases = calloc(len, 1);
1128 phases = calloc(len, 1);
1129 if (phases == NULL) {
1129 if (phases == NULL) {
1130 PyErr_NoMemory();
1130 PyErr_NoMemory();
1131 return NULL;
1131 return NULL;
1132 }
1132 }
1133
1133
1134 for (i = 0; i < numphases; ++i) {
1134 for (i = 0; i < numphases; ++i) {
1135 PyObject *pyphase = PyLong_FromLong(trackedphases[i]);
1135 PyObject *pyphase = PyLong_FromLong(trackedphases[i]);
1136 PyObject *phaseroots = NULL;
1136 PyObject *phaseroots = NULL;
1137 if (pyphase == NULL)
1137 if (pyphase == NULL)
1138 goto release;
1138 goto release;
1139 phaseroots = PyDict_GetItem(roots, pyphase);
1139 phaseroots = PyDict_GetItem(roots, pyphase);
1140 Py_DECREF(pyphase);
1140 Py_DECREF(pyphase);
1141 if (phaseroots == NULL)
1141 if (phaseroots == NULL)
1142 continue;
1142 continue;
1143 rev = add_roots_get_min(self, phaseroots, phases,
1143 rev = add_roots_get_min(self, phaseroots, phases,
1144 trackedphases[i]);
1144 trackedphases[i]);
1145 if (rev == -2)
1145 if (rev == -2)
1146 goto release;
1146 goto release;
1147 if (rev != -1 && (minphaserev == -1 || rev < minphaserev))
1147 if (rev != -1 && (minphaserev == -1 || rev < minphaserev))
1148 minphaserev = rev;
1148 minphaserev = rev;
1149 }
1149 }
1150
1150
1151 for (i = 0; i < numphases; ++i) {
1151 for (i = 0; i < numphases; ++i) {
1152 phasesets[i] = PySet_New(NULL);
1152 phasesets[i] = PySet_New(NULL);
1153 if (phasesets[i] == NULL)
1153 if (phasesets[i] == NULL)
1154 goto release;
1154 goto release;
1155 }
1155 }
1156
1156
1157 if (minphaserev == -1)
1157 if (minphaserev == -1)
1158 minphaserev = len;
1158 minphaserev = len;
1159 for (rev = minphaserev; rev < len; ++rev) {
1159 for (rev = minphaserev; rev < len; ++rev) {
1160 PyObject *pyphase = NULL;
1160 PyObject *pyphase = NULL;
1161 PyObject *pyrev = NULL;
1161 PyObject *pyrev = NULL;
1162 int parents[2];
1162 int parents[2];
1163 /*
1163 /*
1164 * The parent lookup could be skipped for phaseroots, but
1164 * The parent lookup could be skipped for phaseroots, but
1165 * phase --force would historically not recompute them
1165 * phase --force would historically not recompute them
1166 * correctly, leaving descendants with a lower phase around.
1166 * correctly, leaving descendants with a lower phase around.
1167 * As such, unconditionally recompute the phase.
1167 * As such, unconditionally recompute the phase.
1168 */
1168 */
1169 if (index_get_parents(self, rev, parents, (int)len - 1) < 0)
1169 if (index_get_parents(self, rev, parents, (int)len - 1) < 0)
1170 goto release;
1170 goto release;
1171 set_phase_from_parents(phases, parents[0], parents[1], rev);
1171 set_phase_from_parents(phases, parents[0], parents[1], rev);
1172 switch (phases[rev]) {
1172 switch (phases[rev]) {
1173 case 0:
1173 case 0:
1174 continue;
1174 continue;
1175 case 1:
1175 case 1:
1176 pyphase = phasesets[0];
1176 pyphase = phasesets[0];
1177 break;
1177 break;
1178 case 2:
1178 case 2:
1179 pyphase = phasesets[1];
1179 pyphase = phasesets[1];
1180 break;
1180 break;
1181 case 32:
1181 case 32:
1182 pyphase = phasesets[2];
1182 pyphase = phasesets[2];
1183 break;
1183 break;
1184 case 96:
1184 case 96:
1185 pyphase = phasesets[3];
1185 pyphase = phasesets[3];
1186 break;
1186 break;
1187 default:
1187 default:
1188 /* this should never happen since the phase number is
1188 /* this should never happen since the phase number is
1189 * specified by this function. */
1189 * specified by this function. */
1190 PyErr_SetString(PyExc_SystemError,
1190 PyErr_SetString(PyExc_SystemError,
1191 "bad phase number in internal list");
1191 "bad phase number in internal list");
1192 goto release;
1192 goto release;
1193 }
1193 }
1194 pyrev = PyLong_FromLong(rev);
1194 pyrev = PyLong_FromLong(rev);
1195 if (pyrev == NULL)
1195 if (pyrev == NULL)
1196 goto release;
1196 goto release;
1197 if (PySet_Add(pyphase, pyrev) == -1) {
1197 if (PySet_Add(pyphase, pyrev) == -1) {
1198 Py_DECREF(pyrev);
1198 Py_DECREF(pyrev);
1199 goto release;
1199 goto release;
1200 }
1200 }
1201 Py_DECREF(pyrev);
1201 Py_DECREF(pyrev);
1202 }
1202 }
1203
1203
1204 phasesetsdict = _dict_new_presized(numphases);
1204 phasesetsdict = _dict_new_presized(numphases);
1205 if (phasesetsdict == NULL)
1205 if (phasesetsdict == NULL)
1206 goto release;
1206 goto release;
1207 for (i = 0; i < numphases; ++i) {
1207 for (i = 0; i < numphases; ++i) {
1208 PyObject *pyphase = PyLong_FromLong(trackedphases[i]);
1208 PyObject *pyphase = PyLong_FromLong(trackedphases[i]);
1209 if (pyphase == NULL)
1209 if (pyphase == NULL)
1210 goto release;
1210 goto release;
1211 if (PyDict_SetItem(phasesetsdict, pyphase, phasesets[i]) ==
1211 if (PyDict_SetItem(phasesetsdict, pyphase, phasesets[i]) ==
1212 -1) {
1212 -1) {
1213 Py_DECREF(pyphase);
1213 Py_DECREF(pyphase);
1214 goto release;
1214 goto release;
1215 }
1215 }
1216 Py_DECREF(phasesets[i]);
1216 Py_DECREF(phasesets[i]);
1217 phasesets[i] = NULL;
1217 phasesets[i] = NULL;
1218 }
1218 }
1219
1219
1220 free(phases);
1220 free(phases);
1221 return Py_BuildValue("nN", len, phasesetsdict);
1221 return Py_BuildValue("nN", len, phasesetsdict);
1222
1222
1223 release:
1223 release:
1224 for (i = 0; i < numphases; ++i)
1224 for (i = 0; i < numphases; ++i)
1225 Py_XDECREF(phasesets[i]);
1225 Py_XDECREF(phasesets[i]);
1226 Py_XDECREF(phasesetsdict);
1226 Py_XDECREF(phasesetsdict);
1227
1227
1228 free(phases);
1228 free(phases);
1229 return NULL;
1229 return NULL;
1230 }
1230 }
1231
1231
1232 static PyObject *index_headrevs(indexObject *self, PyObject *args)
1232 static PyObject *index_headrevs(indexObject *self, PyObject *args)
1233 {
1233 {
1234 Py_ssize_t i, j, len;
1234 Py_ssize_t i, j, len;
1235 char *nothead = NULL;
1235 char *nothead = NULL;
1236 PyObject *heads = NULL;
1236 PyObject *heads = NULL;
1237 PyObject *filter = NULL;
1237 PyObject *filter = NULL;
1238 PyObject *filteredrevs = Py_None;
1238 PyObject *filteredrevs = Py_None;
1239
1239
1240 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
1240 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
1241 return NULL;
1241 return NULL;
1242 }
1242 }
1243
1243
1244 if (self->headrevs && filteredrevs == self->filteredrevs)
1244 if (self->headrevs && filteredrevs == self->filteredrevs)
1245 return list_copy(self->headrevs);
1245 return list_copy(self->headrevs);
1246
1246
1247 Py_DECREF(self->filteredrevs);
1247 Py_DECREF(self->filteredrevs);
1248 self->filteredrevs = filteredrevs;
1248 self->filteredrevs = filteredrevs;
1249 Py_INCREF(filteredrevs);
1249 Py_INCREF(filteredrevs);
1250
1250
1251 if (filteredrevs != Py_None) {
1251 if (filteredrevs != Py_None) {
1252 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
1252 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
1253 if (!filter) {
1253 if (!filter) {
1254 PyErr_SetString(
1254 PyErr_SetString(
1255 PyExc_TypeError,
1255 PyExc_TypeError,
1256 "filteredrevs has no attribute __contains__");
1256 "filteredrevs has no attribute __contains__");
1257 goto bail;
1257 goto bail;
1258 }
1258 }
1259 }
1259 }
1260
1260
1261 len = index_length(self);
1261 len = index_length(self);
1262 heads = PyList_New(0);
1262 heads = PyList_New(0);
1263 if (heads == NULL)
1263 if (heads == NULL)
1264 goto bail;
1264 goto bail;
1265 if (len == 0) {
1265 if (len == 0) {
1266 PyObject *nullid = PyLong_FromLong(-1);
1266 PyObject *nullid = PyLong_FromLong(-1);
1267 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
1267 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
1268 Py_XDECREF(nullid);
1268 Py_XDECREF(nullid);
1269 goto bail;
1269 goto bail;
1270 }
1270 }
1271 goto done;
1271 goto done;
1272 }
1272 }
1273
1273
1274 nothead = calloc(len, 1);
1274 nothead = calloc(len, 1);
1275 if (nothead == NULL) {
1275 if (nothead == NULL) {
1276 PyErr_NoMemory();
1276 PyErr_NoMemory();
1277 goto bail;
1277 goto bail;
1278 }
1278 }
1279
1279
1280 for (i = len - 1; i >= 0; i--) {
1280 for (i = len - 1; i >= 0; i--) {
1281 int isfiltered;
1281 int isfiltered;
1282 int parents[2];
1282 int parents[2];
1283
1283
1284 /* If nothead[i] == 1, it means we've seen an unfiltered child
1284 /* If nothead[i] == 1, it means we've seen an unfiltered child
1285 * of this node already, and therefore this node is not
1285 * of this node already, and therefore this node is not
1286 * filtered. So we can skip the expensive check_filter step.
1286 * filtered. So we can skip the expensive check_filter step.
1287 */
1287 */
1288 if (nothead[i] != 1) {
1288 if (nothead[i] != 1) {
1289 isfiltered = check_filter(filter, i);
1289 isfiltered = check_filter(filter, i);
1290 if (isfiltered == -1) {
1290 if (isfiltered == -1) {
1291 PyErr_SetString(PyExc_TypeError,
1291 PyErr_SetString(PyExc_TypeError,
1292 "unable to check filter");
1292 "unable to check filter");
1293 goto bail;
1293 goto bail;
1294 }
1294 }
1295
1295
1296 if (isfiltered) {
1296 if (isfiltered) {
1297 nothead[i] = 1;
1297 nothead[i] = 1;
1298 continue;
1298 continue;
1299 }
1299 }
1300 }
1300 }
1301
1301
1302 if (index_get_parents(self, i, parents, (int)len - 1) < 0)
1302 if (index_get_parents(self, i, parents, (int)len - 1) < 0)
1303 goto bail;
1303 goto bail;
1304 for (j = 0; j < 2; j++) {
1304 for (j = 0; j < 2; j++) {
1305 if (parents[j] >= 0)
1305 if (parents[j] >= 0)
1306 nothead[parents[j]] = 1;
1306 nothead[parents[j]] = 1;
1307 }
1307 }
1308 }
1308 }
1309
1309
1310 for (i = 0; i < len; i++) {
1310 for (i = 0; i < len; i++) {
1311 PyObject *head;
1311 PyObject *head;
1312
1312
1313 if (nothead[i])
1313 if (nothead[i])
1314 continue;
1314 continue;
1315 head = PyLong_FromSsize_t(i);
1315 head = PyLong_FromSsize_t(i);
1316 if (head == NULL || PyList_Append(heads, head) == -1) {
1316 if (head == NULL || PyList_Append(heads, head) == -1) {
1317 Py_XDECREF(head);
1317 Py_XDECREF(head);
1318 goto bail;
1318 goto bail;
1319 }
1319 }
1320 }
1320 }
1321
1321
1322 done:
1322 done:
1323 self->headrevs = heads;
1323 self->headrevs = heads;
1324 Py_XDECREF(filter);
1324 Py_XDECREF(filter);
1325 free(nothead);
1325 free(nothead);
1326 return list_copy(self->headrevs);
1326 return list_copy(self->headrevs);
1327 bail:
1327 bail:
1328 Py_XDECREF(filter);
1328 Py_XDECREF(filter);
1329 Py_XDECREF(heads);
1329 Py_XDECREF(heads);
1330 free(nothead);
1330 free(nothead);
1331 return NULL;
1331 return NULL;
1332 }
1332 }
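
/*
 * The loop above reduces to "a revision is a head unless some later revision
 * names it as a parent".  A hedged, standalone sketch of that core rule, with
 * the filtering and caching of the real method left out:
 */
#include <stdio.h>

int main(void)
{
        /* toy DAG: parents[rev] = {p1, p2}, -1 means no parent */
        int parents[5][2] = {
            {-1, -1}, {0, -1}, {1, -1}, {1, -1}, {2, 3}};
        char nothead[5] = {0};
        int i, j;

        for (i = 0; i < 5; i++)
                for (j = 0; j < 2; j++)
                        if (parents[i][j] >= 0)
                                nothead[parents[i][j]] = 1;
        for (i = 0; i < 5; i++)
                if (!nothead[i])
                        printf("head: %d\n", i); /* prints "head: 4" */
        return 0;
}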
1333
1333
1334 /**
1334 /**
1335 * Obtain the delta base revision of the given index entry.
1335 * Obtain the delta base revision of the given index entry.
1336 *
1336 *
1337 * Callers must ensure that rev >= 0 or illegal memory access may occur.
1337 * Callers must ensure that rev >= 0 or illegal memory access may occur.
1338 */
1338 */
1339 static inline int index_baserev(indexObject *self, int rev)
1339 static inline int index_baserev(indexObject *self, int rev)
1340 {
1340 {
1341 const char *data;
1341 const char *data;
1342 int result;
1342 int result;
1343
1343
1344 data = index_deref(self, rev);
1344 data = index_deref(self, rev);
1345 if (data == NULL)
1345 if (data == NULL)
1346 return -2;
1346 return -2;
1347
1347
1348 if (self->format_version == format_v1) {
1348 if (self->format_version == format_v1) {
1349 result = getbe32(data + entry_v1_offset_base_rev);
1349 result = getbe32(data + entry_v1_offset_base_rev);
1350 } else if (self->format_version == format_v2) {
1350 } else if (self->format_version == format_v2) {
1351 result = getbe32(data + entry_v2_offset_base_rev);
1351 result = getbe32(data + entry_v2_offset_base_rev);
1352 } else if (self->format_version == format_cl2) {
1352 } else if (self->format_version == format_cl2) {
1353 return rev;
1353 return rev;
1354 } else {
1354 } else {
1355 raise_revlog_error();
1355 raise_revlog_error();
1356 return -1;
1356 return -1;
1357 }
1357 }
1358
1358
1359 if (result > rev) {
1359 if (result > rev) {
1360 PyErr_Format(
1360 PyErr_Format(
1361 PyExc_ValueError,
1361 PyExc_ValueError,
1362 "corrupted revlog, revision base above revision: %d, %d",
1362 "corrupted revlog, revision base above revision: %d, %d",
1363 rev, result);
1363 rev, result);
1364 return -2;
1364 return -2;
1365 }
1365 }
1366 if (result < -1) {
1366 if (result < -1) {
1367 PyErr_Format(
1367 PyErr_Format(
1368 PyExc_ValueError,
1368 PyExc_ValueError,
1369 "corrupted revlog, revision base out of range: %d, %d", rev,
1369 "corrupted revlog, revision base out of range: %d, %d", rev,
1370 result);
1370 result);
1371 return -2;
1371 return -2;
1372 }
1372 }
1373 return result;
1373 return result;
1374 }
1374 }
1375
1375
1376 /**
1376 /**
1377 * Find if a revision is a snapshot or not
1377 * Find if a revision is a snapshot or not
1378 *
1378 *
1379 * Only relevant for sparse-revlog case.
1379 * Only relevant for sparse-revlog case.
1380 * Callers must ensure that rev is in a valid range.
1380 * Callers must ensure that rev is in a valid range.
1381 */
1381 */
1382 static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
1382 static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
1383 {
1383 {
1384 int ps[2];
1384 int ps[2];
1385 int b;
1385 int b;
1386 Py_ssize_t base;
1386 Py_ssize_t base;
1387 while (rev >= 0) {
1387 while (rev >= 0) {
1388 base = (Py_ssize_t)index_baserev(self, rev);
1388 base = (Py_ssize_t)index_baserev(self, rev);
1389 if (base == rev) {
1389 if (base == rev) {
1390 base = -1;
1390 base = -1;
1391 }
1391 }
1392 if (base == -2) {
1392 if (base == -2) {
1393 assert(PyErr_Occurred());
1393 assert(PyErr_Occurred());
1394 return -1;
1394 return -1;
1395 }
1395 }
1396 if (base == -1) {
1396 if (base == -1) {
1397 return 1;
1397 return 1;
1398 }
1398 }
1399 if (index_get_parents(self, rev, ps, (int)rev) < 0) {
1399 if (index_get_parents(self, rev, ps, (int)rev) < 0) {
1400 assert(PyErr_Occurred());
1400 assert(PyErr_Occurred());
1401 return -1;
1401 return -1;
1402 };
1402 };
1403 while ((index_get_length(self, ps[0]) == 0) && ps[0] >= 0) {
1403 while ((index_get_length(self, ps[0]) == 0) && ps[0] >= 0) {
1404 b = index_baserev(self, ps[0]);
1404 b = index_baserev(self, ps[0]);
1405 if (b == ps[0]) {
1405 if (b == ps[0]) {
1406 break;
1406 break;
1407 }
1407 }
1408 ps[0] = b;
1408 ps[0] = b;
1409 }
1409 }
1410 while ((index_get_length(self, ps[1]) == 0) && ps[1] >= 0) {
1410 while ((index_get_length(self, ps[1]) == 0) && ps[1] >= 0) {
1411 b = index_baserev(self, ps[1]);
1411 b = index_baserev(self, ps[1]);
1412 if (b == ps[1]) {
1412 if (b == ps[1]) {
1413 break;
1413 break;
1414 }
1414 }
1415 ps[1] = b;
1415 ps[1] = b;
1416 }
1416 }
1417 if (base == ps[0] || base == ps[1]) {
1417 if (base == ps[0] || base == ps[1]) {
1418 return 0;
1418 return 0;
1419 }
1419 }
1420 rev = base;
1420 rev = base;
1421 }
1421 }
1422 return rev == -1;
1422 return rev == -1;
1423 }
1423 }
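
/*
 * A hedged toy model of the test above: a revision counts as a snapshot when
 * walking its delta-base chain never hits one of the current revision's
 * parents and eventually reaches the null revision.  Rev 2 below is an
 * intermediate snapshot: its base is an earlier snapshot, not a parent.  The
 * skipping of empty-delta ancestors done by the real code is left out.
 */
#include <stdio.h>

int main(void)
{
        /* toy data, one entry per revision 0..3 */
        int base[4] = {-1, 0, 0, 2}; /* delta base per revision */
        int parents[4][2] = {{-1, -1}, {0, -1}, {1, -1}, {2, -1}};
        int rev;

        for (rev = 0; rev < 4; rev++) {
                int r = rev, snapshot = 1;
                while (r >= 0) {
                        int b = base[r];
                        if (b == -1)
                                break; /* chain ends at null: a snapshot */
                        if (b == parents[r][0] || b == parents[r][1]) {
                                snapshot = 0; /* plain delta against a parent */
                                break;
                        }
                        r = b;
                }
                printf("rev %d: %s\n", rev, snapshot ? "snapshot" : "delta");
        }
        return 0;
}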
1424
1424
1425 static PyObject *index_issnapshot(indexObject *self, PyObject *value)
1425 static PyObject *index_issnapshot(indexObject *self, PyObject *value)
1426 {
1426 {
1427 long rev;
1427 long rev;
1428 int issnap;
1428 int issnap;
1429 Py_ssize_t length = index_length(self);
1429 Py_ssize_t length = index_length(self);
1430
1430
1431 if (!pylong_to_long(value, &rev)) {
1431 if (!pylong_to_long(value, &rev)) {
1432 return NULL;
1432 return NULL;
1433 }
1433 }
1434 if (rev < -1 || rev >= length) {
1434 if (rev < -1 || rev >= length) {
1435 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
1435 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
1436 rev);
1436 rev);
1437 return NULL;
1437 return NULL;
1438 };
1438 };
1439 issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
1439 issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
1440 if (issnap < 0) {
1440 if (issnap < 0) {
1441 return NULL;
1441 return NULL;
1442 };
1442 };
1443 return PyBool_FromLong((long)issnap);
1443 return PyBool_FromLong((long)issnap);
1444 }
1444 }
1445
1445
1446 static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
1446 static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
1447 {
1447 {
1448 Py_ssize_t start_rev;
1448 Py_ssize_t start_rev;
1449 Py_ssize_t end_rev;
1449 Py_ssize_t end_rev;
1450 PyObject *cache;
1450 PyObject *cache;
1451 Py_ssize_t base;
1451 Py_ssize_t base;
1452 Py_ssize_t rev;
1452 Py_ssize_t rev;
1453 PyObject *key = NULL;
1453 PyObject *key = NULL;
1454 PyObject *value = NULL;
1454 PyObject *value = NULL;
1455 const Py_ssize_t length = index_length(self);
1455 const Py_ssize_t length = index_length(self);
1456 if (!PyArg_ParseTuple(args, "O!nn", &PyDict_Type, &cache, &start_rev,
1456 if (!PyArg_ParseTuple(args, "O!nn", &PyDict_Type, &cache, &start_rev,
1457 &end_rev)) {
1457 &end_rev)) {
1458 return NULL;
1458 return NULL;
1459 }
1459 }
1460 end_rev += 1;
1460 end_rev += 1;
1461 if (end_rev > length) {
1461 if (end_rev > length) {
1462 end_rev = length;
1462 end_rev = length;
1463 }
1463 }
1464 if (start_rev < 0) {
1464 if (start_rev < 0) {
1465 start_rev = 0;
1465 start_rev = 0;
1466 }
1466 }
1467 for (rev = start_rev; rev < end_rev; rev++) {
1467 for (rev = start_rev; rev < end_rev; rev++) {
1468 int issnap;
1468 int issnap;
1469 PyObject *allvalues = NULL;
1469 PyObject *allvalues = NULL;
1470 issnap = index_issnapshotrev(self, rev);
1470 issnap = index_issnapshotrev(self, rev);
1471 if (issnap < 0) {
1471 if (issnap < 0) {
1472 goto bail;
1472 goto bail;
1473 }
1473 }
1474 if (issnap == 0) {
1474 if (issnap == 0) {
1475 continue;
1475 continue;
1476 }
1476 }
1477 base = (Py_ssize_t)index_baserev(self, rev);
1477 base = (Py_ssize_t)index_baserev(self, rev);
1478 if (base == rev) {
1478 if (base == rev) {
1479 base = -1;
1479 base = -1;
1480 }
1480 }
1481 if (base == -2) {
1481 if (base == -2) {
1482 assert(PyErr_Occurred());
1482 assert(PyErr_Occurred());
1483 goto bail;
1483 goto bail;
1484 }
1484 }
1485 key = PyLong_FromSsize_t(base);
1485 key = PyLong_FromSsize_t(base);
1486 allvalues = PyDict_GetItem(cache, key);
1486 allvalues = PyDict_GetItem(cache, key);
1487 if (allvalues == NULL && PyErr_Occurred()) {
1487 if (allvalues == NULL && PyErr_Occurred()) {
1488 goto bail;
1488 goto bail;
1489 }
1489 }
1490 if (allvalues == NULL) {
1490 if (allvalues == NULL) {
1491 int r;
1491 int r;
1492 allvalues = PyList_New(0);
1492 allvalues = PySet_New(0);
1493 if (!allvalues) {
1493 if (!allvalues) {
1494 goto bail;
1494 goto bail;
1495 }
1495 }
1496 r = PyDict_SetItem(cache, key, allvalues);
1496 r = PyDict_SetItem(cache, key, allvalues);
1497 Py_DECREF(allvalues);
1497 Py_DECREF(allvalues);
1498 if (r < 0) {
1498 if (r < 0) {
1499 goto bail;
1499 goto bail;
1500 }
1500 }
1501 }
1501 }
1502 value = PyLong_FromSsize_t(rev);
1502 value = PyLong_FromSsize_t(rev);
1503 if (PyList_Append(allvalues, value)) {
1503 if (PySet_Add(allvalues, value)) {
1504 goto bail;
1504 goto bail;
1505 }
1505 }
1506 Py_CLEAR(key);
1506 Py_CLEAR(key);
1507 Py_CLEAR(value);
1507 Py_CLEAR(value);
1508 }
1508 }
1509 Py_RETURN_NONE;
1509 Py_RETURN_NONE;
1510 bail:
1510 bail:
1511 Py_XDECREF(key);
1511 Py_XDECREF(key);
1512 Py_XDECREF(value);
1512 Py_XDECREF(value);
1513 return NULL;
1513 return NULL;
1514 }
1514 }
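
/*
 * The hunk above carries the change of this revision: the snapshot cache maps
 * a delta base to a *set* of snapshot revisions instead of a list; a set
 * collapses duplicate entries and gives constant-time membership tests.  A
 * hedged, standalone sketch of the same get-or-create dict/set pattern,
 * embedding the interpreter; it is not part of parsers.c and the helper name
 * is illustrative only.
 */
#include <Python.h>
#include <stdio.h>

static int record(PyObject *cache, Py_ssize_t base, Py_ssize_t rev)
{
        PyObject *key = PyLong_FromSsize_t(base);
        PyObject *value = PyLong_FromSsize_t(rev);
        PyObject *allvalues;
        int ret = -1;

        if (key == NULL || value == NULL)
                goto done;
        allvalues = PyDict_GetItem(cache, key); /* borrowed reference */
        if (allvalues == NULL) {
                allvalues = PySet_New(NULL);
                if (allvalues == NULL)
                        goto done;
                if (PyDict_SetItem(cache, key, allvalues) < 0) {
                        Py_DECREF(allvalues);
                        goto done;
                }
                Py_DECREF(allvalues); /* the dict keeps it alive from here */
        }
        ret = PySet_Add(allvalues, value);
done:
        Py_XDECREF(key);
        Py_XDECREF(value);
        return ret;
}

int main(void)
{
        PyObject *cache;
        Py_Initialize();
        cache = PyDict_New();
        record(cache, -1, 5);
        record(cache, -1, 5); /* the set silently absorbs the duplicate */
        record(cache, 5, 12);
        PyObject_Print(cache, stdout, 0);
        printf("\n"); /* expected output: {-1: {5}, 5: {12}} */
        Py_DECREF(cache);
        Py_FinalizeEx();
        return 0;
}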
1515
1515
1516 static PyObject *index_deltachain(indexObject *self, PyObject *args)
1516 static PyObject *index_deltachain(indexObject *self, PyObject *args)
1517 {
1517 {
1518 int rev, generaldelta;
1518 int rev, generaldelta;
1519 PyObject *stoparg;
1519 PyObject *stoparg;
1520 int stoprev, iterrev, baserev = -1;
1520 int stoprev, iterrev, baserev = -1;
1521 int stopped;
1521 int stopped;
1522 PyObject *chain = NULL, *result = NULL;
1522 PyObject *chain = NULL, *result = NULL;
1523 const Py_ssize_t length = index_length(self);
1523 const Py_ssize_t length = index_length(self);
1524
1524
1525 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
1525 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
1526 return NULL;
1526 return NULL;
1527 }
1527 }
1528
1528
1529 if (PyLong_Check(stoparg)) {
1529 if (PyLong_Check(stoparg)) {
1530 stoprev = (int)PyLong_AsLong(stoparg);
1530 stoprev = (int)PyLong_AsLong(stoparg);
1531 if (stoprev == -1 && PyErr_Occurred()) {
1531 if (stoprev == -1 && PyErr_Occurred()) {
1532 return NULL;
1532 return NULL;
1533 }
1533 }
1534 } else if (stoparg == Py_None) {
1534 } else if (stoparg == Py_None) {
1535 stoprev = -2;
1535 stoprev = -2;
1536 } else {
1536 } else {
1537 PyErr_SetString(PyExc_ValueError,
1537 PyErr_SetString(PyExc_ValueError,
1538 "stoprev must be integer or None");
1538 "stoprev must be integer or None");
1539 return NULL;
1539 return NULL;
1540 }
1540 }
1541
1541
1542 if (rev < 0 || rev >= length) {
1542 if (rev < 0 || rev >= length) {
1543 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1543 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1544 return NULL;
1544 return NULL;
1545 }
1545 }
1546
1546
1547 chain = PyList_New(0);
1547 chain = PyList_New(0);
1548 if (chain == NULL) {
1548 if (chain == NULL) {
1549 return NULL;
1549 return NULL;
1550 }
1550 }
1551
1551
1552 baserev = index_baserev(self, rev);
1552 baserev = index_baserev(self, rev);
1553
1553
1554 /* This should never happen. */
1554 /* This should never happen. */
1555 if (baserev <= -2) {
1555 if (baserev <= -2) {
1556 /* Error should be set by index_deref() */
1556 /* Error should be set by index_deref() */
1557 assert(PyErr_Occurred());
1557 assert(PyErr_Occurred());
1558 goto bail;
1558 goto bail;
1559 }
1559 }
1560
1560
1561 iterrev = rev;
1561 iterrev = rev;
1562
1562
1563 while (iterrev != baserev && iterrev != stoprev) {
1563 while (iterrev != baserev && iterrev != stoprev) {
1564 PyObject *value = PyLong_FromLong(iterrev);
1564 PyObject *value = PyLong_FromLong(iterrev);
1565 if (value == NULL) {
1565 if (value == NULL) {
1566 goto bail;
1566 goto bail;
1567 }
1567 }
1568 if (PyList_Append(chain, value)) {
1568 if (PyList_Append(chain, value)) {
1569 Py_DECREF(value);
1569 Py_DECREF(value);
1570 goto bail;
1570 goto bail;
1571 }
1571 }
1572 Py_DECREF(value);
1572 Py_DECREF(value);
1573
1573
1574 if (generaldelta) {
1574 if (generaldelta) {
1575 iterrev = baserev;
1575 iterrev = baserev;
1576 } else {
1576 } else {
1577 iterrev--;
1577 iterrev--;
1578 }
1578 }
1579
1579
1580 if (iterrev < 0) {
1580 if (iterrev < 0) {
1581 break;
1581 break;
1582 }
1582 }
1583
1583
1584 if (iterrev >= length) {
1584 if (iterrev >= length) {
1585 PyErr_SetString(PyExc_IndexError,
1585 PyErr_SetString(PyExc_IndexError,
1586 "revision outside index");
1586 "revision outside index");
1587 goto bail;
1587 goto bail;
1588 }
1588 }
1589
1589
1590 baserev = index_baserev(self, iterrev);
1590 baserev = index_baserev(self, iterrev);
1591
1591
1592 /* This should never happen. */
1592 /* This should never happen. */
1593 if (baserev <= -2) {
1593 if (baserev <= -2) {
1594 /* Error should be set by index_deref() */
1594 /* Error should be set by index_deref() */
1595 assert(PyErr_Occurred());
1595 assert(PyErr_Occurred());
1596 goto bail;
1596 goto bail;
1597 }
1597 }
1598 }
1598 }
1599
1599
1600 if (iterrev == stoprev) {
1600 if (iterrev == stoprev) {
1601 stopped = 1;
1601 stopped = 1;
1602 } else {
1602 } else {
1603 PyObject *value = PyLong_FromLong(iterrev);
1603 PyObject *value = PyLong_FromLong(iterrev);
1604 if (value == NULL) {
1604 if (value == NULL) {
1605 goto bail;
1605 goto bail;
1606 }
1606 }
1607 if (PyList_Append(chain, value)) {
1607 if (PyList_Append(chain, value)) {
1608 Py_DECREF(value);
1608 Py_DECREF(value);
1609 goto bail;
1609 goto bail;
1610 }
1610 }
1611 Py_DECREF(value);
1611 Py_DECREF(value);
1612
1612
1613 stopped = 0;
1613 stopped = 0;
1614 }
1614 }
1615
1615
1616 if (PyList_Reverse(chain)) {
1616 if (PyList_Reverse(chain)) {
1617 goto bail;
1617 goto bail;
1618 }
1618 }
1619
1619
1620 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
1620 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
1621 Py_DECREF(chain);
1621 Py_DECREF(chain);
1622 return result;
1622 return result;
1623
1623
1624 bail:
1624 bail:
1625 Py_DECREF(chain);
1625 Py_DECREF(chain);
1626 return NULL;
1626 return NULL;
1627 }
1627 }
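
/*
 * A hedged sketch of the walk above on toy data: with generaldelta each
 * revision names an arbitrary earlier base, so the chain is recovered by
 * following base[] links until a revision is its own base (a full text).
 * The real method collects the revisions into a list and reverses it.
 */
#include <stdio.h>

int main(void)
{
        int base[5] = {0, 0, 1, 1, 3}; /* toy bases; rev 0 is its own base */
        int iterrev = 4;

        while (base[iterrev] != iterrev) {
                printf("%d ", iterrev);
                iterrev = base[iterrev];
        }
        printf("%d\n", iterrev); /* prints: 4 3 1 0 */
        return 0;
}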
1628
1628
1629 static inline int64_t
1629 static inline int64_t
1630 index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
1630 index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
1631 {
1631 {
1632 int64_t start_offset;
1632 int64_t start_offset;
1633 int64_t end_offset;
1633 int64_t end_offset;
1634 int end_size;
1634 int end_size;
1635 start_offset = index_get_start(self, start_rev);
1635 start_offset = index_get_start(self, start_rev);
1636 if (start_offset < 0) {
1636 if (start_offset < 0) {
1637 return -1;
1637 return -1;
1638 }
1638 }
1639 end_offset = index_get_start(self, end_rev);
1639 end_offset = index_get_start(self, end_rev);
1640 if (end_offset < 0) {
1640 if (end_offset < 0) {
1641 return -1;
1641 return -1;
1642 }
1642 }
1643 end_size = index_get_length(self, end_rev);
1643 end_size = index_get_length(self, end_rev);
1644 if (end_size < 0) {
1644 if (end_size < 0) {
1645 return -1;
1645 return -1;
1646 }
1646 }
1647 if (end_offset < start_offset) {
1647 if (end_offset < start_offset) {
1648 PyErr_Format(PyExc_ValueError,
1648 PyErr_Format(PyExc_ValueError,
1649 "corrupted revlog index: inconsistent offset "
1649 "corrupted revlog index: inconsistent offset "
1650 "between revisions (%zd) and (%zd)",
1650 "between revisions (%zd) and (%zd)",
1651 start_rev, end_rev);
1651 start_rev, end_rev);
1652 return -1;
1652 return -1;
1653 }
1653 }
1654 return (end_offset - start_offset) + (int64_t)end_size;
1654 return (end_offset - start_offset) + (int64_t)end_size;
1655 }
1655 }
1656
1656
1657 /* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
1657 /* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
1658 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1658 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1659 Py_ssize_t startidx, Py_ssize_t endidx)
1659 Py_ssize_t startidx, Py_ssize_t endidx)
1660 {
1660 {
1661 int length;
1661 int length;
1662 while (endidx > 1 && endidx > startidx) {
1662 while (endidx > 1 && endidx > startidx) {
1663 length = index_get_length(self, revs[endidx - 1]);
1663 length = index_get_length(self, revs[endidx - 1]);
1664 if (length < 0) {
1664 if (length < 0) {
1665 return -1;
1665 return -1;
1666 }
1666 }
1667 if (length != 0) {
1667 if (length != 0) {
1668 break;
1668 break;
1669 }
1669 }
1670 endidx -= 1;
1670 endidx -= 1;
1671 }
1671 }
1672 return endidx;
1672 return endidx;
1673 }
1673 }
1674
1674
1675 struct Gap {
1675 struct Gap {
1676 int64_t size;
1676 int64_t size;
1677 Py_ssize_t idx;
1677 Py_ssize_t idx;
1678 };
1678 };
1679
1679
1680 static int gap_compare(const void *left, const void *right)
1680 static int gap_compare(const void *left, const void *right)
1681 {
1681 {
1682 const struct Gap *l_left = ((const struct Gap *)left);
1682 const struct Gap *l_left = ((const struct Gap *)left);
1683 const struct Gap *l_right = ((const struct Gap *)right);
1683 const struct Gap *l_right = ((const struct Gap *)right);
1684 if (l_left->size < l_right->size) {
1684 if (l_left->size < l_right->size) {
1685 return -1;
1685 return -1;
1686 } else if (l_left->size > l_right->size) {
1686 } else if (l_left->size > l_right->size) {
1687 return 1;
1687 return 1;
1688 }
1688 }
1689 return 0;
1689 return 0;
1690 }
1690 }
1691 static int Py_ssize_t_compare(const void *left, const void *right)
1691 static int Py_ssize_t_compare(const void *left, const void *right)
1692 {
1692 {
1693 const Py_ssize_t l_left = *(const Py_ssize_t *)left;
1693 const Py_ssize_t l_left = *(const Py_ssize_t *)left;
1694 const Py_ssize_t l_right = *(const Py_ssize_t *)right;
1694 const Py_ssize_t l_right = *(const Py_ssize_t *)right;
1695 if (l_left < l_right) {
1695 if (l_left < l_right) {
1696 return -1;
1696 return -1;
1697 } else if (l_left > l_right) {
1697 } else if (l_left > l_right) {
1698 return 1;
1698 return 1;
1699 }
1699 }
1700 return 0;
1700 return 0;
1701 }
1701 }
1702
1702
1703 static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
1703 static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
1704 {
1704 {
1705 /* method arguments */
1705 /* method arguments */
1706 PyObject *list_revs = NULL; /* revisions in the chain */
1706 PyObject *list_revs = NULL; /* revisions in the chain */
1707 double targetdensity = 0; /* min density to achieve */
1707 double targetdensity = 0; /* min density to achieve */
1708 Py_ssize_t mingapsize = 0; /* threshold to ignore gaps */
1708 Py_ssize_t mingapsize = 0; /* threshold to ignore gaps */
1709
1709
1710 /* other core variables */
1710 /* other core variables */
1711 Py_ssize_t idxlen = index_length(self);
1711 Py_ssize_t idxlen = index_length(self);
1712 Py_ssize_t i; /* used for various iteration */
1712 Py_ssize_t i; /* used for various iteration */
1713 PyObject *result = NULL; /* the final return of the function */
1713 PyObject *result = NULL; /* the final return of the function */
1714
1714
1715 /* generic information about the delta chain being sliced */
1715 /* generic information about the delta chain being sliced */
1716 Py_ssize_t num_revs = 0; /* size of the full delta chain */
1716 Py_ssize_t num_revs = 0; /* size of the full delta chain */
1717 Py_ssize_t *revs = NULL; /* native array of revisions in the chain */
1717 Py_ssize_t *revs = NULL; /* native array of revisions in the chain */
1718 int64_t chainpayload = 0; /* sum of all deltas in the chain */
1718 int64_t chainpayload = 0; /* sum of all deltas in the chain */
1719 int64_t deltachainspan = 0; /* distance from first byte to last byte */
1719 int64_t deltachainspan = 0; /* distance from first byte to last byte */
1720
1720
1721 /* variables used for slicing the delta chain */
1721 /* variables used for slicing the delta chain */
1722 int64_t readdata = 0; /* amount of data currently planned to be read */
1722 int64_t readdata = 0; /* amount of data currently planned to be read */
1723 double density = 0; /* ratio of payload data to data actually read */
1723 double density = 0; /* ratio of payload data to data actually read */
1724 int64_t previous_end;
1724 int64_t previous_end;
1725 struct Gap *gaps = NULL; /* array of notable gap in the chain */
1725 struct Gap *gaps = NULL; /* array of notable gap in the chain */
1726 Py_ssize_t num_gaps =
1726 Py_ssize_t num_gaps =
1727 0; /* total number of notable gaps recorded so far */
1727 0; /* total number of notable gaps recorded so far */
1728 Py_ssize_t *selected_indices = NULL; /* indices of gaps skipped over */
1728 Py_ssize_t *selected_indices = NULL; /* indices of gaps skipped over */
1729 Py_ssize_t num_selected = 0; /* number of gaps skipped */
1729 Py_ssize_t num_selected = 0; /* number of gaps skipped */
1730 PyObject *chunk = NULL; /* individual slice */
1730 PyObject *chunk = NULL; /* individual slice */
1731 PyObject *allchunks = NULL; /* all slices */
1731 PyObject *allchunks = NULL; /* all slices */
1732 Py_ssize_t previdx;
1732 Py_ssize_t previdx;
1733
1733
1734 /* parsing argument */
1734 /* parsing argument */
1735 if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
1735 if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
1736 &targetdensity, &mingapsize)) {
1736 &targetdensity, &mingapsize)) {
1737 goto bail;
1737 goto bail;
1738 }
1738 }
1739
1739
1740 /* If the delta chain contains a single element, we do not need slicing
1740 /* If the delta chain contains a single element, we do not need slicing
1741 */
1741 */
1742 num_revs = PyList_GET_SIZE(list_revs);
1742 num_revs = PyList_GET_SIZE(list_revs);
1743 if (num_revs <= 1) {
1743 if (num_revs <= 1) {
1744 result = PyTuple_Pack(1, list_revs);
1744 result = PyTuple_Pack(1, list_revs);
1745 goto done;
1745 goto done;
1746 }
1746 }
1747
1747
1748 /* Turn the python list into a native integer array (for efficiency) */
1748 /* Turn the python list into a native integer array (for efficiency) */
1749 revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
1749 revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
1750 if (revs == NULL) {
1750 if (revs == NULL) {
1751 PyErr_NoMemory();
1751 PyErr_NoMemory();
1752 goto bail;
1752 goto bail;
1753 }
1753 }
1754 for (i = 0; i < num_revs; i++) {
1754 for (i = 0; i < num_revs; i++) {
1755 Py_ssize_t revnum =
1755 Py_ssize_t revnum =
1756 PyLong_AsLong(PyList_GET_ITEM(list_revs, i));
1756 PyLong_AsLong(PyList_GET_ITEM(list_revs, i));
1757 if (revnum == -1 && PyErr_Occurred()) {
1757 if (revnum == -1 && PyErr_Occurred()) {
1758 goto bail;
1758 goto bail;
1759 }
1759 }
1760 if (revnum < nullrev || revnum >= idxlen) {
1760 if (revnum < nullrev || revnum >= idxlen) {
1761 PyErr_Format(PyExc_IndexError,
1761 PyErr_Format(PyExc_IndexError,
1762 "index out of range: %zd", revnum);
1762 "index out of range: %zd", revnum);
1763 goto bail;
1763 goto bail;
1764 }
1764 }
1765 revs[i] = revnum;
1765 revs[i] = revnum;
1766 }
1766 }
1767
1767
1768 /* Compute and check various properties of the unsliced delta chain */
1768 /* Compute and check various properties of the unsliced delta chain */
1769 deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
1769 deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
1770 if (deltachainspan < 0) {
1770 if (deltachainspan < 0) {
1771 goto bail;
1771 goto bail;
1772 }
1772 }
1773
1773
1774 if (deltachainspan <= mingapsize) {
1774 if (deltachainspan <= mingapsize) {
1775 result = PyTuple_Pack(1, list_revs);
1775 result = PyTuple_Pack(1, list_revs);
1776 goto done;
1776 goto done;
1777 }
1777 }
1778 chainpayload = 0;
1778 chainpayload = 0;
1779 for (i = 0; i < num_revs; i++) {
1779 for (i = 0; i < num_revs; i++) {
1780 int tmp = index_get_length(self, revs[i]);
1780 int tmp = index_get_length(self, revs[i]);
1781 if (tmp < 0) {
1781 if (tmp < 0) {
1782 goto bail;
1782 goto bail;
1783 }
1783 }
1784 chainpayload += tmp;
1784 chainpayload += tmp;
1785 }
1785 }
1786
1786
1787 readdata = deltachainspan;
1787 readdata = deltachainspan;
1788 density = 1.0;
1788 density = 1.0;
1789
1789
1790 if (0 < deltachainspan) {
1790 if (0 < deltachainspan) {
1791 density = (double)chainpayload / (double)deltachainspan;
1791 density = (double)chainpayload / (double)deltachainspan;
1792 }
1792 }
1793
1793
1794 if (density >= targetdensity) {
1794 if (density >= targetdensity) {
1795 result = PyTuple_Pack(1, list_revs);
1795 result = PyTuple_Pack(1, list_revs);
1796 goto done;
1796 goto done;
1797 }
1797 }
1798
1798
1799 /* if chain is too sparse, look for relevant gaps */
1799 /* if chain is too sparse, look for relevant gaps */
1800 gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
1800 gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
1801 if (gaps == NULL) {
1801 if (gaps == NULL) {
1802 PyErr_NoMemory();
1802 PyErr_NoMemory();
1803 goto bail;
1803 goto bail;
1804 }
1804 }
1805
1805
1806 previous_end = -1;
1806 previous_end = -1;
1807 for (i = 0; i < num_revs; i++) {
1807 for (i = 0; i < num_revs; i++) {
1808 int64_t revstart;
1808 int64_t revstart;
1809 int revsize;
1809 int revsize;
1810 revstart = index_get_start(self, revs[i]);
1810 revstart = index_get_start(self, revs[i]);
1811 if (revstart < 0) {
1811 if (revstart < 0) {
1812 goto bail;
1812 goto bail;
1813 };
1813 };
1814 revsize = index_get_length(self, revs[i]);
1814 revsize = index_get_length(self, revs[i]);
1815 if (revsize < 0) {
1815 if (revsize < 0) {
1816 goto bail;
1816 goto bail;
1817 };
1817 };
1818 if (revsize == 0) {
1818 if (revsize == 0) {
1819 continue;
1819 continue;
1820 }
1820 }
1821 if (previous_end >= 0) {
1821 if (previous_end >= 0) {
1822 int64_t gapsize = revstart - previous_end;
1822 int64_t gapsize = revstart - previous_end;
1823 if (gapsize > mingapsize) {
1823 if (gapsize > mingapsize) {
1824 gaps[num_gaps].size = gapsize;
1824 gaps[num_gaps].size = gapsize;
1825 gaps[num_gaps].idx = i;
1825 gaps[num_gaps].idx = i;
1826 num_gaps += 1;
1826 num_gaps += 1;
1827 }
1827 }
1828 }
1828 }
1829 previous_end = revstart + revsize;
1829 previous_end = revstart + revsize;
1830 }
1830 }
1831 if (num_gaps == 0) {
1831 if (num_gaps == 0) {
1832 result = PyTuple_Pack(1, list_revs);
1832 result = PyTuple_Pack(1, list_revs);
1833 goto done;
1833 goto done;
1834 }
1834 }
1835 qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);
1835 qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);
1836
1836
1837 /* Slice at the largest gaps first; they improve the density the most */
1837 /* Slice at the largest gaps first; they improve the density the most */
1838 selected_indices =
1838 selected_indices =
1839 (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
1839 (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
1840 if (selected_indices == NULL) {
1840 if (selected_indices == NULL) {
1841 PyErr_NoMemory();
1841 PyErr_NoMemory();
1842 goto bail;
1842 goto bail;
1843 }
1843 }
1844
1844
1845 for (i = num_gaps - 1; i >= 0; i--) {
1845 for (i = num_gaps - 1; i >= 0; i--) {
1846 selected_indices[num_selected] = gaps[i].idx;
1846 selected_indices[num_selected] = gaps[i].idx;
1847 readdata -= gaps[i].size;
1847 readdata -= gaps[i].size;
1848 num_selected += 1;
1848 num_selected += 1;
1849 if (readdata <= 0) {
1849 if (readdata <= 0) {
1850 density = 1.0;
1850 density = 1.0;
1851 } else {
1851 } else {
1852 density = (double)chainpayload / (double)readdata;
1852 density = (double)chainpayload / (double)readdata;
1853 }
1853 }
1854 if (density >= targetdensity) {
1854 if (density >= targetdensity) {
1855 break;
1855 break;
1856 }
1856 }
1857 }
1857 }
1858 qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
1858 qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
1859 &Py_ssize_t_compare);
1859 &Py_ssize_t_compare);
1860
1860
1861 /* create the resulting slice */
1861 /* create the resulting slice */
1862 allchunks = PyList_New(0);
1862 allchunks = PyList_New(0);
1863 if (allchunks == NULL) {
1863 if (allchunks == NULL) {
1864 goto bail;
1864 goto bail;
1865 }
1865 }
1866 previdx = 0;
1866 previdx = 0;
1867 selected_indices[num_selected] = num_revs;
1867 selected_indices[num_selected] = num_revs;
1868 for (i = 0; i <= num_selected; i++) {
1868 for (i = 0; i <= num_selected; i++) {
1869 Py_ssize_t idx = selected_indices[i];
1869 Py_ssize_t idx = selected_indices[i];
1870 Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
1870 Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
1871 if (endidx < 0) {
1871 if (endidx < 0) {
1872 goto bail;
1872 goto bail;
1873 }
1873 }
1874 if (previdx < endidx) {
1874 if (previdx < endidx) {
1875 chunk = PyList_GetSlice(list_revs, previdx, endidx);
1875 chunk = PyList_GetSlice(list_revs, previdx, endidx);
1876 if (chunk == NULL) {
1876 if (chunk == NULL) {
1877 goto bail;
1877 goto bail;
1878 }
1878 }
1879 if (PyList_Append(allchunks, chunk) == -1) {
1879 if (PyList_Append(allchunks, chunk) == -1) {
1880 goto bail;
1880 goto bail;
1881 }
1881 }
1882 Py_DECREF(chunk);
1882 Py_DECREF(chunk);
1883 chunk = NULL;
1883 chunk = NULL;
1884 }
1884 }
1885 previdx = idx;
1885 previdx = idx;
1886 }
1886 }
1887 result = allchunks;
1887 result = allchunks;
1888 goto done;
1888 goto done;
1889
1889
1890 bail:
1890 bail:
1891 Py_XDECREF(allchunks);
1891 Py_XDECREF(allchunks);
1892 Py_XDECREF(chunk);
1892 Py_XDECREF(chunk);
1893 done:
1893 done:
1894 free(revs);
1894 free(revs);
1895 free(gaps);
1895 free(gaps);
1896 free(selected_indices);
1896 free(selected_indices);
1897 return result;
1897 return result;
1898 }
1898 }
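
/*
 * A hedged sketch of the density arithmetic driving the slicing above: the
 * density is the chain payload divided by the data actually read, and
 * subtracting the largest gaps first raises it fastest, which is why the gaps
 * are sorted by size before being skipped.  The numbers below are made up and
 * not taken from a real revlog.
 */
#include <stdio.h>

int main(void)
{
        double chainpayload = 400.0;
        double readdata = 1000.0;             /* full span of the chain */
        double gaps[] = {300.0, 150.0, 50.0}; /* sorted, largest first */
        double targetdensity = 0.75;
        int i;

        for (i = 0; i < 3 && chainpayload / readdata < targetdensity; i++) {
                readdata -= gaps[i]; /* slice here: the gap is never read */
                printf("skip gap %.0f -> density %.2f\n", gaps[i],
                       chainpayload / readdata);
        }
        return 0;
}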
1899
1899
1900 static inline int nt_level(const char *node, Py_ssize_t level)
1900 static inline int nt_level(const char *node, Py_ssize_t level)
1901 {
1901 {
1902 int v = node[level >> 1];
1902 int v = node[level >> 1];
1903 if (!(level & 1))
1903 if (!(level & 1))
1904 v >>= 4;
1904 v >>= 4;
1905 return v & 0xf;
1905 return v & 0xf;
1906 }
1906 }
1907
1907
1908 /*
1908 /*
1909 * Return values:
1909 * Return values:
1910 *
1910 *
1911 * -4: match is ambiguous (multiple candidates)
1911 * -4: match is ambiguous (multiple candidates)
1912 * -2: not found
1912 * -2: not found
1913 * rest: valid rev
1913 * rest: valid rev
1914 */
1914 */
1915 static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
1915 static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
1916 int hex)
1916 int hex)
1917 {
1917 {
1918 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
1918 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
1919 int level, maxlevel, off;
1919 int level, maxlevel, off;
1920
1920
1921 /* If the input is binary, do a fast check for the nullid first. */
1921 /* If the input is binary, do a fast check for the nullid first. */
1922 if (!hex && nodelen == self->nodelen && node[0] == '\0' &&
1922 if (!hex && nodelen == self->nodelen && node[0] == '\0' &&
1923 node[1] == '\0' && memcmp(node, nullid, self->nodelen) == 0)
1923 node[1] == '\0' && memcmp(node, nullid, self->nodelen) == 0)
1924 return -1;
1924 return -1;
1925
1925
1926 if (hex)
1926 if (hex)
1927 maxlevel = nodelen;
1927 maxlevel = nodelen;
1928 else
1928 else
1929 maxlevel = 2 * nodelen;
1929 maxlevel = 2 * nodelen;
1930 if (maxlevel > 2 * self->nodelen)
1930 if (maxlevel > 2 * self->nodelen)
1931 maxlevel = 2 * self->nodelen;
1931 maxlevel = 2 * self->nodelen;
1932
1932
1933 for (level = off = 0; level < maxlevel; level++) {
1933 for (level = off = 0; level < maxlevel; level++) {
1934 int k = getnybble(node, level);
1934 int k = getnybble(node, level);
1935 nodetreenode *n = &self->nodes[off];
1935 nodetreenode *n = &self->nodes[off];
1936 int v = n->children[k];
1936 int v = n->children[k];
1937
1937
1938 if (v < 0) {
1938 if (v < 0) {
1939 const char *n;
1939 const char *n;
1940 Py_ssize_t i;
1940 Py_ssize_t i;
1941
1941
1942 v = -(v + 2);
1942 v = -(v + 2);
1943 n = index_node(self->index, v);
1943 n = index_node(self->index, v);
1944 if (n == NULL)
1944 if (n == NULL)
1945 return -2;
1945 return -2;
1946 for (i = level; i < maxlevel; i++)
1946 for (i = level; i < maxlevel; i++)
1947 if (getnybble(node, i) != nt_level(n, i))
1947 if (getnybble(node, i) != nt_level(n, i))
1948 return -2;
1948 return -2;
1949 return v;
1949 return v;
1950 }
1950 }
1951 if (v == 0)
1951 if (v == 0)
1952 return -2;
1952 return -2;
1953 off = v;
1953 off = v;
1954 }
1954 }
1955 /* multiple matches against an ambiguous prefix */
1955 /* multiple matches against an ambiguous prefix */
1956 return -4;
1956 return -4;
1957 }
1957 }
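
/*
 * A hedged aside on the children[] encoding used by nt_find() above and
 * nt_insert() below: zero means "empty", a positive value is the offset of the
 * next internal node, and a negative value is a leaf storing a revision as
 * -(rev + 2), so rev 0 becomes -2, rev 1 becomes -3, and so on, leaving zero
 * and positive values free.  A tiny standalone round-trip check (the helper
 * names are illustrative only):
 */
#include <assert.h>

static int nt_encode_leaf(int rev) { return -(rev + 2); }
static int nt_decode_leaf(int v) { return -(v + 2); }

int main(void)
{
        assert(nt_encode_leaf(0) == -2);
        assert(nt_encode_leaf(1) == -3);
        assert(nt_decode_leaf(nt_encode_leaf(123456)) == 123456);
        return 0;
}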
1958
1958
1959 static int nt_new(nodetree *self)
1959 static int nt_new(nodetree *self)
1960 {
1960 {
1961 if (self->length == self->capacity) {
1961 if (self->length == self->capacity) {
1962 size_t newcapacity;
1962 size_t newcapacity;
1963 nodetreenode *newnodes;
1963 nodetreenode *newnodes;
1964 newcapacity = self->capacity * 2;
1964 newcapacity = self->capacity * 2;
1965 if (newcapacity >= SIZE_MAX / sizeof(nodetreenode)) {
1965 if (newcapacity >= SIZE_MAX / sizeof(nodetreenode)) {
1966 PyErr_SetString(PyExc_MemoryError,
1966 PyErr_SetString(PyExc_MemoryError,
1967 "overflow in nt_new");
1967 "overflow in nt_new");
1968 return -1;
1968 return -1;
1969 }
1969 }
1970 newnodes =
1970 newnodes =
1971 realloc(self->nodes, newcapacity * sizeof(nodetreenode));
1971 realloc(self->nodes, newcapacity * sizeof(nodetreenode));
1972 if (newnodes == NULL) {
1972 if (newnodes == NULL) {
1973 PyErr_SetString(PyExc_MemoryError, "out of memory");
1973 PyErr_SetString(PyExc_MemoryError, "out of memory");
1974 return -1;
1974 return -1;
1975 }
1975 }
1976 self->capacity = newcapacity;
1976 self->capacity = newcapacity;
1977 self->nodes = newnodes;
1977 self->nodes = newnodes;
1978 memset(&self->nodes[self->length], 0,
1978 memset(&self->nodes[self->length], 0,
1979 sizeof(nodetreenode) * (self->capacity - self->length));
1979 sizeof(nodetreenode) * (self->capacity - self->length));
1980 }
1980 }
1981 return self->length++;
1981 return self->length++;
1982 }
1982 }
1983
1983
1984 static int nt_insert(nodetree *self, const char *node, int rev)
1984 static int nt_insert(nodetree *self, const char *node, int rev)
1985 {
1985 {
1986 int level = 0;
1986 int level = 0;
1987 int off = 0;
1987 int off = 0;
1988
1988
1989 while (level < 2 * self->nodelen) {
1989 while (level < 2 * self->nodelen) {
1990 int k = nt_level(node, level);
1990 int k = nt_level(node, level);
1991 nodetreenode *n;
1991 nodetreenode *n;
1992 int v;
1992 int v;
1993
1993
1994 n = &self->nodes[off];
1994 n = &self->nodes[off];
1995 v = n->children[k];
1995 v = n->children[k];
1996
1996
1997 if (v == 0) {
1997 if (v == 0) {
1998 n->children[k] = -rev - 2;
1998 n->children[k] = -rev - 2;
1999 return 0;
1999 return 0;
2000 }
2000 }
2001 if (v < 0) {
2001 if (v < 0) {
2002 const char *oldnode =
2002 const char *oldnode =
2003 index_node_existing(self->index, -(v + 2));
2003 index_node_existing(self->index, -(v + 2));
2004 int noff;
2004 int noff;
2005
2005
2006 if (oldnode == NULL)
2006 if (oldnode == NULL)
2007 return -1;
2007 return -1;
2008 if (!memcmp(oldnode, node, self->nodelen)) {
2008 if (!memcmp(oldnode, node, self->nodelen)) {
2009 n->children[k] = -rev - 2;
2009 n->children[k] = -rev - 2;
2010 return 0;
2010 return 0;
2011 }
2011 }
2012 noff = nt_new(self);
2012 noff = nt_new(self);
2013 if (noff == -1)
2013 if (noff == -1)
2014 return -1;
2014 return -1;
2015 /* self->nodes may have been changed by realloc */
2015 /* self->nodes may have been changed by realloc */
2016 self->nodes[off].children[k] = noff;
2016 self->nodes[off].children[k] = noff;
2017 off = noff;
2017 off = noff;
2018 n = &self->nodes[off];
2018 n = &self->nodes[off];
2019 n->children[nt_level(oldnode, ++level)] = v;
2019 n->children[nt_level(oldnode, ++level)] = v;
2020 if (level > self->depth)
2020 if (level > self->depth)
2021 self->depth = level;
2021 self->depth = level;
2022 self->splits += 1;
2022 self->splits += 1;
2023 } else {
2023 } else {
2024 level += 1;
2024 level += 1;
2025 off = v;
2025 off = v;
2026 }
2026 }
2027 }
2027 }
2028
2028
2029 return -1;
2029 return -1;
2030 }
2030 }
2031
2031
2032 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
2032 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
2033 {
2033 {
2034 Py_ssize_t rev;
2034 Py_ssize_t rev;
2035 const char *node;
2035 const char *node;
2036 Py_ssize_t length;
2036 Py_ssize_t length;
2037 if (!PyArg_ParseTuple(args, "n", &rev))
2037 if (!PyArg_ParseTuple(args, "n", &rev))
2038 return NULL;
2038 return NULL;
2039 length = index_length(self->nt.index);
2039 length = index_length(self->nt.index);
2040 if (rev < 0 || rev >= length) {
2040 if (rev < 0 || rev >= length) {
2041 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
2041 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
2042 return NULL;
2042 return NULL;
2043 }
2043 }
2044 node = index_node_existing(self->nt.index, rev);
2044 node = index_node_existing(self->nt.index, rev);
2045 if (nt_insert(&self->nt, node, (int)rev) == -1)
2045 if (nt_insert(&self->nt, node, (int)rev) == -1)
2046 return NULL;
2046 return NULL;
2047 Py_RETURN_NONE;
2047 Py_RETURN_NONE;
2048 }
2048 }
2049
2049
2050 static int nt_delete_node(nodetree *self, const char *node)
2050 static int nt_delete_node(nodetree *self, const char *node)
2051 {
2051 {
2052 /* rev==-2 happens to get encoded as 0, which is interpreted as not set
2052 /* rev==-2 happens to get encoded as 0, which is interpreted as not set
2053 */
2053 */
2054 return nt_insert(self, node, -2);
2054 return nt_insert(self, node, -2);
2055 }
2055 }
2056
2056
2057 static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
2057 static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
2058 {
2058 {
2059 /* Initialize before overflow-checking to avoid nt_dealloc() crash. */
2059 /* Initialize before overflow-checking to avoid nt_dealloc() crash. */
2060 self->nodes = NULL;
2060 self->nodes = NULL;
2061
2061
2062 self->index = index;
2062 self->index = index;
2063 /* The input capacity is in terms of revisions, while the field is in
2063 /* The input capacity is in terms of revisions, while the field is in
2064 * terms of nodetree nodes. */
2064 * terms of nodetree nodes. */
2065 self->capacity = (capacity < 4 ? 4 : capacity / 2);
2065 self->capacity = (capacity < 4 ? 4 : capacity / 2);
2066 self->nodelen = index->nodelen;
2066 self->nodelen = index->nodelen;
2067 self->depth = 0;
2067 self->depth = 0;
2068 self->splits = 0;
2068 self->splits = 0;
2069 if (self->capacity > SIZE_MAX / sizeof(nodetreenode)) {
2069 if (self->capacity > SIZE_MAX / sizeof(nodetreenode)) {
2070 PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
2070 PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
2071 return -1;
2071 return -1;
2072 }
2072 }
2073 self->nodes = calloc(self->capacity, sizeof(nodetreenode));
2073 self->nodes = calloc(self->capacity, sizeof(nodetreenode));
2074 if (self->nodes == NULL) {
2074 if (self->nodes == NULL) {
2075 PyErr_NoMemory();
2075 PyErr_NoMemory();
2076 return -1;
2076 return -1;
2077 }
2077 }
2078 self->length = 1;
2078 self->length = 1;
2079 return 0;
2079 return 0;
2080 }
2080 }
2081
2081
2082 static int ntobj_init(nodetreeObject *self, PyObject *args)
2082 static int ntobj_init(nodetreeObject *self, PyObject *args)
2083 {
2083 {
2084 PyObject *index;
2084 PyObject *index;
2085 unsigned capacity;
2085 unsigned capacity;
2086 if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
2086 if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
2087 &capacity))
2087 &capacity))
2088 return -1;
2088 return -1;
2089 Py_INCREF(index);
2089 Py_INCREF(index);
2090 return nt_init(&self->nt, (indexObject *)index, capacity);
2090 return nt_init(&self->nt, (indexObject *)index, capacity);
2091 }
2091 }
2092
2092
2093 static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
2093 static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
2094 {
2094 {
2095 return nt_find(self, node, nodelen, 1);
2095 return nt_find(self, node, nodelen, 1);
2096 }
2096 }
2097
2097
2098 /*
2098 /*
2099 * Find the length of the shortest unique prefix of node.
2099 * Find the length of the shortest unique prefix of node.
2100 *
2100 *
2101 * Return values:
2101 * Return values:
2102 *
2102 *
2103 * -3: error (exception set)
2103 * -3: error (exception set)
2104 * -2: not found (no exception set)
2104 * -2: not found (no exception set)
2105 * rest: length of shortest prefix
2105 * rest: length of shortest prefix
2106 */
2106 */
2107 static int nt_shortest(nodetree *self, const char *node)
2107 static int nt_shortest(nodetree *self, const char *node)
2108 {
2108 {
2109 int level, off;
2109 int level, off;
2110
2110
2111 for (level = off = 0; level < 2 * self->nodelen; level++) {
2111 for (level = off = 0; level < 2 * self->nodelen; level++) {
2112 int k, v;
2112 int k, v;
2113 nodetreenode *n = &self->nodes[off];
2113 nodetreenode *n = &self->nodes[off];
2114 k = nt_level(node, level);
2114 k = nt_level(node, level);
2115 v = n->children[k];
2115 v = n->children[k];
2116 if (v < 0) {
2116 if (v < 0) {
2117 const char *n;
2117 const char *n;
2118 v = -(v + 2);
2118 v = -(v + 2);
2119 n = index_node_existing(self->index, v);
2119 n = index_node_existing(self->index, v);
2120 if (n == NULL)
2120 if (n == NULL)
2121 return -3;
2121 return -3;
2122 if (memcmp(node, n, self->nodelen) != 0)
2122 if (memcmp(node, n, self->nodelen) != 0)
2123 /*
2123 /*
2124 * Found a unique prefix, but it wasn't for the
2124 * Found a unique prefix, but it wasn't for the
2125 * requested node (i.e the requested node does
2125 * requested node (i.e the requested node does
2126 * not exist).
2126 * not exist).
2127 */
2127 */
2128 return -2;
2128 return -2;
2129 return level + 1;
2129 return level + 1;
2130 }
2130 }
2131 if (v == 0)
2131 if (v == 0)
2132 return -2;
2132 return -2;
2133 off = v;
2133 off = v;
2134 }
2134 }
2135 /*
2135 /*
2136 * The node was still not unique after 40 hex digits, so this won't
2136 * The node was still not unique after 40 hex digits, so this won't
2137 * happen. Also, if we get here, then there's a programming error in
2137 * happen. Also, if we get here, then there's a programming error in
2138 * this file that made us insert a node longer than 40 hex digits.
2138 * this file that made us insert a node longer than 40 hex digits.
2139 */
2139 */
2140 PyErr_SetString(PyExc_Exception, "broken node tree");
2140 PyErr_SetString(PyExc_Exception, "broken node tree");
2141 return -3;
2141 return -3;
2142 }
2142 }
2143
2143
2144 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
2144 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
2145 {
2145 {
2146 PyObject *val;
2146 PyObject *val;
2147 char *node;
2147 char *node;
2148 int length;
2148 int length;
2149
2149
2150 if (!PyArg_ParseTuple(args, "O", &val))
2150 if (!PyArg_ParseTuple(args, "O", &val))
2151 return NULL;
2151 return NULL;
2152 if (node_check(self->nt.nodelen, val, &node) == -1)
2152 if (node_check(self->nt.nodelen, val, &node) == -1)
2153 return NULL;
2153 return NULL;
2154
2154
2155 length = nt_shortest(&self->nt, node);
2155 length = nt_shortest(&self->nt, node);
2156 if (length == -3)
2156 if (length == -3)
2157 return NULL;
2157 return NULL;
2158 if (length == -2) {
2158 if (length == -2) {
2159 raise_revlog_error();
2159 raise_revlog_error();
2160 return NULL;
2160 return NULL;
2161 }
2161 }
2162 return PyLong_FromLong(length);
2162 return PyLong_FromLong(length);
2163 }
2163 }
2164
2164
2165 static void nt_dealloc(nodetree *self)
2165 static void nt_dealloc(nodetree *self)
2166 {
2166 {
2167 free(self->nodes);
2167 free(self->nodes);
2168 self->nodes = NULL;
2168 self->nodes = NULL;
2169 }
2169 }
2170
2170
2171 static void ntobj_dealloc(nodetreeObject *self)
2171 static void ntobj_dealloc(nodetreeObject *self)
2172 {
2172 {
2173 Py_XDECREF(self->nt.index);
2173 Py_XDECREF(self->nt.index);
2174 nt_dealloc(&self->nt);
2174 nt_dealloc(&self->nt);
2175 PyObject_Del(self);
2175 PyObject_Del(self);
2176 }
2176 }
2177
2177
2178 static PyMethodDef ntobj_methods[] = {
2178 static PyMethodDef ntobj_methods[] = {
2179 {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
2179 {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
2180 "insert an index entry"},
2180 "insert an index entry"},
2181 {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
2181 {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
2182 "find length of shortest hex nodeid of a binary ID"},
2182 "find length of shortest hex nodeid of a binary ID"},
2183 {NULL} /* Sentinel */
2183 {NULL} /* Sentinel */
2184 };
2184 };
2185
2185
2186 static PyTypeObject nodetreeType = {
2186 static PyTypeObject nodetreeType = {
2187 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2187 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2188 "parsers.nodetree", /* tp_name */
2188 "parsers.nodetree", /* tp_name */
2189 sizeof(nodetreeObject), /* tp_basicsize */
2189 sizeof(nodetreeObject), /* tp_basicsize */
2190 0, /* tp_itemsize */
2190 0, /* tp_itemsize */
2191 (destructor)ntobj_dealloc, /* tp_dealloc */
2191 (destructor)ntobj_dealloc, /* tp_dealloc */
2192 0, /* tp_print */
2192 0, /* tp_print */
2193 0, /* tp_getattr */
2193 0, /* tp_getattr */
2194 0, /* tp_setattr */
2194 0, /* tp_setattr */
2195 0, /* tp_compare */
2195 0, /* tp_compare */
2196 0, /* tp_repr */
2196 0, /* tp_repr */
2197 0, /* tp_as_number */
2197 0, /* tp_as_number */
2198 0, /* tp_as_sequence */
2198 0, /* tp_as_sequence */
2199 0, /* tp_as_mapping */
2199 0, /* tp_as_mapping */
2200 0, /* tp_hash */
2200 0, /* tp_hash */
2201 0, /* tp_call */
2201 0, /* tp_call */
2202 0, /* tp_str */
2202 0, /* tp_str */
2203 0, /* tp_getattro */
2203 0, /* tp_getattro */
2204 0, /* tp_setattro */
2204 0, /* tp_setattro */
2205 0, /* tp_as_buffer */
2205 0, /* tp_as_buffer */
2206 Py_TPFLAGS_DEFAULT, /* tp_flags */
2206 Py_TPFLAGS_DEFAULT, /* tp_flags */
2207 "nodetree", /* tp_doc */
2207 "nodetree", /* tp_doc */
2208 0, /* tp_traverse */
2208 0, /* tp_traverse */
2209 0, /* tp_clear */
2209 0, /* tp_clear */
2210 0, /* tp_richcompare */
2210 0, /* tp_richcompare */
2211 0, /* tp_weaklistoffset */
2211 0, /* tp_weaklistoffset */
2212 0, /* tp_iter */
2212 0, /* tp_iter */
2213 0, /* tp_iternext */
2213 0, /* tp_iternext */
2214 ntobj_methods, /* tp_methods */
2214 ntobj_methods, /* tp_methods */
2215 0, /* tp_members */
2215 0, /* tp_members */
2216 0, /* tp_getset */
2216 0, /* tp_getset */
2217 0, /* tp_base */
2217 0, /* tp_base */
2218 0, /* tp_dict */
2218 0, /* tp_dict */
2219 0, /* tp_descr_get */
2219 0, /* tp_descr_get */
2220 0, /* tp_descr_set */
2220 0, /* tp_descr_set */
2221 0, /* tp_dictoffset */
2221 0, /* tp_dictoffset */
2222 (initproc)ntobj_init, /* tp_init */
2222 (initproc)ntobj_init, /* tp_init */
2223 0, /* tp_alloc */
2223 0, /* tp_alloc */
2224 };
2224 };
2225
2225
2226 static int index_init_nt(indexObject *self)
2226 static int index_init_nt(indexObject *self)
2227 {
2227 {
2228 if (!self->ntinitialized) {
2228 if (!self->ntinitialized) {
2229 if (nt_init(&self->nt, self, (int)self->length) == -1) {
2229 if (nt_init(&self->nt, self, (int)self->length) == -1) {
2230 nt_dealloc(&self->nt);
2230 nt_dealloc(&self->nt);
2231 return -1;
2231 return -1;
2232 }
2232 }
2233 if (nt_insert(&self->nt, nullid, -1) == -1) {
2233 if (nt_insert(&self->nt, nullid, -1) == -1) {
2234 nt_dealloc(&self->nt);
2234 nt_dealloc(&self->nt);
2235 return -1;
2235 return -1;
2236 }
2236 }
2237 self->ntinitialized = 1;
2237 self->ntinitialized = 1;
2238 self->ntrev = (int)index_length(self);
2238 self->ntrev = (int)index_length(self);
2239 self->ntlookups = 1;
2239 self->ntlookups = 1;
2240 self->ntmisses = 0;
2240 self->ntmisses = 0;
2241 }
2241 }
2242 return 0;
2242 return 0;
2243 }
2243 }
2244
2244
2245 /*
2245 /*
2246 * Return values:
2246 * Return values:
2247 *
2247 *
2248 * -3: error (exception set)
2248 * -3: error (exception set)
2249 * -2: not found (no exception set)
2249 * -2: not found (no exception set)
2250 * rest: valid rev
2250 * rest: valid rev
2251 */
2251 */
2252 static int index_find_node(indexObject *self, const char *node)
2252 static int index_find_node(indexObject *self, const char *node)
2253 {
2253 {
2254 int rev;
2254 int rev;
2255
2255
2256 if (index_init_nt(self) == -1)
2256 if (index_init_nt(self) == -1)
2257 return -3;
2257 return -3;
2258
2258
2259 self->ntlookups++;
2259 self->ntlookups++;
2260 rev = nt_find(&self->nt, node, self->nodelen, 0);
2260 rev = nt_find(&self->nt, node, self->nodelen, 0);
2261 if (rev >= -1)
2261 if (rev >= -1)
2262 return rev;
2262 return rev;
2263
2263
2264 /*
2264 /*
2265 * For the first handful of lookups, we scan the entire index,
2265 * For the first handful of lookups, we scan the entire index,
2266 * and cache only the matching nodes. This optimizes for cases
2266 * and cache only the matching nodes. This optimizes for cases
2267 * like "hg tip", where only a few nodes are accessed.
2267 * like "hg tip", where only a few nodes are accessed.
2268 *
2268 *
2269 * After that, we cache every node we visit, using a single
2269 * After that, we cache every node we visit, using a single
2270 * scan amortized over multiple lookups. This gives the best
2270 * scan amortized over multiple lookups. This gives the best
2271 * bulk performance, e.g. for "hg log".
2271 * bulk performance, e.g. for "hg log".
2272 */
2272 */
2273 if (self->ntmisses++ < 4) {
2273 if (self->ntmisses++ < 4) {
2274 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2274 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2275 const char *n = index_node_existing(self, rev);
2275 const char *n = index_node_existing(self, rev);
2276 if (n == NULL)
2276 if (n == NULL)
2277 return -3;
2277 return -3;
2278 if (memcmp(node, n, self->nodelen) == 0) {
2278 if (memcmp(node, n, self->nodelen) == 0) {
2279 if (nt_insert(&self->nt, n, rev) == -1)
2279 if (nt_insert(&self->nt, n, rev) == -1)
2280 return -3;
2280 return -3;
2281 break;
2281 break;
2282 }
2282 }
2283 }
2283 }
2284 } else {
2284 } else {
2285 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2285 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2286 const char *n = index_node_existing(self, rev);
2286 const char *n = index_node_existing(self, rev);
2287 if (n == NULL)
2287 if (n == NULL)
2288 return -3;
2288 return -3;
2289 if (nt_insert(&self->nt, n, rev) == -1) {
2289 if (nt_insert(&self->nt, n, rev) == -1) {
2290 self->ntrev = rev + 1;
2290 self->ntrev = rev + 1;
2291 return -3;
2291 return -3;
2292 }
2292 }
2293 if (memcmp(node, n, self->nodelen) == 0) {
2293 if (memcmp(node, n, self->nodelen) == 0) {
2294 break;
2294 break;
2295 }
2295 }
2296 }
2296 }
2297 self->ntrev = rev;
2297 self->ntrev = rev;
2298 }
2298 }
2299
2299
2300 if (rev >= 0)
2300 if (rev >= 0)
2301 return rev;
2301 return rev;
2302 return -2;
2302 return -2;
2303 }
2303 }
2304
2304
2305 static PyObject *index_getitem(indexObject *self, PyObject *value)
2305 static PyObject *index_getitem(indexObject *self, PyObject *value)
2306 {
2306 {
2307 char *node;
2307 char *node;
2308 int rev;
2308 int rev;
2309
2309
2310 if (PyLong_Check(value)) {
2310 if (PyLong_Check(value)) {
2311 long idx;
2311 long idx;
2312 if (!pylong_to_long(value, &idx)) {
2312 if (!pylong_to_long(value, &idx)) {
2313 return NULL;
2313 return NULL;
2314 }
2314 }
2315 return index_get(self, idx);
2315 return index_get(self, idx);
2316 }
2316 }
2317
2317
2318 if (node_check(self->nodelen, value, &node) == -1)
2318 if (node_check(self->nodelen, value, &node) == -1)
2319 return NULL;
2319 return NULL;
2320 rev = index_find_node(self, node);
2320 rev = index_find_node(self, node);
2321 if (rev >= -1)
2321 if (rev >= -1)
2322 return PyLong_FromLong(rev);
2322 return PyLong_FromLong(rev);
2323 if (rev == -2)
2323 if (rev == -2)
2324 raise_revlog_error();
2324 raise_revlog_error();
2325 return NULL;
2325 return NULL;
2326 }
2326 }
2327
2327
2328 /*
2328 /*
2329 * Fully populate the radix tree.
2329 * Fully populate the radix tree.
2330 */
2330 */
2331 static int index_populate_nt(indexObject *self)
2331 static int index_populate_nt(indexObject *self)
2332 {
2332 {
2333 int rev;
2333 int rev;
2334 if (self->ntrev > 0) {
2334 if (self->ntrev > 0) {
2335 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2335 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2336 const char *n = index_node_existing(self, rev);
2336 const char *n = index_node_existing(self, rev);
2337 if (n == NULL)
2337 if (n == NULL)
2338 return -1;
2338 return -1;
2339 if (nt_insert(&self->nt, n, rev) == -1)
2339 if (nt_insert(&self->nt, n, rev) == -1)
2340 return -1;
2340 return -1;
2341 }
2341 }
2342 self->ntrev = -1;
2342 self->ntrev = -1;
2343 }
2343 }
2344 return 0;
2344 return 0;
2345 }
2345 }
2346
2346
2347 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
2347 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
2348 {
2348 {
2349 const char *fullnode;
2349 const char *fullnode;
2350 Py_ssize_t nodelen;
2350 Py_ssize_t nodelen;
2351 char *node;
2351 char *node;
2352 int rev, i;
2352 int rev, i;
2353
2353
2354 if (!PyArg_ParseTuple(args, "y#", &node, &nodelen))
2354 if (!PyArg_ParseTuple(args, "y#", &node, &nodelen))
2355 return NULL;
2355 return NULL;
2356
2356
2357 if (nodelen < 1) {
2357 if (nodelen < 1) {
2358 PyErr_SetString(PyExc_ValueError, "key too short");
2358 PyErr_SetString(PyExc_ValueError, "key too short");
2359 return NULL;
2359 return NULL;
2360 }
2360 }
2361
2361
2362 if (nodelen > 2 * self->nodelen) {
2362 if (nodelen > 2 * self->nodelen) {
2363 PyErr_SetString(PyExc_ValueError, "key too long");
2363 PyErr_SetString(PyExc_ValueError, "key too long");
2364 return NULL;
2364 return NULL;
2365 }
2365 }
2366
2366
2367 for (i = 0; i < nodelen; i++)
2367 for (i = 0; i < nodelen; i++)
2368 hexdigit(node, i);
2368 hexdigit(node, i);
2369 if (PyErr_Occurred()) {
2369 if (PyErr_Occurred()) {
2370 /* input contains non-hex characters */
2370 /* input contains non-hex characters */
2371 PyErr_Clear();
2371 PyErr_Clear();
2372 Py_RETURN_NONE;
2372 Py_RETURN_NONE;
2373 }
2373 }
2374
2374
2375 if (index_init_nt(self) == -1)
2375 if (index_init_nt(self) == -1)
2376 return NULL;
2376 return NULL;
2377 if (index_populate_nt(self) == -1)
2377 if (index_populate_nt(self) == -1)
2378 return NULL;
2378 return NULL;
2379 rev = nt_partialmatch(&self->nt, node, nodelen);
2379 rev = nt_partialmatch(&self->nt, node, nodelen);
2380
2380
2381 switch (rev) {
2381 switch (rev) {
2382 case -4:
2382 case -4:
2383 raise_revlog_error();
2383 raise_revlog_error();
2384 return NULL;
2384 return NULL;
2385 case -2:
2385 case -2:
2386 Py_RETURN_NONE;
2386 Py_RETURN_NONE;
2387 case -1:
2387 case -1:
2388 return PyBytes_FromStringAndSize(nullid, self->nodelen);
2388 return PyBytes_FromStringAndSize(nullid, self->nodelen);
2389 }
2389 }
2390
2390
2391 fullnode = index_node_existing(self, rev);
2391 fullnode = index_node_existing(self, rev);
2392 if (fullnode == NULL) {
2392 if (fullnode == NULL) {
2393 return NULL;
2393 return NULL;
2394 }
2394 }
2395 return PyBytes_FromStringAndSize(fullnode, self->nodelen);
2395 return PyBytes_FromStringAndSize(fullnode, self->nodelen);
2396 }
2396 }
2397
2397
2398 static PyObject *index_shortest(indexObject *self, PyObject *args)
2398 static PyObject *index_shortest(indexObject *self, PyObject *args)
2399 {
2399 {
2400 PyObject *val;
2400 PyObject *val;
2401 char *node;
2401 char *node;
2402 int length;
2402 int length;
2403
2403
2404 if (!PyArg_ParseTuple(args, "O", &val))
2404 if (!PyArg_ParseTuple(args, "O", &val))
2405 return NULL;
2405 return NULL;
2406 if (node_check(self->nodelen, val, &node) == -1)
2406 if (node_check(self->nodelen, val, &node) == -1)
2407 return NULL;
2407 return NULL;
2408
2408
2409 self->ntlookups++;
2409 self->ntlookups++;
2410 if (index_init_nt(self) == -1)
2410 if (index_init_nt(self) == -1)
2411 return NULL;
2411 return NULL;
2412 if (index_populate_nt(self) == -1)
2412 if (index_populate_nt(self) == -1)
2413 return NULL;
2413 return NULL;
2414 length = nt_shortest(&self->nt, node);
2414 length = nt_shortest(&self->nt, node);
2415 if (length == -3)
2415 if (length == -3)
2416 return NULL;
2416 return NULL;
2417 if (length == -2) {
2417 if (length == -2) {
2418 raise_revlog_error();
2418 raise_revlog_error();
2419 return NULL;
2419 return NULL;
2420 }
2420 }
2421 return PyLong_FromLong(length);
2421 return PyLong_FromLong(length);
2422 }
2422 }
2423
2423
2424 static PyObject *index_m_get(indexObject *self, PyObject *args)
2424 static PyObject *index_m_get(indexObject *self, PyObject *args)
2425 {
2425 {
2426 PyObject *val;
2426 PyObject *val;
2427 char *node;
2427 char *node;
2428 int rev;
2428 int rev;
2429
2429
2430 if (!PyArg_ParseTuple(args, "O", &val))
2430 if (!PyArg_ParseTuple(args, "O", &val))
2431 return NULL;
2431 return NULL;
2432 if (node_check(self->nodelen, val, &node) == -1)
2432 if (node_check(self->nodelen, val, &node) == -1)
2433 return NULL;
2433 return NULL;
2434 rev = index_find_node(self, node);
2434 rev = index_find_node(self, node);
2435 if (rev == -3)
2435 if (rev == -3)
2436 return NULL;
2436 return NULL;
2437 if (rev == -2)
2437 if (rev == -2)
2438 Py_RETURN_NONE;
2438 Py_RETURN_NONE;
2439 return PyLong_FromLong(rev);
2439 return PyLong_FromLong(rev);
2440 }
2440 }
2441
2441
2442 static int index_contains(indexObject *self, PyObject *value)
2442 static int index_contains(indexObject *self, PyObject *value)
2443 {
2443 {
2444 char *node;
2444 char *node;
2445
2445
2446 if (PyLong_Check(value)) {
2446 if (PyLong_Check(value)) {
2447 long rev;
2447 long rev;
2448 if (!pylong_to_long(value, &rev)) {
2448 if (!pylong_to_long(value, &rev)) {
2449 return -1;
2449 return -1;
2450 }
2450 }
2451 return rev >= -1 && rev < index_length(self);
2451 return rev >= -1 && rev < index_length(self);
2452 }
2452 }
2453
2453
2454 if (node_check(self->nodelen, value, &node) == -1)
2454 if (node_check(self->nodelen, value, &node) == -1)
2455 return -1;
2455 return -1;
2456
2456
2457 switch (index_find_node(self, node)) {
2457 switch (index_find_node(self, node)) {
2458 case -3:
2458 case -3:
2459 return -1;
2459 return -1;
2460 case -2:
2460 case -2:
2461 return 0;
2461 return 0;
2462 default:
2462 default:
2463 return 1;
2463 return 1;
2464 }
2464 }
2465 }
2465 }
2466
2466
2467 static PyObject *index_m_has_node(indexObject *self, PyObject *args)
2467 static PyObject *index_m_has_node(indexObject *self, PyObject *args)
2468 {
2468 {
2469 int ret = index_contains(self, args);
2469 int ret = index_contains(self, args);
2470 if (ret < 0)
2470 if (ret < 0)
2471 return NULL;
2471 return NULL;
2472 return PyBool_FromLong((long)ret);
2472 return PyBool_FromLong((long)ret);
2473 }
2473 }
2474
2474
2475 static PyObject *index_m_rev(indexObject *self, PyObject *val)
2475 static PyObject *index_m_rev(indexObject *self, PyObject *val)
2476 {
2476 {
2477 char *node;
2477 char *node;
2478 int rev;
2478 int rev;
2479
2479
2480 if (node_check(self->nodelen, val, &node) == -1)
2480 if (node_check(self->nodelen, val, &node) == -1)
2481 return NULL;
2481 return NULL;
2482 rev = index_find_node(self, node);
2482 rev = index_find_node(self, node);
2483 if (rev >= -1)
2483 if (rev >= -1)
2484 return PyLong_FromLong(rev);
2484 return PyLong_FromLong(rev);
2485 if (rev == -2)
2485 if (rev == -2)
2486 raise_revlog_error();
2486 raise_revlog_error();
2487 return NULL;
2487 return NULL;
2488 }
2488 }
2489
2489
2490 typedef uint64_t bitmask;
2490 typedef uint64_t bitmask;
2491
2491
2492 /*
2492 /*
2493 * Given a disjoint set of revs, return all candidates for the
2493 * Given a disjoint set of revs, return all candidates for the
2494 * greatest common ancestor. In revset notation, this is the set
2494 * greatest common ancestor. In revset notation, this is the set
2495 * "heads(::a and ::b and ...)"
2495 * "heads(::a and ::b and ...)"
2496 */
2496 */
2497 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
2497 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
2498 int revcount)
2498 int revcount)
2499 {
2499 {
2500 const bitmask allseen = (1ull << revcount) - 1;
2500 const bitmask allseen = (1ull << revcount) - 1;
2501 const bitmask poison = 1ull << revcount;
2501 const bitmask poison = 1ull << revcount;
2502 PyObject *gca = PyList_New(0);
2502 PyObject *gca = PyList_New(0);
2503 int i, v, interesting;
2503 int i, v, interesting;
2504 int maxrev = -1;
2504 int maxrev = -1;
2505 bitmask sp;
2505 bitmask sp;
2506 bitmask *seen;
2506 bitmask *seen;
2507
2507
2508 if (gca == NULL)
2508 if (gca == NULL)
2509 return PyErr_NoMemory();
2509 return PyErr_NoMemory();
2510
2510
2511 for (i = 0; i < revcount; i++) {
2511 for (i = 0; i < revcount; i++) {
2512 if (revs[i] > maxrev)
2512 if (revs[i] > maxrev)
2513 maxrev = revs[i];
2513 maxrev = revs[i];
2514 }
2514 }
2515
2515
2516 seen = calloc(sizeof(*seen), maxrev + 1);
2516 seen = calloc(sizeof(*seen), maxrev + 1);
2517 if (seen == NULL) {
2517 if (seen == NULL) {
2518 Py_DECREF(gca);
2518 Py_DECREF(gca);
2519 return PyErr_NoMemory();
2519 return PyErr_NoMemory();
2520 }
2520 }
2521
2521
2522 for (i = 0; i < revcount; i++)
2522 for (i = 0; i < revcount; i++)
2523 seen[revs[i]] = 1ull << i;
2523 seen[revs[i]] = 1ull << i;
2524
2524
2525 interesting = revcount;
2525 interesting = revcount;
2526
2526
2527 for (v = maxrev; v >= 0 && interesting; v--) {
2527 for (v = maxrev; v >= 0 && interesting; v--) {
2528 bitmask sv = seen[v];
2528 bitmask sv = seen[v];
2529 int parents[2];
2529 int parents[2];
2530
2530
2531 if (!sv)
2531 if (!sv)
2532 continue;
2532 continue;
2533
2533
2534 if (sv < poison) {
2534 if (sv < poison) {
2535 interesting -= 1;
2535 interesting -= 1;
2536 if (sv == allseen) {
2536 if (sv == allseen) {
2537 PyObject *obj = PyLong_FromLong(v);
2537 PyObject *obj = PyLong_FromLong(v);
2538 if (obj == NULL)
2538 if (obj == NULL)
2539 goto bail;
2539 goto bail;
2540 if (PyList_Append(gca, obj) == -1) {
2540 if (PyList_Append(gca, obj) == -1) {
2541 Py_DECREF(obj);
2541 Py_DECREF(obj);
2542 goto bail;
2542 goto bail;
2543 }
2543 }
2544 sv |= poison;
2544 sv |= poison;
2545 for (i = 0; i < revcount; i++) {
2545 for (i = 0; i < revcount; i++) {
2546 if (revs[i] == v)
2546 if (revs[i] == v)
2547 goto done;
2547 goto done;
2548 }
2548 }
2549 }
2549 }
2550 }
2550 }
2551 if (index_get_parents(self, v, parents, maxrev) < 0)
2551 if (index_get_parents(self, v, parents, maxrev) < 0)
2552 goto bail;
2552 goto bail;
2553
2553
2554 for (i = 0; i < 2; i++) {
2554 for (i = 0; i < 2; i++) {
2555 int p = parents[i];
2555 int p = parents[i];
2556 if (p == -1)
2556 if (p == -1)
2557 continue;
2557 continue;
2558 sp = seen[p];
2558 sp = seen[p];
2559 if (sv < poison) {
2559 if (sv < poison) {
2560 if (sp == 0) {
2560 if (sp == 0) {
2561 seen[p] = sv;
2561 seen[p] = sv;
2562 interesting++;
2562 interesting++;
2563 } else if (sp != sv)
2563 } else if (sp != sv)
2564 seen[p] |= sv;
2564 seen[p] |= sv;
2565 } else {
2565 } else {
2566 if (sp && sp < poison)
2566 if (sp && sp < poison)
2567 interesting--;
2567 interesting--;
2568 seen[p] = sv;
2568 seen[p] = sv;
2569 }
2569 }
2570 }
2570 }
2571 }
2571 }
2572
2572
2573 done:
2573 done:
2574 free(seen);
2574 free(seen);
2575 return gca;
2575 return gca;
2576 bail:
2576 bail:
2577 free(seen);
2577 free(seen);
2578 Py_XDECREF(gca);
2578 Py_XDECREF(gca);
2579 return NULL;
2579 return NULL;
2580 }
2580 }
2581
2581
2582 /*
2582 /*
2583 * Given a disjoint set of revs, return the subset with the longest
2583 * Given a disjoint set of revs, return the subset with the longest
2584 * path to the root.
2584 * path to the root.
2585 */
2585 */
2586 static PyObject *find_deepest(indexObject *self, PyObject *revs)
2586 static PyObject *find_deepest(indexObject *self, PyObject *revs)
2587 {
2587 {
2588 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
2588 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
2589 static const Py_ssize_t capacity = 24;
2589 static const Py_ssize_t capacity = 24;
2590 int *depth, *interesting = NULL;
2590 int *depth, *interesting = NULL;
2591 int i, j, v, ninteresting;
2591 int i, j, v, ninteresting;
2592 PyObject *dict = NULL, *keys = NULL;
2592 PyObject *dict = NULL, *keys = NULL;
2593 long *seen = NULL;
2593 long *seen = NULL;
2594 int maxrev = -1;
2594 int maxrev = -1;
2595 long final;
2595 long final;
2596
2596
2597 if (revcount > capacity) {
2597 if (revcount > capacity) {
2598 PyErr_Format(PyExc_OverflowError,
2598 PyErr_Format(PyExc_OverflowError,
2599 "bitset size (%ld) > capacity (%ld)",
2599 "bitset size (%ld) > capacity (%ld)",
2600 (long)revcount, (long)capacity);
2600 (long)revcount, (long)capacity);
2601 return NULL;
2601 return NULL;
2602 }
2602 }
2603
2603
2604 for (i = 0; i < revcount; i++) {
2604 for (i = 0; i < revcount; i++) {
2605 int n = (int)PyLong_AsLong(PyList_GET_ITEM(revs, i));
2605 int n = (int)PyLong_AsLong(PyList_GET_ITEM(revs, i));
2606 if (n > maxrev)
2606 if (n > maxrev)
2607 maxrev = n;
2607 maxrev = n;
2608 }
2608 }
2609
2609
2610 depth = calloc(sizeof(*depth), maxrev + 1);
2610 depth = calloc(sizeof(*depth), maxrev + 1);
2611 if (depth == NULL)
2611 if (depth == NULL)
2612 return PyErr_NoMemory();
2612 return PyErr_NoMemory();
2613
2613
2614 seen = calloc(sizeof(*seen), maxrev + 1);
2614 seen = calloc(sizeof(*seen), maxrev + 1);
2615 if (seen == NULL) {
2615 if (seen == NULL) {
2616 PyErr_NoMemory();
2616 PyErr_NoMemory();
2617 goto bail;
2617 goto bail;
2618 }
2618 }
2619
2619
2620 interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
2620 interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
2621 if (interesting == NULL) {
2621 if (interesting == NULL) {
2622 PyErr_NoMemory();
2622 PyErr_NoMemory();
2623 goto bail;
2623 goto bail;
2624 }
2624 }
2625
2625
2626 if (PyList_Sort(revs) == -1)
2626 if (PyList_Sort(revs) == -1)
2627 goto bail;
2627 goto bail;
2628
2628
2629 for (i = 0; i < revcount; i++) {
2629 for (i = 0; i < revcount; i++) {
2630 int n = (int)PyLong_AsLong(PyList_GET_ITEM(revs, i));
2630 int n = (int)PyLong_AsLong(PyList_GET_ITEM(revs, i));
2631 long b = 1l << i;
2631 long b = 1l << i;
2632 depth[n] = 1;
2632 depth[n] = 1;
2633 seen[n] = b;
2633 seen[n] = b;
2634 interesting[b] = 1;
2634 interesting[b] = 1;
2635 }
2635 }
2636
2636
2637 /* invariant: ninteresting is the number of non-zero entries in
2637 /* invariant: ninteresting is the number of non-zero entries in
2638 * interesting. */
2638 * interesting. */
2639 ninteresting = (int)revcount;
2639 ninteresting = (int)revcount;
2640
2640
2641 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
2641 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
2642 int dv = depth[v];
2642 int dv = depth[v];
2643 int parents[2];
2643 int parents[2];
2644 long sv;
2644 long sv;
2645
2645
2646 if (dv == 0)
2646 if (dv == 0)
2647 continue;
2647 continue;
2648
2648
2649 sv = seen[v];
2649 sv = seen[v];
2650 if (index_get_parents(self, v, parents, maxrev) < 0)
2650 if (index_get_parents(self, v, parents, maxrev) < 0)
2651 goto bail;
2651 goto bail;
2652
2652
2653 for (i = 0; i < 2; i++) {
2653 for (i = 0; i < 2; i++) {
2654 int p = parents[i];
2654 int p = parents[i];
2655 long sp;
2655 long sp;
2656 int dp;
2656 int dp;
2657
2657
2658 if (p == -1)
2658 if (p == -1)
2659 continue;
2659 continue;
2660
2660
2661 dp = depth[p];
2661 dp = depth[p];
2662 sp = seen[p];
2662 sp = seen[p];
2663 if (dp <= dv) {
2663 if (dp <= dv) {
2664 depth[p] = dv + 1;
2664 depth[p] = dv + 1;
2665 if (sp != sv) {
2665 if (sp != sv) {
2666 interesting[sv] += 1;
2666 interesting[sv] += 1;
2667 seen[p] = sv;
2667 seen[p] = sv;
2668 if (sp) {
2668 if (sp) {
2669 interesting[sp] -= 1;
2669 interesting[sp] -= 1;
2670 if (interesting[sp] == 0)
2670 if (interesting[sp] == 0)
2671 ninteresting -= 1;
2671 ninteresting -= 1;
2672 }
2672 }
2673 }
2673 }
2674 } else if (dv == dp - 1) {
2674 } else if (dv == dp - 1) {
2675 long nsp = sp | sv;
2675 long nsp = sp | sv;
2676 if (nsp == sp)
2676 if (nsp == sp)
2677 continue;
2677 continue;
2678 seen[p] = nsp;
2678 seen[p] = nsp;
2679 interesting[sp] -= 1;
2679 interesting[sp] -= 1;
2680 if (interesting[sp] == 0)
2680 if (interesting[sp] == 0)
2681 ninteresting -= 1;
2681 ninteresting -= 1;
2682 if (interesting[nsp] == 0)
2682 if (interesting[nsp] == 0)
2683 ninteresting += 1;
2683 ninteresting += 1;
2684 interesting[nsp] += 1;
2684 interesting[nsp] += 1;
2685 }
2685 }
2686 }
2686 }
2687 interesting[sv] -= 1;
2687 interesting[sv] -= 1;
2688 if (interesting[sv] == 0)
2688 if (interesting[sv] == 0)
2689 ninteresting -= 1;
2689 ninteresting -= 1;
2690 }
2690 }
2691
2691
2692 final = 0;
2692 final = 0;
2693 j = ninteresting;
2693 j = ninteresting;
2694 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
2694 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
2695 if (interesting[i] == 0)
2695 if (interesting[i] == 0)
2696 continue;
2696 continue;
2697 final |= i;
2697 final |= i;
2698 j -= 1;
2698 j -= 1;
2699 }
2699 }
2700 if (final == 0) {
2700 if (final == 0) {
2701 keys = PyList_New(0);
2701 keys = PyList_New(0);
2702 goto bail;
2702 goto bail;
2703 }
2703 }
2704
2704
2705 dict = PyDict_New();
2705 dict = PyDict_New();
2706 if (dict == NULL)
2706 if (dict == NULL)
2707 goto bail;
2707 goto bail;
2708
2708
2709 for (i = 0; i < revcount; i++) {
2709 for (i = 0; i < revcount; i++) {
2710 PyObject *key;
2710 PyObject *key;
2711
2711
2712 if ((final & (1 << i)) == 0)
2712 if ((final & (1 << i)) == 0)
2713 continue;
2713 continue;
2714
2714
2715 key = PyList_GET_ITEM(revs, i);
2715 key = PyList_GET_ITEM(revs, i);
2716 Py_INCREF(key);
2716 Py_INCREF(key);
2717 Py_INCREF(Py_None);
2717 Py_INCREF(Py_None);
2718 if (PyDict_SetItem(dict, key, Py_None) == -1) {
2718 if (PyDict_SetItem(dict, key, Py_None) == -1) {
2719 Py_DECREF(key);
2719 Py_DECREF(key);
2720 Py_DECREF(Py_None);
2720 Py_DECREF(Py_None);
2721 goto bail;
2721 goto bail;
2722 }
2722 }
2723 }
2723 }
2724
2724
2725 keys = PyDict_Keys(dict);
2725 keys = PyDict_Keys(dict);
2726
2726
2727 bail:
2727 bail:
2728 free(depth);
2728 free(depth);
2729 free(seen);
2729 free(seen);
2730 free(interesting);
2730 free(interesting);
2731 Py_XDECREF(dict);
2731 Py_XDECREF(dict);
2732
2732
2733 return keys;
2733 return keys;
2734 }
2734 }
2735
2735
2736 /*
2736 /*
2737 * Given a (possibly overlapping) set of revs, return all the
2737 * Given a (possibly overlapping) set of revs, return all the
2738 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
2738 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
2739 */
2739 */
2740 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
2740 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
2741 {
2741 {
2742 PyObject *ret = NULL;
2742 PyObject *ret = NULL;
2743 Py_ssize_t argcount, i, len;
2743 Py_ssize_t argcount, i, len;
2744 bitmask repeat = 0;
2744 bitmask repeat = 0;
2745 int revcount = 0;
2745 int revcount = 0;
2746 int *revs;
2746 int *revs;
2747
2747
2748 argcount = PySequence_Length(args);
2748 argcount = PySequence_Length(args);
2749 revs = PyMem_Malloc(argcount * sizeof(*revs));
2749 revs = PyMem_Malloc(argcount * sizeof(*revs));
2750 if (argcount > 0 && revs == NULL)
2750 if (argcount > 0 && revs == NULL)
2751 return PyErr_NoMemory();
2751 return PyErr_NoMemory();
2752 len = index_length(self);
2752 len = index_length(self);
2753
2753
2754 for (i = 0; i < argcount; i++) {
2754 for (i = 0; i < argcount; i++) {
2755 static const int capacity = 24;
2755 static const int capacity = 24;
2756 PyObject *obj = PySequence_GetItem(args, i);
2756 PyObject *obj = PySequence_GetItem(args, i);
2757 bitmask x;
2757 bitmask x;
2758 long val;
2758 long val;
2759
2759
2760 if (!PyLong_Check(obj)) {
2760 if (!PyLong_Check(obj)) {
2761 PyErr_SetString(PyExc_TypeError,
2761 PyErr_SetString(PyExc_TypeError,
2762 "arguments must all be ints");
2762 "arguments must all be ints");
2763 Py_DECREF(obj);
2763 Py_DECREF(obj);
2764 goto bail;
2764 goto bail;
2765 }
2765 }
2766 val = PyLong_AsLong(obj);
2766 val = PyLong_AsLong(obj);
2767 Py_DECREF(obj);
2767 Py_DECREF(obj);
2768 if (val == -1) {
2768 if (val == -1) {
2769 ret = PyList_New(0);
2769 ret = PyList_New(0);
2770 goto done;
2770 goto done;
2771 }
2771 }
2772 if (val < 0 || val >= len) {
2772 if (val < 0 || val >= len) {
2773 PyErr_SetString(PyExc_IndexError, "index out of range");
2773 PyErr_SetString(PyExc_IndexError, "index out of range");
2774 goto bail;
2774 goto bail;
2775 }
2775 }
2776 /* this cheesy bloom filter lets us avoid some more
2776 /* this cheesy bloom filter lets us avoid some more
2777 * expensive duplicate checks in the common set-is-disjoint
2777 * expensive duplicate checks in the common set-is-disjoint
2778 * case */
2778 * case */
2779 x = 1ull << (val & 0x3f);
2779 x = 1ull << (val & 0x3f);
2780 if (repeat & x) {
2780 if (repeat & x) {
2781 int k;
2781 int k;
2782 for (k = 0; k < revcount; k++) {
2782 for (k = 0; k < revcount; k++) {
2783 if (val == revs[k])
2783 if (val == revs[k])
2784 goto duplicate;
2784 goto duplicate;
2785 }
2785 }
2786 } else
2786 } else
2787 repeat |= x;
2787 repeat |= x;
2788 if (revcount >= capacity) {
2788 if (revcount >= capacity) {
2789 PyErr_Format(PyExc_OverflowError,
2789 PyErr_Format(PyExc_OverflowError,
2790 "bitset size (%d) > capacity (%d)",
2790 "bitset size (%d) > capacity (%d)",
2791 revcount, capacity);
2791 revcount, capacity);
2792 goto bail;
2792 goto bail;
2793 }
2793 }
2794 revs[revcount++] = (int)val;
2794 revs[revcount++] = (int)val;
2795 duplicate:;
2795 duplicate:;
2796 }
2796 }
2797
2797
2798 if (revcount == 0) {
2798 if (revcount == 0) {
2799 ret = PyList_New(0);
2799 ret = PyList_New(0);
2800 goto done;
2800 goto done;
2801 }
2801 }
2802 if (revcount == 1) {
2802 if (revcount == 1) {
2803 PyObject *obj;
2803 PyObject *obj;
2804 ret = PyList_New(1);
2804 ret = PyList_New(1);
2805 if (ret == NULL)
2805 if (ret == NULL)
2806 goto bail;
2806 goto bail;
2807 obj = PyLong_FromLong(revs[0]);
2807 obj = PyLong_FromLong(revs[0]);
2808 if (obj == NULL)
2808 if (obj == NULL)
2809 goto bail;
2809 goto bail;
2810 PyList_SET_ITEM(ret, 0, obj);
2810 PyList_SET_ITEM(ret, 0, obj);
2811 goto done;
2811 goto done;
2812 }
2812 }
2813
2813
2814 ret = find_gca_candidates(self, revs, revcount);
2814 ret = find_gca_candidates(self, revs, revcount);
2815 if (ret == NULL)
2815 if (ret == NULL)
2816 goto bail;
2816 goto bail;
2817
2817
2818 done:
2818 done:
2819 PyMem_Free(revs);
2819 PyMem_Free(revs);
2820 return ret;
2820 return ret;
2821
2821
2822 bail:
2822 bail:
2823 PyMem_Free(revs);
2823 PyMem_Free(revs);
2824 Py_XDECREF(ret);
2824 Py_XDECREF(ret);
2825 return NULL;
2825 return NULL;
2826 }
2826 }
2827
2827
2828 /*
2828 /*
2829 * Given a (possibly overlapping) set of revs, return the greatest
2829 * Given a (possibly overlapping) set of revs, return the greatest
2830 * common ancestors: those with the longest path to the root.
2830 * common ancestors: those with the longest path to the root.
2831 */
2831 */
2832 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2832 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2833 {
2833 {
2834 PyObject *ret;
2834 PyObject *ret;
2835 PyObject *gca = index_commonancestorsheads(self, args);
2835 PyObject *gca = index_commonancestorsheads(self, args);
2836 if (gca == NULL)
2836 if (gca == NULL)
2837 return NULL;
2837 return NULL;
2838
2838
2839 if (PyList_GET_SIZE(gca) <= 1) {
2839 if (PyList_GET_SIZE(gca) <= 1) {
2840 return gca;
2840 return gca;
2841 }
2841 }
2842
2842
2843 ret = find_deepest(self, gca);
2843 ret = find_deepest(self, gca);
2844 Py_DECREF(gca);
2844 Py_DECREF(gca);
2845 return ret;
2845 return ret;
2846 }
2846 }
2847
2847
2848 /*
2848 /*
2849 * Invalidate any trie entries introduced by added revs.
2849 * Invalidate any trie entries introduced by added revs.
2850 */
2850 */
2851 static void index_invalidate_added(indexObject *self, Py_ssize_t start)
2851 static void index_invalidate_added(indexObject *self, Py_ssize_t start)
2852 {
2852 {
2853 Py_ssize_t i, len;
2853 Py_ssize_t i, len;
2854
2854
2855 len = self->length + self->new_length;
2855 len = self->length + self->new_length;
2856 i = start - self->length;
2856 i = start - self->length;
2857 if (i < 0)
2857 if (i < 0)
2858 return;
2858 return;
2859
2859
2860 for (i = start; i < len; i++) {
2860 for (i = start; i < len; i++) {
2861 const char *node = index_node(self, i);
2861 const char *node = index_node(self, i);
2862 nt_delete_node(&self->nt, node);
2862 nt_delete_node(&self->nt, node);
2863 }
2863 }
2864
2864
2865 self->new_length = start - self->length;
2865 self->new_length = start - self->length;
2866 }
2866 }
2867
2867
2868 /*
2868 /*
2869 * Delete a numeric range of revs, which must be at the end of the
2869 * Delete a numeric range of revs, which must be at the end of the
2870 * range.
2870 * range.
2871 */
2871 */
2872 static int index_slice_del(indexObject *self, PyObject *item)
2872 static int index_slice_del(indexObject *self, PyObject *item)
2873 {
2873 {
2874 Py_ssize_t start, stop, step, slicelength;
2874 Py_ssize_t start, stop, step, slicelength;
2875 Py_ssize_t length = index_length(self) + 1;
2875 Py_ssize_t length = index_length(self) + 1;
2876 int ret = 0;
2876 int ret = 0;
2877
2877
2878 if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
2878 if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
2879 &slicelength) < 0)
2879 &slicelength) < 0)
2880 return -1;
2880 return -1;
2881
2881
2882 if (slicelength <= 0)
2882 if (slicelength <= 0)
2883 return 0;
2883 return 0;
2884
2884
2885 if ((step < 0 && start < stop) || (step > 0 && start > stop))
2885 if ((step < 0 && start < stop) || (step > 0 && start > stop))
2886 stop = start;
2886 stop = start;
2887
2887
2888 if (step < 0) {
2888 if (step < 0) {
2889 stop = start + 1;
2889 stop = start + 1;
2890 start = stop + step * (slicelength - 1) - 1;
2890 start = stop + step * (slicelength - 1) - 1;
2891 step = -step;
2891 step = -step;
2892 }
2892 }
2893
2893
2894 if (step != 1) {
2894 if (step != 1) {
2895 PyErr_SetString(PyExc_ValueError,
2895 PyErr_SetString(PyExc_ValueError,
2896 "revlog index delete requires step size of 1");
2896 "revlog index delete requires step size of 1");
2897 return -1;
2897 return -1;
2898 }
2898 }
2899
2899
2900 if (stop != length - 1) {
2900 if (stop != length - 1) {
2901 PyErr_SetString(PyExc_IndexError,
2901 PyErr_SetString(PyExc_IndexError,
2902 "revlog index deletion indices are invalid");
2902 "revlog index deletion indices are invalid");
2903 return -1;
2903 return -1;
2904 }
2904 }
2905
2905
2906 if (start < self->length) {
2906 if (start < self->length) {
2907 if (self->ntinitialized) {
2907 if (self->ntinitialized) {
2908 Py_ssize_t i;
2908 Py_ssize_t i;
2909
2909
2910 for (i = start; i < self->length; i++) {
2910 for (i = start; i < self->length; i++) {
2911 const char *node = index_node_existing(self, i);
2911 const char *node = index_node_existing(self, i);
2912 if (node == NULL)
2912 if (node == NULL)
2913 return -1;
2913 return -1;
2914
2914
2915 nt_delete_node(&self->nt, node);
2915 nt_delete_node(&self->nt, node);
2916 }
2916 }
2917 if (self->new_length)
2917 if (self->new_length)
2918 index_invalidate_added(self, self->length);
2918 index_invalidate_added(self, self->length);
2919 if (self->ntrev > start)
2919 if (self->ntrev > start)
2920 self->ntrev = (int)start;
2920 self->ntrev = (int)start;
2921 } else if (self->new_length) {
2921 } else if (self->new_length) {
2922 self->new_length = 0;
2922 self->new_length = 0;
2923 }
2923 }
2924
2924
2925 self->length = start;
2925 self->length = start;
2926 goto done;
2926 goto done;
2927 }
2927 }
2928
2928
2929 if (self->ntinitialized) {
2929 if (self->ntinitialized) {
2930 index_invalidate_added(self, start);
2930 index_invalidate_added(self, start);
2931 if (self->ntrev > start)
2931 if (self->ntrev > start)
2932 self->ntrev = (int)start;
2932 self->ntrev = (int)start;
2933 } else {
2933 } else {
2934 self->new_length = start - self->length;
2934 self->new_length = start - self->length;
2935 }
2935 }
2936 done:
2936 done:
2937 Py_CLEAR(self->headrevs);
2937 Py_CLEAR(self->headrevs);
2938 return ret;
2938 return ret;
2939 }
2939 }
2940
2940
2941 /*
2941 /*
2942 * Supported ops:
2942 * Supported ops:
2943 *
2943 *
2944 * slice deletion
2944 * slice deletion
2945 * string assignment (extend node->rev mapping)
2945 * string assignment (extend node->rev mapping)
2946 * string deletion (shrink node->rev mapping)
2946 * string deletion (shrink node->rev mapping)
2947 */
2947 */
2948 static int index_assign_subscript(indexObject *self, PyObject *item,
2948 static int index_assign_subscript(indexObject *self, PyObject *item,
2949 PyObject *value)
2949 PyObject *value)
2950 {
2950 {
2951 char *node;
2951 char *node;
2952 long rev;
2952 long rev;
2953
2953
2954 if (PySlice_Check(item) && value == NULL)
2954 if (PySlice_Check(item) && value == NULL)
2955 return index_slice_del(self, item);
2955 return index_slice_del(self, item);
2956
2956
2957 if (node_check(self->nodelen, item, &node) == -1)
2957 if (node_check(self->nodelen, item, &node) == -1)
2958 return -1;
2958 return -1;
2959
2959
2960 if (value == NULL)
2960 if (value == NULL)
2961 return self->ntinitialized ? nt_delete_node(&self->nt, node)
2961 return self->ntinitialized ? nt_delete_node(&self->nt, node)
2962 : 0;
2962 : 0;
2963 rev = PyLong_AsLong(value);
2963 rev = PyLong_AsLong(value);
2964 if (rev > INT_MAX || rev < 0) {
2964 if (rev > INT_MAX || rev < 0) {
2965 if (!PyErr_Occurred())
2965 if (!PyErr_Occurred())
2966 PyErr_SetString(PyExc_ValueError, "rev out of range");
2966 PyErr_SetString(PyExc_ValueError, "rev out of range");
2967 return -1;
2967 return -1;
2968 }
2968 }
2969
2969
2970 if (index_init_nt(self) == -1)
2970 if (index_init_nt(self) == -1)
2971 return -1;
2971 return -1;
2972 return nt_insert(&self->nt, node, (int)rev);
2972 return nt_insert(&self->nt, node, (int)rev);
2973 }
2973 }
2974
2974
2975 /*
2975 /*
2976 * Find all RevlogNG entries in an index that has inline data. Update
2976 * Find all RevlogNG entries in an index that has inline data. Update
2977 * the optional "offsets" table with those entries.
2977 * the optional "offsets" table with those entries.
2978 */
2978 */
2979 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
2979 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
2980 {
2980 {
2981 const char *data = (const char *)self->buf.buf;
2981 const char *data = (const char *)self->buf.buf;
2982 Py_ssize_t pos = 0;
2982 Py_ssize_t pos = 0;
2983 Py_ssize_t end = self->buf.len;
2983 Py_ssize_t end = self->buf.len;
2984 long incr = self->entry_size;
2984 long incr = self->entry_size;
2985 Py_ssize_t len = 0;
2985 Py_ssize_t len = 0;
2986
2986
2987 while (pos + self->entry_size <= end && pos >= 0) {
2987 while (pos + self->entry_size <= end && pos >= 0) {
2988 uint32_t comp_len, sidedata_comp_len = 0;
2988 uint32_t comp_len, sidedata_comp_len = 0;
2989 /* 3rd element of header is length of compressed inline data */
2989 /* 3rd element of header is length of compressed inline data */
2990 if (self->format_version == format_v1) {
2990 if (self->format_version == format_v1) {
2991 comp_len =
2991 comp_len =
2992 getbe32(data + pos + entry_v1_offset_comp_len);
2992 getbe32(data + pos + entry_v1_offset_comp_len);
2993 sidedata_comp_len = 0;
2993 sidedata_comp_len = 0;
2994 } else if (self->format_version == format_v2) {
2994 } else if (self->format_version == format_v2) {
2995 comp_len =
2995 comp_len =
2996 getbe32(data + pos + entry_v2_offset_comp_len);
2996 getbe32(data + pos + entry_v2_offset_comp_len);
2997 sidedata_comp_len = getbe32(
2997 sidedata_comp_len = getbe32(
2998 data + pos + entry_v2_offset_sidedata_comp_len);
2998 data + pos + entry_v2_offset_sidedata_comp_len);
2999 } else {
2999 } else {
3000 raise_revlog_error();
3000 raise_revlog_error();
3001 return -1;
3001 return -1;
3002 }
3002 }
3003 incr = self->entry_size + comp_len + sidedata_comp_len;
3003 incr = self->entry_size + comp_len + sidedata_comp_len;
3004 if (offsets)
3004 if (offsets)
3005 offsets[len] = data + pos;
3005 offsets[len] = data + pos;
3006 len++;
3006 len++;
3007 pos += incr;
3007 pos += incr;
3008 }
3008 }
3009
3009
3010 if (pos != end) {
3010 if (pos != end) {
3011 if (!PyErr_Occurred())
3011 if (!PyErr_Occurred())
3012 PyErr_SetString(PyExc_ValueError, "corrupt index file");
3012 PyErr_SetString(PyExc_ValueError, "corrupt index file");
3013 return -1;
3013 return -1;
3014 }
3014 }
3015
3015
3016 return len;
3016 return len;
3017 }
3017 }
3018
3018
3019 static int index_init(indexObject *self, PyObject *args, PyObject *kwargs)
3019 static int index_init(indexObject *self, PyObject *args, PyObject *kwargs)
3020 {
3020 {
3021 PyObject *data_obj, *inlined_obj;
3021 PyObject *data_obj, *inlined_obj;
3022 Py_ssize_t size;
3022 Py_ssize_t size;
3023
3023
3024 static char *kwlist[] = {"data", "inlined", "format", NULL};
3024 static char *kwlist[] = {"data", "inlined", "format", NULL};
3025
3025
3026 /* Initialize before argument-checking to avoid index_dealloc() crash.
3026 /* Initialize before argument-checking to avoid index_dealloc() crash.
3027 */
3027 */
3028 self->added = NULL;
3028 self->added = NULL;
3029 self->new_length = 0;
3029 self->new_length = 0;
3030 self->added_length = 0;
3030 self->added_length = 0;
3031 self->data = NULL;
3031 self->data = NULL;
3032 memset(&self->buf, 0, sizeof(self->buf));
3032 memset(&self->buf, 0, sizeof(self->buf));
3033 self->headrevs = NULL;
3033 self->headrevs = NULL;
3034 self->filteredrevs = Py_None;
3034 self->filteredrevs = Py_None;
3035 Py_INCREF(Py_None);
3035 Py_INCREF(Py_None);
3036 self->ntinitialized = 0;
3036 self->ntinitialized = 0;
3037 self->offsets = NULL;
3037 self->offsets = NULL;
3038 self->nodelen = 20;
3038 self->nodelen = 20;
3039 self->nullentry = NULL;
3039 self->nullentry = NULL;
3040 self->rust_ext_compat = 1;
3040 self->rust_ext_compat = 1;
3041 self->format_version = format_v1;
3041 self->format_version = format_v1;
3042
3042
3043 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|l", kwlist,
3043 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|l", kwlist,
3044 &data_obj, &inlined_obj,
3044 &data_obj, &inlined_obj,
3045 &(self->format_version)))
3045 &(self->format_version)))
3046 return -1;
3046 return -1;
3047 if (!PyObject_CheckBuffer(data_obj)) {
3047 if (!PyObject_CheckBuffer(data_obj)) {
3048 PyErr_SetString(PyExc_TypeError,
3048 PyErr_SetString(PyExc_TypeError,
3049 "data does not support buffer interface");
3049 "data does not support buffer interface");
3050 return -1;
3050 return -1;
3051 }
3051 }
3052 if (self->nodelen < 20 || self->nodelen > (Py_ssize_t)sizeof(nullid)) {
3052 if (self->nodelen < 20 || self->nodelen > (Py_ssize_t)sizeof(nullid)) {
3053 PyErr_SetString(PyExc_RuntimeError, "unsupported node size");
3053 PyErr_SetString(PyExc_RuntimeError, "unsupported node size");
3054 return -1;
3054 return -1;
3055 }
3055 }
3056
3056
3057 if (self->format_version == format_v1) {
3057 if (self->format_version == format_v1) {
3058 self->entry_size = v1_entry_size;
3058 self->entry_size = v1_entry_size;
3059 } else if (self->format_version == format_v2) {
3059 } else if (self->format_version == format_v2) {
3060 self->entry_size = v2_entry_size;
3060 self->entry_size = v2_entry_size;
3061 } else if (self->format_version == format_cl2) {
3061 } else if (self->format_version == format_cl2) {
3062 self->entry_size = cl2_entry_size;
3062 self->entry_size = cl2_entry_size;
3063 }
3063 }
3064
3064
3065 self->nullentry = Py_BuildValue(
3065 self->nullentry = Py_BuildValue(
3066 "iiiiiiiy#iiBBi", 0, 0, 0, -1, -1, -1, -1, nullid, self->nodelen, 0,
3066 "iiiiiiiy#iiBBi", 0, 0, 0, -1, -1, -1, -1, nullid, self->nodelen, 0,
3067 0, comp_mode_inline, comp_mode_inline, rank_unknown);
3067 0, comp_mode_inline, comp_mode_inline, rank_unknown);
3068
3068
3069 if (!self->nullentry)
3069 if (!self->nullentry)
3070 return -1;
3070 return -1;
3071 PyObject_GC_UnTrack(self->nullentry);
3071 PyObject_GC_UnTrack(self->nullentry);
3072
3072
3073 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
3073 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
3074 return -1;
3074 return -1;
3075 size = self->buf.len;
3075 size = self->buf.len;
3076
3076
3077 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
3077 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
3078 self->data = data_obj;
3078 self->data = data_obj;
3079
3079
3080 self->ntlookups = self->ntmisses = 0;
3080 self->ntlookups = self->ntmisses = 0;
3081 self->ntrev = -1;
3081 self->ntrev = -1;
3082 Py_INCREF(self->data);
3082 Py_INCREF(self->data);
3083
3083
3084 if (self->inlined) {
3084 if (self->inlined) {
3085 Py_ssize_t len = inline_scan(self, NULL);
3085 Py_ssize_t len = inline_scan(self, NULL);
3086 if (len == -1)
3086 if (len == -1)
3087 goto bail;
3087 goto bail;
3088 self->length = len;
3088 self->length = len;
3089 } else {
3089 } else {
3090 if (size % self->entry_size) {
3090 if (size % self->entry_size) {
3091 PyErr_SetString(PyExc_ValueError, "corrupt index file");
3091 PyErr_SetString(PyExc_ValueError, "corrupt index file");
3092 goto bail;
3092 goto bail;
3093 }
3093 }
3094 self->length = size / self->entry_size;
3094 self->length = size / self->entry_size;
3095 }
3095 }
3096
3096
3097 return 0;
3097 return 0;
3098 bail:
3098 bail:
3099 return -1;
3099 return -1;
3100 }
3100 }
3101
3101
3102 static PyObject *index_nodemap(indexObject *self)
3102 static PyObject *index_nodemap(indexObject *self)
3103 {
3103 {
3104 Py_INCREF(self);
3104 Py_INCREF(self);
3105 return (PyObject *)self;
3105 return (PyObject *)self;
3106 }
3106 }
3107
3107
3108 static void _index_clearcaches(indexObject *self)
3108 static void _index_clearcaches(indexObject *self)
3109 {
3109 {
3110 if (self->offsets) {
3110 if (self->offsets) {
3111 PyMem_Free((void *)self->offsets);
3111 PyMem_Free((void *)self->offsets);
3112 self->offsets = NULL;
3112 self->offsets = NULL;
3113 }
3113 }
3114 if (self->ntinitialized) {
3114 if (self->ntinitialized) {
3115 nt_dealloc(&self->nt);
3115 nt_dealloc(&self->nt);
3116 }
3116 }
3117 self->ntinitialized = 0;
3117 self->ntinitialized = 0;
3118 Py_CLEAR(self->headrevs);
3118 Py_CLEAR(self->headrevs);
3119 }
3119 }
3120
3120
3121 static PyObject *index_clearcaches(indexObject *self)
3121 static PyObject *index_clearcaches(indexObject *self)
3122 {
3122 {
3123 _index_clearcaches(self);
3123 _index_clearcaches(self);
3124 self->ntrev = -1;
3124 self->ntrev = -1;
3125 self->ntlookups = self->ntmisses = 0;
3125 self->ntlookups = self->ntmisses = 0;
3126 Py_RETURN_NONE;
3126 Py_RETURN_NONE;
3127 }
3127 }
3128
3128
3129 static void index_dealloc(indexObject *self)
3129 static void index_dealloc(indexObject *self)
3130 {
3130 {
3131 _index_clearcaches(self);
3131 _index_clearcaches(self);
3132 Py_XDECREF(self->filteredrevs);
3132 Py_XDECREF(self->filteredrevs);
3133 if (self->buf.buf) {
3133 if (self->buf.buf) {
3134 PyBuffer_Release(&self->buf);
3134 PyBuffer_Release(&self->buf);
3135 memset(&self->buf, 0, sizeof(self->buf));
3135 memset(&self->buf, 0, sizeof(self->buf));
3136 }
3136 }
3137 Py_XDECREF(self->data);
3137 Py_XDECREF(self->data);
3138 PyMem_Free(self->added);
3138 PyMem_Free(self->added);
3139 Py_XDECREF(self->nullentry);
3139 Py_XDECREF(self->nullentry);
3140 PyObject_Del(self);
3140 PyObject_Del(self);
3141 }
3141 }
3142
3142
3143 static PySequenceMethods index_sequence_methods = {
3143 static PySequenceMethods index_sequence_methods = {
3144 (lenfunc)index_length, /* sq_length */
3144 (lenfunc)index_length, /* sq_length */
3145 0, /* sq_concat */
3145 0, /* sq_concat */
3146 0, /* sq_repeat */
3146 0, /* sq_repeat */
3147 (ssizeargfunc)index_get, /* sq_item */
3147 (ssizeargfunc)index_get, /* sq_item */
3148 0, /* sq_slice */
3148 0, /* sq_slice */
3149 0, /* sq_ass_item */
3149 0, /* sq_ass_item */
3150 0, /* sq_ass_slice */
3150 0, /* sq_ass_slice */
3151 (objobjproc)index_contains, /* sq_contains */
3151 (objobjproc)index_contains, /* sq_contains */
3152 };
3152 };
3153
3153
3154 static PyMappingMethods index_mapping_methods = {
3154 static PyMappingMethods index_mapping_methods = {
3155 (lenfunc)index_length, /* mp_length */
3155 (lenfunc)index_length, /* mp_length */
3156 (binaryfunc)index_getitem, /* mp_subscript */
3156 (binaryfunc)index_getitem, /* mp_subscript */
3157 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
3157 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
3158 };
3158 };
3159
3159
3160 static PyMethodDef index_methods[] = {
3160 static PyMethodDef index_methods[] = {
3161 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
3161 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
3162 "return the gca set of the given revs"},
3162 "return the gca set of the given revs"},
3163 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
3163 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
3164 METH_VARARGS,
3164 METH_VARARGS,
3165 "return the heads of the common ancestors of the given revs"},
3165 "return the heads of the common ancestors of the given revs"},
3166 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
3166 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
3167 "clear the index caches"},
3167 "clear the index caches"},
3168 {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
3168 {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
3169 {"get_rev", (PyCFunction)index_m_get, METH_VARARGS,
3169 {"get_rev", (PyCFunction)index_m_get, METH_VARARGS,
3170 "return `rev` associated with a node or None"},
3170 "return `rev` associated with a node or None"},
3171 {"has_node", (PyCFunction)index_m_has_node, METH_O,
3171 {"has_node", (PyCFunction)index_m_has_node, METH_O,
3172 "return True if the node exist in the index"},
3172 "return True if the node exist in the index"},
3173 {"rev", (PyCFunction)index_m_rev, METH_O,
3173 {"rev", (PyCFunction)index_m_rev, METH_O,
3174 "return `rev` associated with a node or raise RevlogError"},
3174 "return `rev` associated with a node or raise RevlogError"},
3175 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
3175 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
3176 "compute phases"},
3176 "compute phases"},
3177 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
3177 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
3178 "reachableroots"},
3178 "reachableroots"},
3179 {"replace_sidedata_info", (PyCFunction)index_replace_sidedata_info,
3179 {"replace_sidedata_info", (PyCFunction)index_replace_sidedata_info,
3180 METH_VARARGS, "replace an existing index entry with a new value"},
3180 METH_VARARGS, "replace an existing index entry with a new value"},
3181 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
3181 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
3182 "get head revisions"}, /* Can do filtering since 3.2 */
3182 "get head revisions"}, /* Can do filtering since 3.2 */
3183 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
3183 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
3184 "get filtered head revisions"}, /* Can always do filtering */
3184 "get filtered head revisions"}, /* Can always do filtering */
3185 {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
3185 {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
3186 "True if the object is a snapshot"},
3186 "True if the object is a snapshot"},
3187 {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS,
3187 {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS,
3188 "Gather snapshot data in a cache dict"},
3188 "Gather snapshot data in a cache dict"},
3189 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
3189 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
3190 "determine revisions with deltas to reconstruct fulltext"},
3190 "determine revisions with deltas to reconstruct fulltext"},
3191 {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
3191 {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
3192 METH_VARARGS, "slice revisions into chunks based on read density"},
3192 METH_VARARGS, "slice revisions into chunks based on read density"},
3193 {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
3193 {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
3194 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
3194 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
3195 "match a potentially ambiguous node ID"},
3195 "match a potentially ambiguous node ID"},
3196 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
3196 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
3197 "find length of shortest hex nodeid of a binary ID"},
3197 "find length of shortest hex nodeid of a binary ID"},
3198 {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
3198 {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
3199 {"entry_binary", (PyCFunction)index_entry_binary, METH_O,
3199 {"entry_binary", (PyCFunction)index_entry_binary, METH_O,
3200 "return an entry in binary form"},
3200 "return an entry in binary form"},
3201 {"pack_header", (PyCFunction)index_pack_header, METH_VARARGS,
3201 {"pack_header", (PyCFunction)index_pack_header, METH_VARARGS,
3202 "pack the revlog header information into binary"},
3202 "pack the revlog header information into binary"},
3203 {NULL} /* Sentinel */
3203 {NULL} /* Sentinel */
3204 };
3204 };
3205
3205
3206 static PyGetSetDef index_getset[] = {
3206 static PyGetSetDef index_getset[] = {
3207 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
3207 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
3208 {NULL} /* Sentinel */
3208 {NULL} /* Sentinel */
3209 };
3209 };
3210
3210
3211 static PyMemberDef index_members[] = {
3211 static PyMemberDef index_members[] = {
3212 {"entry_size", T_LONG, offsetof(indexObject, entry_size), 0,
3212 {"entry_size", T_LONG, offsetof(indexObject, entry_size), 0,
3213 "size of an index entry"},
3213 "size of an index entry"},
3214 {"rust_ext_compat", T_LONG, offsetof(indexObject, rust_ext_compat), 0,
3214 {"rust_ext_compat", T_LONG, offsetof(indexObject, rust_ext_compat), 0,
3215 "size of an index entry"},
3215 "size of an index entry"},
3216 {NULL} /* Sentinel */
3216 {NULL} /* Sentinel */
3217 };
3217 };
3218
3218
3219 PyTypeObject HgRevlogIndex_Type = {
3219 PyTypeObject HgRevlogIndex_Type = {
3220 PyVarObject_HEAD_INIT(NULL, 0) /* header */
3220 PyVarObject_HEAD_INIT(NULL, 0) /* header */
3221 "parsers.index", /* tp_name */
3221 "parsers.index", /* tp_name */
3222 sizeof(indexObject), /* tp_basicsize */
3222 sizeof(indexObject), /* tp_basicsize */
3223 0, /* tp_itemsize */
3223 0, /* tp_itemsize */
3224 (destructor)index_dealloc, /* tp_dealloc */
3224 (destructor)index_dealloc, /* tp_dealloc */
3225 0, /* tp_print */
3225 0, /* tp_print */
3226 0, /* tp_getattr */
3226 0, /* tp_getattr */
3227 0, /* tp_setattr */
3227 0, /* tp_setattr */
3228 0, /* tp_compare */
3228 0, /* tp_compare */
3229 0, /* tp_repr */
3229 0, /* tp_repr */
3230 0, /* tp_as_number */
3230 0, /* tp_as_number */
3231 &index_sequence_methods, /* tp_as_sequence */
3231 &index_sequence_methods, /* tp_as_sequence */
3232 &index_mapping_methods, /* tp_as_mapping */
3232 &index_mapping_methods, /* tp_as_mapping */
3233 0, /* tp_hash */
3233 0, /* tp_hash */
3234 0, /* tp_call */
3234 0, /* tp_call */
3235 0, /* tp_str */
3235 0, /* tp_str */
3236 0, /* tp_getattro */
3236 0, /* tp_getattro */
3237 0, /* tp_setattro */
3237 0, /* tp_setattro */
3238 0, /* tp_as_buffer */
3238 0, /* tp_as_buffer */
3239 Py_TPFLAGS_DEFAULT, /* tp_flags */
3239 Py_TPFLAGS_DEFAULT, /* tp_flags */
3240 "revlog index", /* tp_doc */
3240 "revlog index", /* tp_doc */
3241 0, /* tp_traverse */
3241 0, /* tp_traverse */
3242 0, /* tp_clear */
3242 0, /* tp_clear */
3243 0, /* tp_richcompare */
3243 0, /* tp_richcompare */
3244 0, /* tp_weaklistoffset */
3244 0, /* tp_weaklistoffset */
3245 0, /* tp_iter */
3245 0, /* tp_iter */
3246 0, /* tp_iternext */
3246 0, /* tp_iternext */
3247 index_methods, /* tp_methods */
3247 index_methods, /* tp_methods */
3248 index_members, /* tp_members */
3248 index_members, /* tp_members */
3249 index_getset, /* tp_getset */
3249 index_getset, /* tp_getset */
3250 0, /* tp_base */
3250 0, /* tp_base */
3251 0, /* tp_dict */
3251 0, /* tp_dict */
3252 0, /* tp_descr_get */
3252 0, /* tp_descr_get */
3253 0, /* tp_descr_set */
3253 0, /* tp_descr_set */
3254 0, /* tp_dictoffset */
3254 0, /* tp_dictoffset */
3255 (initproc)index_init, /* tp_init */
3255 (initproc)index_init, /* tp_init */
3256 0, /* tp_alloc */
3256 0, /* tp_alloc */
3257 };
3257 };
3258
3258
3259 /*
3259 /*
3260 * returns a tuple of the form (index, cache) with elements as
3260 * returns a tuple of the form (index, cache) with elements as
3261 * follows:
3261 * follows:
3262 *
3262 *
3263 * index: an index object that lazily parses Revlog (v1 or v2) records
3263 * index: an index object that lazily parses Revlog (v1 or v2) records
3264 * cache: if data is inlined, a tuple (0, index_file_content), else None
3264 * cache: if data is inlined, a tuple (0, index_file_content), else None
3265 * index_file_content could be a string, or a buffer
3265 * index_file_content could be a string, or a buffer
3266 *
3266 *
3267 * added complications are for backwards compatibility
3267 * added complications are for backwards compatibility
3268 */
3268 */
3269 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs)
3269 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs)
3270 {
3270 {
3271 PyObject *cache = NULL;
3271 PyObject *cache = NULL;
3272 indexObject *idx;
3272 indexObject *idx;
3273 int ret;
3273 int ret;
3274
3274
3275 idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
3275 idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
3276 if (idx == NULL)
3276 if (idx == NULL)
3277 goto bail;
3277 goto bail;
3278
3278
3279 ret = index_init(idx, args, kwargs);
3279 ret = index_init(idx, args, kwargs);
3280 if (ret == -1)
3280 if (ret == -1)
3281 goto bail;
3281 goto bail;
3282
3282
3283 if (idx->inlined) {
3283 if (idx->inlined) {
3284 cache = Py_BuildValue("iO", 0, idx->data);
3284 cache = Py_BuildValue("iO", 0, idx->data);
3285 if (cache == NULL)
3285 if (cache == NULL)
3286 goto bail;
3286 goto bail;
3287 } else {
3287 } else {
3288 cache = Py_None;
3288 cache = Py_None;
3289 Py_INCREF(cache);
3289 Py_INCREF(cache);
3290 }
3290 }
3291
3291
3292 return Py_BuildValue("NN", idx, cache);
3292 return Py_BuildValue("NN", idx, cache);
3293
3293
3294 bail:
3294 bail:
3295 Py_XDECREF(idx);
3295 Py_XDECREF(idx);
3296 Py_XDECREF(cache);
3296 Py_XDECREF(cache);
3297 return NULL;
3297 return NULL;
3298 }
3298 }
3299
3299
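For illustration, this is roughly how the (index, cache) pair documented above is consumed from Python. A minimal sketch, assuming the C extension is importable as mercurial.cext.parsers and that `data` holds the raw bytes of a revlog index whose layout matches the `inlined` flag; the exact set of keyword arguments accepted by parse_index2 can vary between Mercurial versions:

    from mercurial.cext import parsers

    with open(b'.hg/store/00changelog.i', 'rb') as fh:
        data = fh.read()

    inlined = True  # must match the revlog: revision data interleaved with entries
    index, cache = parsers.parse_index2(data, inlined)

    print(len(index))        # number of revisions (sq_length)
    print(index[0])          # lazily decoded tuple for rev 0 (sq_item)
    print(index.headrevs())  # head revisions computed in C
    if cache is not None:
        offset, content = cache   # (0, index_file_content) for inlined data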
3300 static Revlog_CAPI CAPI = {
3300 static Revlog_CAPI CAPI = {
3301 /* increment the abi_version field upon each change in the Revlog_CAPI
3301 /* increment the abi_version field upon each change in the Revlog_CAPI
3302 struct or in the ABI of the listed functions */
3302 struct or in the ABI of the listed functions */
3303 3, index_length, index_node, index_fast_rank, HgRevlogIndex_GetParents,
3303 3, index_length, index_node, index_fast_rank, HgRevlogIndex_GetParents,
3304 };
3304 };
3305
3305
3306 void revlog_module_init(PyObject *mod)
3306 void revlog_module_init(PyObject *mod)
3307 {
3307 {
3308 PyObject *caps = NULL;
3308 PyObject *caps = NULL;
3309 HgRevlogIndex_Type.tp_new = PyType_GenericNew;
3309 HgRevlogIndex_Type.tp_new = PyType_GenericNew;
3310 if (PyType_Ready(&HgRevlogIndex_Type) < 0)
3310 if (PyType_Ready(&HgRevlogIndex_Type) < 0)
3311 return;
3311 return;
3312 Py_INCREF(&HgRevlogIndex_Type);
3312 Py_INCREF(&HgRevlogIndex_Type);
3313 PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);
3313 PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);
3314
3314
3315 nodetreeType.tp_new = PyType_GenericNew;
3315 nodetreeType.tp_new = PyType_GenericNew;
3316 if (PyType_Ready(&nodetreeType) < 0)
3316 if (PyType_Ready(&nodetreeType) < 0)
3317 return;
3317 return;
3318 Py_INCREF(&nodetreeType);
3318 Py_INCREF(&nodetreeType);
3319 PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);
3319 PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);
3320
3320
3321 caps = PyCapsule_New(&CAPI, "mercurial.cext.parsers.revlog_CAPI", NULL);
3321 caps = PyCapsule_New(&CAPI, "mercurial.cext.parsers.revlog_CAPI", NULL);
3322 if (caps != NULL)
3322 if (caps != NULL)
3323 PyModule_AddObject(mod, "revlog_CAPI", caps);
3323 PyModule_AddObject(mod, "revlog_CAPI", caps);
3324 }
3324 }
@@ -1,1472 +1,1471 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10
10
11 import collections
11 import collections
12 import struct
12 import struct
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from ..node import nullrev
15 from ..node import nullrev
16 from ..i18n import _
16 from ..i18n import _
17 from ..pycompat import getattr
17 from ..pycompat import getattr
18
18
19 from .constants import (
19 from .constants import (
20 COMP_MODE_DEFAULT,
20 COMP_MODE_DEFAULT,
21 COMP_MODE_INLINE,
21 COMP_MODE_INLINE,
22 COMP_MODE_PLAIN,
22 COMP_MODE_PLAIN,
23 DELTA_BASE_REUSE_NO,
23 DELTA_BASE_REUSE_NO,
24 KIND_CHANGELOG,
24 KIND_CHANGELOG,
25 KIND_FILELOG,
25 KIND_FILELOG,
26 KIND_MANIFESTLOG,
26 KIND_MANIFESTLOG,
27 REVIDX_ISCENSORED,
27 REVIDX_ISCENSORED,
28 REVIDX_RAWTEXT_CHANGING_FLAGS,
28 REVIDX_RAWTEXT_CHANGING_FLAGS,
29 )
29 )
30
30
31 from ..thirdparty import attr
31 from ..thirdparty import attr
32
32
33 from .. import (
33 from .. import (
34 error,
34 error,
35 mdiff,
35 mdiff,
36 util,
36 util,
37 )
37 )
38
38
39 from . import flagutil
39 from . import flagutil
40
40
41 # maximum <delta-chain-data>/<revision-text-length> ratio
41 # maximum <delta-chain-data>/<revision-text-length> ratio
42 LIMIT_DELTA2TEXT = 2
42 LIMIT_DELTA2TEXT = 2
43
43
44
44
45 class _testrevlog:
45 class _testrevlog:
46 """minimalist fake revlog to use in doctests"""
46 """minimalist fake revlog to use in doctests"""
47
47
48 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
48 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
49 """data is an list of revision payload boundaries"""
49 """data is an list of revision payload boundaries"""
50 self._data = data
50 self._data = data
51 self._srdensitythreshold = density
51 self._srdensitythreshold = density
52 self._srmingapsize = mingap
52 self._srmingapsize = mingap
53 self._snapshot = set(snapshot)
53 self._snapshot = set(snapshot)
54 self.index = None
54 self.index = None
55
55
56 def start(self, rev):
56 def start(self, rev):
57 if rev == nullrev:
57 if rev == nullrev:
58 return 0
58 return 0
59 if rev == 0:
59 if rev == 0:
60 return 0
60 return 0
61 return self._data[rev - 1]
61 return self._data[rev - 1]
62
62
63 def end(self, rev):
63 def end(self, rev):
64 if rev == nullrev:
64 if rev == nullrev:
65 return 0
65 return 0
66 return self._data[rev]
66 return self._data[rev]
67
67
68 def length(self, rev):
68 def length(self, rev):
69 return self.end(rev) - self.start(rev)
69 return self.end(rev) - self.start(rev)
70
70
71 def __len__(self):
71 def __len__(self):
72 return len(self._data)
72 return len(self._data)
73
73
74 def issnapshot(self, rev):
74 def issnapshot(self, rev):
75 if rev == nullrev:
75 if rev == nullrev:
76 return True
76 return True
77 return rev in self._snapshot
77 return rev in self._snapshot
78
78
79
79
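A quick illustration of the fake revlog above (the data list holds cumulative end offsets), in the same doctest style as the rest of this module; not part of the module itself:

    >>> rl = _testrevlog([5, 10, 12, 12, 27], snapshot=[0])
    >>> rl.start(2), rl.end(2), rl.length(2)
    (10, 12, 2)
    >>> rl.length(3)        # rev 3 shares its boundary with rev 2: empty
    0
    >>> rl.issnapshot(0), rl.issnapshot(4)
    (True, False)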
80 def slicechunk(revlog, revs, targetsize=None):
80 def slicechunk(revlog, revs, targetsize=None):
81 """slice revs to reduce the amount of unrelated data to be read from disk.
81 """slice revs to reduce the amount of unrelated data to be read from disk.
82
82
83 ``revs`` is sliced into groups that should be read in one time.
83 ``revs`` is sliced into groups that should be read in one time.
84 Assume that revs are sorted.
84 Assume that revs are sorted.
85
85
86 The initial chunk is sliced until the overall density (payload/chunks-span
86 The initial chunk is sliced until the overall density (payload/chunks-span
87 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
87 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
88 `revlog._srmingapsize` is skipped.
88 `revlog._srmingapsize` is skipped.
89
89
90 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
90 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
91 For consistency with other slicing choices, this limit won't go lower than
91 For consistency with other slicing choices, this limit won't go lower than
92 `revlog._srmingapsize`.
92 `revlog._srmingapsize`.
93
93
94 If individual revision chunks are larger than this limit, they will still
94 If individual revision chunks are larger than this limit, they will still
95 be yielded individually.
95 be yielded individually.
96
96
97 >>> data = [
97 >>> data = [
98 ... 5, #00 (5)
98 ... 5, #00 (5)
99 ... 10, #01 (5)
99 ... 10, #01 (5)
100 ... 12, #02 (2)
100 ... 12, #02 (2)
101 ... 12, #03 (empty)
101 ... 12, #03 (empty)
102 ... 27, #04 (15)
102 ... 27, #04 (15)
103 ... 31, #05 (4)
103 ... 31, #05 (4)
104 ... 31, #06 (empty)
104 ... 31, #06 (empty)
105 ... 42, #07 (11)
105 ... 42, #07 (11)
106 ... 47, #08 (5)
106 ... 47, #08 (5)
107 ... 47, #09 (empty)
107 ... 47, #09 (empty)
108 ... 48, #10 (1)
108 ... 48, #10 (1)
109 ... 51, #11 (3)
109 ... 51, #11 (3)
110 ... 74, #12 (23)
110 ... 74, #12 (23)
111 ... 85, #13 (11)
111 ... 85, #13 (11)
112 ... 86, #14 (1)
112 ... 86, #14 (1)
113 ... 91, #15 (5)
113 ... 91, #15 (5)
114 ... ]
114 ... ]
115 >>> revlog = _testrevlog(data, snapshot=range(16))
115 >>> revlog = _testrevlog(data, snapshot=range(16))
116
116
117 >>> list(slicechunk(revlog, list(range(16))))
117 >>> list(slicechunk(revlog, list(range(16))))
118 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
118 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
119 >>> list(slicechunk(revlog, [0, 15]))
119 >>> list(slicechunk(revlog, [0, 15]))
120 [[0], [15]]
120 [[0], [15]]
121 >>> list(slicechunk(revlog, [0, 11, 15]))
121 >>> list(slicechunk(revlog, [0, 11, 15]))
122 [[0], [11], [15]]
122 [[0], [11], [15]]
123 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
123 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
124 [[0], [11, 13, 15]]
124 [[0], [11, 13, 15]]
125 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
125 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
126 [[1, 2], [5, 8, 10, 11], [14]]
126 [[1, 2], [5, 8, 10, 11], [14]]
127
127
128 Slicing with a maximum chunk size
128 Slicing with a maximum chunk size
129 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
129 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
130 [[0], [11], [13], [15]]
130 [[0], [11], [13], [15]]
131 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
131 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
132 [[0], [11], [13, 15]]
132 [[0], [11], [13, 15]]
133
133
134 Slicing involving nullrev
134 Slicing involving nullrev
135 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
135 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
136 [[-1, 0], [11], [13, 15]]
136 [[-1, 0], [11], [13, 15]]
137 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
137 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
138 [[-1], [13], [15]]
138 [[-1], [13], [15]]
139 """
139 """
140 if targetsize is not None:
140 if targetsize is not None:
141 targetsize = max(targetsize, revlog._srmingapsize)
141 targetsize = max(targetsize, revlog._srmingapsize)
142 # targetsize should not be specified when evaluating delta candidates:
142 # targetsize should not be specified when evaluating delta candidates:
143 # * targetsize is used to ensure we stay within specification when reading,
143 # * targetsize is used to ensure we stay within specification when reading,
144 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
144 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
145 if densityslicing is None:
145 if densityslicing is None:
146 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
146 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
147 for chunk in densityslicing(
147 for chunk in densityslicing(
148 revs, revlog._srdensitythreshold, revlog._srmingapsize
148 revs, revlog._srdensitythreshold, revlog._srmingapsize
149 ):
149 ):
150 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
150 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
151 yield subchunk
151 yield subchunk
152
152
153
153
154 def _slicechunktosize(revlog, revs, targetsize=None):
154 def _slicechunktosize(revlog, revs, targetsize=None):
155 """slice revs to match the target size
155 """slice revs to match the target size
156
156
157 This is intended to be used on chunks that density slicing selected but that
157 This is intended to be used on chunks that density slicing selected but that
158 are still too large compared to the read guarantee of the revlog. This might
158 are still too large compared to the read guarantee of the revlog. This might
159 happen when the "minimal gap size" interrupted the slicing or when chains are
159 happen when the "minimal gap size" interrupted the slicing or when chains are
160 built in a way that creates large blocks next to each other.
160 built in a way that creates large blocks next to each other.
161
161
162 >>> data = [
162 >>> data = [
163 ... 3, #0 (3)
163 ... 3, #0 (3)
164 ... 5, #1 (2)
164 ... 5, #1 (2)
165 ... 6, #2 (1)
165 ... 6, #2 (1)
166 ... 8, #3 (2)
166 ... 8, #3 (2)
167 ... 8, #4 (empty)
167 ... 8, #4 (empty)
168 ... 11, #5 (3)
168 ... 11, #5 (3)
169 ... 12, #6 (1)
169 ... 12, #6 (1)
170 ... 13, #7 (1)
170 ... 13, #7 (1)
171 ... 14, #8 (1)
171 ... 14, #8 (1)
172 ... ]
172 ... ]
173
173
174 == All snapshots cases ==
174 == All snapshots cases ==
175 >>> revlog = _testrevlog(data, snapshot=range(9))
175 >>> revlog = _testrevlog(data, snapshot=range(9))
176
176
177 Cases where chunk is already small enough
177 Cases where chunk is already small enough
178 >>> list(_slicechunktosize(revlog, [0], 3))
178 >>> list(_slicechunktosize(revlog, [0], 3))
179 [[0]]
179 [[0]]
180 >>> list(_slicechunktosize(revlog, [6, 7], 3))
180 >>> list(_slicechunktosize(revlog, [6, 7], 3))
181 [[6, 7]]
181 [[6, 7]]
182 >>> list(_slicechunktosize(revlog, [0], None))
182 >>> list(_slicechunktosize(revlog, [0], None))
183 [[0]]
183 [[0]]
184 >>> list(_slicechunktosize(revlog, [6, 7], None))
184 >>> list(_slicechunktosize(revlog, [6, 7], None))
185 [[6, 7]]
185 [[6, 7]]
186
186
187 cases where we need actual slicing
187 cases where we need actual slicing
188 >>> list(_slicechunktosize(revlog, [0, 1], 3))
188 >>> list(_slicechunktosize(revlog, [0, 1], 3))
189 [[0], [1]]
189 [[0], [1]]
190 >>> list(_slicechunktosize(revlog, [1, 3], 3))
190 >>> list(_slicechunktosize(revlog, [1, 3], 3))
191 [[1], [3]]
191 [[1], [3]]
192 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
192 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
193 [[1, 2], [3]]
193 [[1, 2], [3]]
194 >>> list(_slicechunktosize(revlog, [3, 5], 3))
194 >>> list(_slicechunktosize(revlog, [3, 5], 3))
195 [[3], [5]]
195 [[3], [5]]
196 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
196 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
197 [[3], [5]]
197 [[3], [5]]
198 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
198 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
199 [[5], [6, 7, 8]]
199 [[5], [6, 7, 8]]
200 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
200 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
201 [[0], [1, 2], [3], [5], [6, 7, 8]]
201 [[0], [1, 2], [3], [5], [6, 7, 8]]
202
202
203 Case with too large individual chunk (must return valid chunk)
203 Case with too large individual chunk (must return valid chunk)
204 >>> list(_slicechunktosize(revlog, [0, 1], 2))
204 >>> list(_slicechunktosize(revlog, [0, 1], 2))
205 [[0], [1]]
205 [[0], [1]]
206 >>> list(_slicechunktosize(revlog, [1, 3], 1))
206 >>> list(_slicechunktosize(revlog, [1, 3], 1))
207 [[1], [3]]
207 [[1], [3]]
208 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
208 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
209 [[3], [5]]
209 [[3], [5]]
210
210
211 == No Snapshot cases ==
211 == No Snapshot cases ==
212 >>> revlog = _testrevlog(data)
212 >>> revlog = _testrevlog(data)
213
213
214 Cases where chunk is already small enough
214 Cases where chunk is already small enough
215 >>> list(_slicechunktosize(revlog, [0], 3))
215 >>> list(_slicechunktosize(revlog, [0], 3))
216 [[0]]
216 [[0]]
217 >>> list(_slicechunktosize(revlog, [6, 7], 3))
217 >>> list(_slicechunktosize(revlog, [6, 7], 3))
218 [[6, 7]]
218 [[6, 7]]
219 >>> list(_slicechunktosize(revlog, [0], None))
219 >>> list(_slicechunktosize(revlog, [0], None))
220 [[0]]
220 [[0]]
221 >>> list(_slicechunktosize(revlog, [6, 7], None))
221 >>> list(_slicechunktosize(revlog, [6, 7], None))
222 [[6, 7]]
222 [[6, 7]]
223
223
224 cases where we need actual slicing
224 cases where we need actual slicing
225 >>> list(_slicechunktosize(revlog, [0, 1], 3))
225 >>> list(_slicechunktosize(revlog, [0, 1], 3))
226 [[0], [1]]
226 [[0], [1]]
227 >>> list(_slicechunktosize(revlog, [1, 3], 3))
227 >>> list(_slicechunktosize(revlog, [1, 3], 3))
228 [[1], [3]]
228 [[1], [3]]
229 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
229 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
230 [[1], [2, 3]]
230 [[1], [2, 3]]
231 >>> list(_slicechunktosize(revlog, [3, 5], 3))
231 >>> list(_slicechunktosize(revlog, [3, 5], 3))
232 [[3], [5]]
232 [[3], [5]]
233 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
233 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
234 [[3], [4, 5]]
234 [[3], [4, 5]]
235 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
235 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
236 [[5], [6, 7, 8]]
236 [[5], [6, 7, 8]]
237 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
237 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
238 [[0], [1, 2], [3], [5], [6, 7, 8]]
238 [[0], [1, 2], [3], [5], [6, 7, 8]]
239
239
240 Case with too large individual chunk (must return valid chunk)
240 Case with too large individual chunk (must return valid chunk)
241 >>> list(_slicechunktosize(revlog, [0, 1], 2))
241 >>> list(_slicechunktosize(revlog, [0, 1], 2))
242 [[0], [1]]
242 [[0], [1]]
243 >>> list(_slicechunktosize(revlog, [1, 3], 1))
243 >>> list(_slicechunktosize(revlog, [1, 3], 1))
244 [[1], [3]]
244 [[1], [3]]
245 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
245 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
246 [[3], [5]]
246 [[3], [5]]
247
247
248 == mixed case ==
248 == mixed case ==
249 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
249 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
250 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
250 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
251 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
251 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
252 """
252 """
253 assert targetsize is None or 0 <= targetsize
253 assert targetsize is None or 0 <= targetsize
254 startdata = revlog.start(revs[0])
254 startdata = revlog.start(revs[0])
255 enddata = revlog.end(revs[-1])
255 enddata = revlog.end(revs[-1])
256 fullspan = enddata - startdata
256 fullspan = enddata - startdata
257 if targetsize is None or fullspan <= targetsize:
257 if targetsize is None or fullspan <= targetsize:
258 yield revs
258 yield revs
259 return
259 return
260
260
261 startrevidx = 0
261 startrevidx = 0
262 endrevidx = 1
262 endrevidx = 1
263 iterrevs = enumerate(revs)
263 iterrevs = enumerate(revs)
264 next(iterrevs) # skip first rev.
264 next(iterrevs) # skip first rev.
265 # first step: get snapshots out of the way
265 # first step: get snapshots out of the way
266 for idx, r in iterrevs:
266 for idx, r in iterrevs:
267 span = revlog.end(r) - startdata
267 span = revlog.end(r) - startdata
268 snapshot = revlog.issnapshot(r)
268 snapshot = revlog.issnapshot(r)
269 if span <= targetsize and snapshot:
269 if span <= targetsize and snapshot:
270 endrevidx = idx + 1
270 endrevidx = idx + 1
271 else:
271 else:
272 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
272 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
273 if chunk:
273 if chunk:
274 yield chunk
274 yield chunk
275 startrevidx = idx
275 startrevidx = idx
276 startdata = revlog.start(r)
276 startdata = revlog.start(r)
277 endrevidx = idx + 1
277 endrevidx = idx + 1
278 if not snapshot:
278 if not snapshot:
279 break
279 break
280
280
281 # for the others, we use binary slicing to quickly converge toward valid
281 # for the others, we use binary slicing to quickly converge toward valid
282 # chunks (otherwise, we might end up looking for start/end of many
282 # chunks (otherwise, we might end up looking for start/end of many
283 # revisions). This logic is not looking for the perfect slicing point, it
283 # revisions). This logic is not looking for the perfect slicing point, it
284 # focuses on quickly converging toward valid chunks.
284 # focuses on quickly converging toward valid chunks.
285 nbitem = len(revs)
285 nbitem = len(revs)
286 while (enddata - startdata) > targetsize:
286 while (enddata - startdata) > targetsize:
287 endrevidx = nbitem
287 endrevidx = nbitem
288 if nbitem - startrevidx <= 1:
288 if nbitem - startrevidx <= 1:
289 break # protect against individual chunk larger than limit
289 break # protect against individual chunk larger than limit
290 localenddata = revlog.end(revs[endrevidx - 1])
290 localenddata = revlog.end(revs[endrevidx - 1])
291 span = localenddata - startdata
291 span = localenddata - startdata
292 while span > targetsize:
292 while span > targetsize:
293 if endrevidx - startrevidx <= 1:
293 if endrevidx - startrevidx <= 1:
294 break # protect against individual chunk larger than limit
294 break # protect against individual chunk larger than limit
295 endrevidx -= (endrevidx - startrevidx) // 2
295 endrevidx -= (endrevidx - startrevidx) // 2
296 localenddata = revlog.end(revs[endrevidx - 1])
296 localenddata = revlog.end(revs[endrevidx - 1])
297 span = localenddata - startdata
297 span = localenddata - startdata
298 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
298 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
299 if chunk:
299 if chunk:
300 yield chunk
300 yield chunk
301 startrevidx = endrevidx
301 startrevidx = endrevidx
302 startdata = revlog.start(revs[startrevidx])
302 startdata = revlog.start(revs[startrevidx])
303
303
304 chunk = _trimchunk(revlog, revs, startrevidx)
304 chunk = _trimchunk(revlog, revs, startrevidx)
305 if chunk:
305 if chunk:
306 yield chunk
306 yield chunk
307
307
308
308
309 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
309 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
310 """slice revs to reduce the amount of unrelated data to be read from disk.
310 """slice revs to reduce the amount of unrelated data to be read from disk.
311
311
312 ``revs`` is sliced into groups that should be read in one time.
312 ``revs`` is sliced into groups that should be read in one time.
313 Assume that revs are sorted.
313 Assume that revs are sorted.
314
314
315 The initial chunk is sliced until the overall density (payload/chunks-span
315 The initial chunk is sliced until the overall density (payload/chunks-span
316 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
316 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
317 skipped.
317 skipped.
318
318
319 >>> revlog = _testrevlog([
319 >>> revlog = _testrevlog([
320 ... 5, #00 (5)
320 ... 5, #00 (5)
321 ... 10, #01 (5)
321 ... 10, #01 (5)
322 ... 12, #02 (2)
322 ... 12, #02 (2)
323 ... 12, #03 (empty)
323 ... 12, #03 (empty)
324 ... 27, #04 (15)
324 ... 27, #04 (15)
325 ... 31, #05 (4)
325 ... 31, #05 (4)
326 ... 31, #06 (empty)
326 ... 31, #06 (empty)
327 ... 42, #07 (11)
327 ... 42, #07 (11)
328 ... 47, #08 (5)
328 ... 47, #08 (5)
329 ... 47, #09 (empty)
329 ... 47, #09 (empty)
330 ... 48, #10 (1)
330 ... 48, #10 (1)
331 ... 51, #11 (3)
331 ... 51, #11 (3)
332 ... 74, #12 (23)
332 ... 74, #12 (23)
333 ... 85, #13 (11)
333 ... 85, #13 (11)
334 ... 86, #14 (1)
334 ... 86, #14 (1)
335 ... 91, #15 (5)
335 ... 91, #15 (5)
336 ... ])
336 ... ])
337
337
338 >>> list(_slicechunktodensity(revlog, list(range(16))))
338 >>> list(_slicechunktodensity(revlog, list(range(16))))
339 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
339 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
340 >>> list(_slicechunktodensity(revlog, [0, 15]))
340 >>> list(_slicechunktodensity(revlog, [0, 15]))
341 [[0], [15]]
341 [[0], [15]]
342 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
342 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
343 [[0], [11], [15]]
343 [[0], [11], [15]]
344 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
344 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
345 [[0], [11, 13, 15]]
345 [[0], [11, 13, 15]]
346 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
346 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
347 [[1, 2], [5, 8, 10, 11], [14]]
347 [[1, 2], [5, 8, 10, 11], [14]]
348 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
348 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
349 ... mingapsize=20))
349 ... mingapsize=20))
350 [[1, 2, 3, 5, 8, 10, 11], [14]]
350 [[1, 2, 3, 5, 8, 10, 11], [14]]
351 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
351 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
352 ... targetdensity=0.95))
352 ... targetdensity=0.95))
353 [[1, 2], [5], [8, 10, 11], [14]]
353 [[1, 2], [5], [8, 10, 11], [14]]
354 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
354 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
355 ... targetdensity=0.95, mingapsize=12))
355 ... targetdensity=0.95, mingapsize=12))
356 [[1, 2], [5, 8, 10, 11], [14]]
356 [[1, 2], [5, 8, 10, 11], [14]]
357 """
357 """
358 start = revlog.start
358 start = revlog.start
359 length = revlog.length
359 length = revlog.length
360
360
361 if len(revs) <= 1:
361 if len(revs) <= 1:
362 yield revs
362 yield revs
363 return
363 return
364
364
365 deltachainspan = segmentspan(revlog, revs)
365 deltachainspan = segmentspan(revlog, revs)
366
366
367 if deltachainspan < mingapsize:
367 if deltachainspan < mingapsize:
368 yield revs
368 yield revs
369 return
369 return
370
370
371 readdata = deltachainspan
371 readdata = deltachainspan
372 chainpayload = sum(length(r) for r in revs)
372 chainpayload = sum(length(r) for r in revs)
373
373
374 if deltachainspan:
374 if deltachainspan:
375 density = chainpayload / float(deltachainspan)
375 density = chainpayload / float(deltachainspan)
376 else:
376 else:
377 density = 1.0
377 density = 1.0
378
378
379 if density >= targetdensity:
379 if density >= targetdensity:
380 yield revs
380 yield revs
381 return
381 return
382
382
383 # Collect the gaps in a list and sort them so the largest can be popped first
383 # Collect the gaps in a list and sort them so the largest can be popped first
384 gaps = []
384 gaps = []
385 prevend = None
385 prevend = None
386 for i, rev in enumerate(revs):
386 for i, rev in enumerate(revs):
387 revstart = start(rev)
387 revstart = start(rev)
388 revlen = length(rev)
388 revlen = length(rev)
389
389
390 # Skip empty revisions to form larger holes
390 # Skip empty revisions to form larger holes
391 if revlen == 0:
391 if revlen == 0:
392 continue
392 continue
393
393
394 if prevend is not None:
394 if prevend is not None:
395 gapsize = revstart - prevend
395 gapsize = revstart - prevend
396 # only consider holes that are large enough
396 # only consider holes that are large enough
397 if gapsize > mingapsize:
397 if gapsize > mingapsize:
398 gaps.append((gapsize, i))
398 gaps.append((gapsize, i))
399
399
400 prevend = revstart + revlen
400 prevend = revstart + revlen
401 # sort the gaps to pop them from largest to smallest
401 # sort the gaps to pop them from largest to smallest
402 gaps.sort()
402 gaps.sort()
403
403
404 # Collect the indices of the largest holes until the density is acceptable
404 # Collect the indices of the largest holes until the density is acceptable
405 selected = []
405 selected = []
406 while gaps and density < targetdensity:
406 while gaps and density < targetdensity:
407 gapsize, gapidx = gaps.pop()
407 gapsize, gapidx = gaps.pop()
408
408
409 selected.append(gapidx)
409 selected.append(gapidx)
410
410
411 # the popped gap will no longer be read, so remove its size from the
411 # the popped gap will no longer be read, so remove its size from the
412 # amount of data we expect to read
412 # amount of data we expect to read
413 readdata -= gapsize
413 readdata -= gapsize
414 if readdata > 0:
414 if readdata > 0:
415 density = chainpayload / float(readdata)
415 density = chainpayload / float(readdata)
416 else:
416 else:
417 density = 1.0
417 density = 1.0
418 selected.sort()
418 selected.sort()
419
419
420 # Cut the revs at collected indices
420 # Cut the revs at collected indices
421 previdx = 0
421 previdx = 0
422 for idx in selected:
422 for idx in selected:
423
423
424 chunk = _trimchunk(revlog, revs, previdx, idx)
424 chunk = _trimchunk(revlog, revs, previdx, idx)
425 if chunk:
425 if chunk:
426 yield chunk
426 yield chunk
427
427
428 previdx = idx
428 previdx = idx
429
429
430 chunk = _trimchunk(revlog, revs, previdx)
430 chunk = _trimchunk(revlog, revs, previdx)
431 if chunk:
431 if chunk:
432 yield chunk
432 yield chunk
433
433
434
434
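The density driving the loop above is simply payload divided by the span of data actually read. A hypothetical helper (illustration only, not part of this module) makes that concrete:

    def _group_density(revlog, revs):
        # payload / read-span ratio for a group of revisions
        payload = sum(revlog.length(r) for r in revs)
        span = revlog.end(revs[-1]) - revlog.start(revs[0])
        return payload / float(span) if span else 1.0

    # With the docstring's example revlog, the group [1, 2, 3, 5] reads
    # 26 bytes (offsets 5 through 31) for 11 bytes of payload, a density
    # of ~0.42, below the default 0.5 threshold, so it would be split.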
435 def _trimchunk(revlog, revs, startidx, endidx=None):
435 def _trimchunk(revlog, revs, startidx, endidx=None):
436 """returns revs[startidx:endidx] without empty trailing revs
436 """returns revs[startidx:endidx] without empty trailing revs
437
437
438 Doctest Setup
438 Doctest Setup
439 >>> revlog = _testrevlog([
439 >>> revlog = _testrevlog([
440 ... 5, #0
440 ... 5, #0
441 ... 10, #1
441 ... 10, #1
442 ... 12, #2
442 ... 12, #2
443 ... 12, #3 (empty)
443 ... 12, #3 (empty)
444 ... 17, #4
444 ... 17, #4
445 ... 21, #5
445 ... 21, #5
446 ... 21, #6 (empty)
446 ... 21, #6 (empty)
447 ... ])
447 ... ])
448
448
449 Contiguous cases:
449 Contiguous cases:
450 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
450 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
451 [0, 1, 2, 3, 4, 5]
451 [0, 1, 2, 3, 4, 5]
452 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
452 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
453 [0, 1, 2, 3, 4]
453 [0, 1, 2, 3, 4]
454 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
454 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
455 [0, 1, 2]
455 [0, 1, 2]
456 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
456 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
457 [2]
457 [2]
458 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
458 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
459 [3, 4, 5]
459 [3, 4, 5]
460 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
460 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
461 [3, 4]
461 [3, 4]
462
462
463 Discontiguous cases:
463 Discontiguous cases:
464 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
464 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
465 [1, 3, 5]
465 [1, 3, 5]
466 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
466 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
467 [1]
467 [1]
468 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
468 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
469 [3, 5]
469 [3, 5]
470 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
470 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
471 [3, 5]
471 [3, 5]
472 """
472 """
473 length = revlog.length
473 length = revlog.length
474
474
475 if endidx is None:
475 if endidx is None:
476 endidx = len(revs)
476 endidx = len(revs)
477
477
478 # If we have a non-empty delta candidate, there is nothing to trim
478 # If we have a non-empty delta candidate, there is nothing to trim
479 if revs[endidx - 1] < len(revlog):
479 if revs[endidx - 1] < len(revlog):
480 # Trim empty revs at the end, except the very first revision of a chain
480 # Trim empty revs at the end, except the very first revision of a chain
481 while (
481 while (
482 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
482 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
483 ):
483 ):
484 endidx -= 1
484 endidx -= 1
485
485
486 return revs[startidx:endidx]
486 return revs[startidx:endidx]
487
487
488
488
489 def segmentspan(revlog, revs):
489 def segmentspan(revlog, revs):
490 """Get the byte span of a segment of revisions
490 """Get the byte span of a segment of revisions
491
491
492 revs is a sorted array of revision numbers
492 revs is a sorted array of revision numbers
493
493
494 >>> revlog = _testrevlog([
494 >>> revlog = _testrevlog([
495 ... 5, #0
495 ... 5, #0
496 ... 10, #1
496 ... 10, #1
497 ... 12, #2
497 ... 12, #2
498 ... 12, #3 (empty)
498 ... 12, #3 (empty)
499 ... 17, #4
499 ... 17, #4
500 ... ])
500 ... ])
501
501
502 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
502 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
503 17
503 17
504 >>> segmentspan(revlog, [0, 4])
504 >>> segmentspan(revlog, [0, 4])
505 17
505 17
506 >>> segmentspan(revlog, [3, 4])
506 >>> segmentspan(revlog, [3, 4])
507 5
507 5
508 >>> segmentspan(revlog, [1, 2, 3,])
508 >>> segmentspan(revlog, [1, 2, 3,])
509 7
509 7
510 >>> segmentspan(revlog, [1, 3])
510 >>> segmentspan(revlog, [1, 3])
511 7
511 7
512 """
512 """
513 if not revs:
513 if not revs:
514 return 0
514 return 0
515 end = revlog.end(revs[-1])
515 end = revlog.end(revs[-1])
516 return end - revlog.start(revs[0])
516 return end - revlog.start(revs[0])
517
517
518
518
519 def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
519 def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
520 """build full text from a (base, delta) pair and other metadata"""
520 """build full text from a (base, delta) pair and other metadata"""
521 # special case deltas which replace entire base; no need to decode
521 # special case deltas which replace entire base; no need to decode
522 # base revision. this neatly avoids censored bases, which throw when
522 # base revision. this neatly avoids censored bases, which throw when
523 # they're decoded.
523 # they're decoded.
524 hlen = struct.calcsize(b">lll")
524 hlen = struct.calcsize(b">lll")
525 if delta[:hlen] == mdiff.replacediffheader(
525 if delta[:hlen] == mdiff.replacediffheader(
526 revlog.rawsize(baserev), len(delta) - hlen
526 revlog.rawsize(baserev), len(delta) - hlen
527 ):
527 ):
528 fulltext = delta[hlen:]
528 fulltext = delta[hlen:]
529 else:
529 else:
530 # deltabase is rawtext before being changed by flag processors, which is
530 # deltabase is rawtext before being changed by flag processors, which is
531 # equivalent to non-raw text
531 # equivalent to non-raw text
532 basetext = revlog.revision(baserev, _df=fh)
532 basetext = revlog.revision(baserev, _df=fh)
533 fulltext = mdiff.patch(basetext, delta)
533 fulltext = mdiff.patch(basetext, delta)
534
534
535 try:
535 try:
536 validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
536 validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
537 if validatehash:
537 if validatehash:
538 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
538 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
539 if flags & REVIDX_ISCENSORED:
539 if flags & REVIDX_ISCENSORED:
540 raise error.StorageError(
540 raise error.StorageError(
541 _(b'node %s is not censored') % expectednode
541 _(b'node %s is not censored') % expectednode
542 )
542 )
543 except error.CensoredNodeError:
543 except error.CensoredNodeError:
544 # must pass the censored index flag to add censored revisions
544 # must pass the censored index flag to add censored revisions
545 if not flags & REVIDX_ISCENSORED:
545 if not flags & REVIDX_ISCENSORED:
546 raise
546 raise
547 return fulltext
547 return fulltext
548
548
549
549
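The special case at the top of _textfromdelta matches deltas made of a single hunk that replaces the whole base text. A minimal sketch of such a delta, assuming the ">lll" (start, end, new-length) hunk header used by the mpatch delta format:

    import struct

    def full_replacement_delta(base_len, newtext):
        # one hunk: replace bytes [0, base_len) of the base with `newtext`
        header = struct.pack(b'>lll', 0, base_len, len(newtext))
        return header + newtext

    # Such a delta starts with the same 12 bytes that
    # mdiff.replacediffheader(base_len, len(newtext)) would produce, so
    # _textfromdelta can take the fast path and never decode the base.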
550 @attr.s(slots=True, frozen=True)
550 @attr.s(slots=True, frozen=True)
551 class _deltainfo:
551 class _deltainfo:
552 distance = attr.ib()
552 distance = attr.ib()
553 deltalen = attr.ib()
553 deltalen = attr.ib()
554 data = attr.ib()
554 data = attr.ib()
555 base = attr.ib()
555 base = attr.ib()
556 chainbase = attr.ib()
556 chainbase = attr.ib()
557 chainlen = attr.ib()
557 chainlen = attr.ib()
558 compresseddeltalen = attr.ib()
558 compresseddeltalen = attr.ib()
559 snapshotdepth = attr.ib()
559 snapshotdepth = attr.ib()
560
560
561
561
562 def drop_u_compression(delta):
562 def drop_u_compression(delta):
563 """turn into a "u" (no-compression) into no-compression without header
563 """turn into a "u" (no-compression) into no-compression without header
564
564
565 This is useful for revlog formats that have a better compression method.
565 This is useful for revlog formats that have a better compression method.
566 """
566 """
567 assert delta.data[0] == b'u', delta.data[0]
567 assert delta.data[0] == b'u', delta.data[0]
568 return _deltainfo(
568 return _deltainfo(
569 delta.distance,
569 delta.distance,
570 delta.deltalen - 1,
570 delta.deltalen - 1,
571 (b'', delta.data[1]),
571 (b'', delta.data[1]),
572 delta.base,
572 delta.base,
573 delta.chainbase,
573 delta.chainbase,
574 delta.chainlen,
574 delta.chainlen,
575 delta.compresseddeltalen,
575 delta.compresseddeltalen,
576 delta.snapshotdepth,
576 delta.snapshotdepth,
577 )
577 )
578
578
579
579
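For illustration, a _deltainfo whose payload was stored uncompressed (the b'u' marker) and the effect of drop_u_compression on it; every field value below is made up:

    info = _deltainfo(
        distance=10,
        deltalen=8,                # 7 payload bytes + the 1-byte b'u' marker
        data=(b'u', b'payload'),
        base=3,
        chainbase=0,
        chainlen=2,
        compresseddeltalen=8,
        snapshotdepth=None,
    )
    stripped = drop_u_compression(info)
    # stripped.data == (b'', b'payload') and stripped.deltalen == 7: the
    # payload is untouched, only the now-redundant marker byte is dropped.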
580 def is_good_delta_info(revlog, deltainfo, revinfo):
580 def is_good_delta_info(revlog, deltainfo, revinfo):
581 """Returns True if the given delta is good. Good means that it is within
581 """Returns True if the given delta is good. Good means that it is within
582 the disk span, disk size, and chain length bounds that we know to be
582 the disk span, disk size, and chain length bounds that we know to be
583 performant."""
583 performant."""
584 if deltainfo is None:
584 if deltainfo is None:
585 return False
585 return False
586
586
587 # - 'deltainfo.distance' is the distance from the base revision --
587 # - 'deltainfo.distance' is the distance from the base revision --
588 # bounding it limits the amount of I/O we need to do.
588 # bounding it limits the amount of I/O we need to do.
589 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
589 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
590 # deltas we need to apply -- bounding it limits the amount of CPU
590 # deltas we need to apply -- bounding it limits the amount of CPU
591 # we consume.
591 # we consume.
592
592
593 textlen = revinfo.textlen
593 textlen = revinfo.textlen
594 defaultmax = textlen * 4
594 defaultmax = textlen * 4
595 maxdist = revlog._maxdeltachainspan
595 maxdist = revlog._maxdeltachainspan
596 if not maxdist:
596 if not maxdist:
597 maxdist = deltainfo.distance # ensure the conditional passes
597 maxdist = deltainfo.distance # ensure the conditional passes
598 maxdist = max(maxdist, defaultmax)
598 maxdist = max(maxdist, defaultmax)
599
599
600 # Bad delta from read span:
600 # Bad delta from read span:
601 #
601 #
602 # If the span of data read is larger than the maximum allowed.
602 # If the span of data read is larger than the maximum allowed.
603 #
603 #
604 # In the sparse-revlog case, we rely on the associated "sparse reading"
604 # In the sparse-revlog case, we rely on the associated "sparse reading"
605 # to avoid issue related to the span of data. In theory, it would be
605 # to avoid issue related to the span of data. In theory, it would be
606 # possible to build pathological revlog where delta pattern would lead
606 # possible to build pathological revlog where delta pattern would lead
607 # to too many reads. However, they do not happen in practice at all. So
607 # to too many reads. However, they do not happen in practice at all. So
608 # we skip the span check entirely.
608 # we skip the span check entirely.
609 if not revlog._sparserevlog and maxdist < deltainfo.distance:
609 if not revlog._sparserevlog and maxdist < deltainfo.distance:
610 return False
610 return False
611
611
612 # Bad delta from new delta size:
612 # Bad delta from new delta size:
613 #
613 #
614 # If the delta size is larger than the target text, storing the
614 # If the delta size is larger than the target text, storing the
615 # delta will be inefficient.
615 # delta will be inefficient.
616 if textlen < deltainfo.deltalen:
616 if textlen < deltainfo.deltalen:
617 return False
617 return False
618
618
619 # Bad delta from cumulated payload size:
619 # Bad delta from cumulated payload size:
620 #
620 #
621 # If the sum of the deltas gets larger than K * the target text length.
621 # If the sum of the deltas gets larger than K * the target text length.
622 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
622 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
623 return False
623 return False
624
624
625 # Bad delta from chain length:
625 # Bad delta from chain length:
626 #
626 #
627 # If the number of deltas in the chain gets too high.
627 # If the number of deltas in the chain gets too high.
628 if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
628 if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
629 return False
629 return False
630
630
631 # bad delta from intermediate snapshot size limit
631 # bad delta from intermediate snapshot size limit
632 #
632 #
633 # If an intermediate snapshot size is higher than the limit. The
633 # If an intermediate snapshot size is higher than the limit. The
634 # limit exists to prevent endless chains of intermediate deltas from
634 # limit exists to prevent endless chains of intermediate deltas from
635 # being created.
635 # being created.
636 if (
636 if (
637 deltainfo.snapshotdepth is not None
637 deltainfo.snapshotdepth is not None
638 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
638 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
639 ):
639 ):
640 return False
640 return False
641
641
642 # bad delta if new intermediate snapshot is larger than the previous
642 # bad delta if new intermediate snapshot is larger than the previous
643 # snapshot
643 # snapshot
644 if (
644 if (
645 deltainfo.snapshotdepth
645 deltainfo.snapshotdepth
646 and revlog.length(deltainfo.base) < deltainfo.deltalen
646 and revlog.length(deltainfo.base) < deltainfo.deltalen
647 ):
647 ):
648 return False
648 return False
649
649
650 return True
650 return True
651
651
652
652
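To make the bounds above concrete, an illustrative calculation (the numbers are arbitrary and not taken from any real revlog):

    textlen = 1000                     # size of the revision being stored

    defaultmax = textlen * 4           # read-span cap when _maxdeltachainspan
                                       # is unset (non-sparse revlogs): 4000
    payload_limit = textlen * LIMIT_DELTA2TEXT   # cumulative delta cap: 2000
    snapshot_limit = textlen >> 2      # cap for a depth-2 intermediate
                                       # snapshot: 250 bytes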
653 # If a revision's full text is that much bigger than a base candidate full
653 # If a revision's full text is that much bigger than a base candidate full
654 # text's, it is very unlikely that it will produce a valid delta. We no longer
654 # text's, it is very unlikely that it will produce a valid delta. We no longer
655 # consider these candidates.
655 # consider these candidates.
656 LIMIT_BASE2TEXT = 500
656 LIMIT_BASE2TEXT = 500
657
657
658
658
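For example, with the constant above, the sparse-revlog filter in _candidategroups below discards any candidate base whose raw size is more than 500 times smaller than the text being stored; illustrative numbers only:

    textlen = 1_000_000                     # storing a ~1 MB revision
    cutoff = textlen // LIMIT_BASE2TEXT     # 2000 bytes
    # any candidate with revlog.rawsize(rev) < 2000 is skipped without even
    # attempting a delta: it is too small to share much content with the text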
659 def _candidategroups(
659 def _candidategroups(
660 revlog,
660 revlog,
661 textlen,
661 textlen,
662 p1,
662 p1,
663 p2,
663 p2,
664 cachedelta,
664 cachedelta,
665 excluded_bases=None,
665 excluded_bases=None,
666 target_rev=None,
666 target_rev=None,
667 ):
667 ):
668 """Provides group of revision to be tested as delta base
668 """Provides group of revision to be tested as delta base
669
669
670 This top-level function focuses on emitting groups with unique and worthwhile
670 This top-level function focuses on emitting groups with unique and worthwhile
671 content. See _raw_candidate_groups for details about the group order.
671 content. See _raw_candidate_groups for details about the group order.
672 """
672 """
673 # should we try to build a delta?
673 # should we try to build a delta?
674 if not (len(revlog) and revlog._storedeltachains):
674 if not (len(revlog) and revlog._storedeltachains):
675 yield None
675 yield None
676 return
676 return
677
677
678 deltalength = revlog.length
678 deltalength = revlog.length
679 deltaparent = revlog.deltaparent
679 deltaparent = revlog.deltaparent
680 sparse = revlog._sparserevlog
680 sparse = revlog._sparserevlog
681 good = None
681 good = None
682
682
683 deltas_limit = textlen * LIMIT_DELTA2TEXT
683 deltas_limit = textlen * LIMIT_DELTA2TEXT
684 group_chunk_size = revlog._candidate_group_chunk_size
684 group_chunk_size = revlog._candidate_group_chunk_size
685
685
686 tested = {nullrev}
686 tested = {nullrev}
687 candidates = _refinedgroups(
687 candidates = _refinedgroups(
688 revlog,
688 revlog,
689 p1,
689 p1,
690 p2,
690 p2,
691 cachedelta,
691 cachedelta,
692 )
692 )
693 while True:
693 while True:
694 temptative = candidates.send(good)
694 temptative = candidates.send(good)
695 if temptative is None:
695 if temptative is None:
696 break
696 break
697 group = []
697 group = []
698 for rev in temptative:
698 for rev in temptative:
699 # skip over empty delta (no need to include them in a chain)
699 # skip over empty delta (no need to include them in a chain)
700 while revlog._generaldelta and not (
700 while revlog._generaldelta and not (
701 rev == nullrev or rev in tested or deltalength(rev)
701 rev == nullrev or rev in tested or deltalength(rev)
702 ):
702 ):
703 tested.add(rev)
703 tested.add(rev)
704 rev = deltaparent(rev)
704 rev = deltaparent(rev)
705 # no need to try a delta against nullrev, this will be done as a
705 # no need to try a delta against nullrev, this will be done as a
706 # last resort.
706 # last resort.
707 if rev == nullrev:
707 if rev == nullrev:
708 continue
708 continue
709 # filter out revisions we tested already
709 # filter out revisions we tested already
710 if rev in tested:
710 if rev in tested:
711 continue
711 continue
712 # a higher authority deemed the base unworthy (e.g. censored)
712 # a higher authority deemed the base unworthy (e.g. censored)
713 if excluded_bases is not None and rev in excluded_bases:
713 if excluded_bases is not None and rev in excluded_bases:
714 tested.add(rev)
714 tested.add(rev)
715 continue
715 continue
716 # We are in a recomputation case and that rev is too high in
716 # We are in a recomputation case and that rev is too high in
717 # the revlog to be used as a delta base
717 # the revlog to be used as a delta base
718 if target_rev is not None and rev >= target_rev:
718 if target_rev is not None and rev >= target_rev:
719 tested.add(rev)
719 tested.add(rev)
720 continue
720 continue
721 # filter out delta bases that will never produce a good delta
721 # filter out delta bases that will never produce a good delta
722 if deltas_limit < revlog.length(rev):
722 if deltas_limit < revlog.length(rev):
723 tested.add(rev)
723 tested.add(rev)
724 continue
724 continue
725 if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
725 if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
726 tested.add(rev)
726 tested.add(rev)
727 continue
727 continue
728 # no delta for rawtext-changing revs (see "candelta" for why)
728 # no delta for rawtext-changing revs (see "candelta" for why)
729 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
729 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
730 tested.add(rev)
730 tested.add(rev)
731 continue
731 continue
732
732
733 # If we reach here, we are about to build and test a delta.
733 # If we reach here, we are about to build and test a delta.
734 # The delta building process will compute the chaininfo in all
734 # The delta building process will compute the chaininfo in all
735 # cases; since that computation is cached, it is fine to access it
735 # cases; since that computation is cached, it is fine to access it
736 # here too.
736 # here too.
737 chainlen, chainsize = revlog._chaininfo(rev)
737 chainlen, chainsize = revlog._chaininfo(rev)
738 # if chain will be too long, skip base
738 # if chain will be too long, skip base
739 if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
739 if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
740 tested.add(rev)
740 tested.add(rev)
741 continue
741 continue
742 # if the chain already has too much data, skip this base
742 # if the chain already has too much data, skip this base
743 if deltas_limit < chainsize:
743 if deltas_limit < chainsize:
744 tested.add(rev)
744 tested.add(rev)
745 continue
745 continue
746 if sparse and revlog.upperboundcomp is not None:
746 if sparse and revlog.upperboundcomp is not None:
747 maxcomp = revlog.upperboundcomp
747 maxcomp = revlog.upperboundcomp
748 basenotsnap = (p1, p2, nullrev)
748 basenotsnap = (p1, p2, nullrev)
749 if rev not in basenotsnap and revlog.issnapshot(rev):
749 if rev not in basenotsnap and revlog.issnapshot(rev):
750 snapshotdepth = revlog.snapshotdepth(rev)
750 snapshotdepth = revlog.snapshotdepth(rev)
751 # If text is significantly larger than the base, we can
751 # If text is significantly larger than the base, we can
752 # expect the resulting delta to be proportional to the size
752 # expect the resulting delta to be proportional to the size
753 # difference
753 # difference
754 revsize = revlog.rawsize(rev)
754 revsize = revlog.rawsize(rev)
755 rawsizedistance = max(textlen - revsize, 0)
755 rawsizedistance = max(textlen - revsize, 0)
756 # use an estimate of the compression upper bound.
756 # use an estimate of the compression upper bound.
757 lowestrealisticdeltalen = rawsizedistance // maxcomp
757 lowestrealisticdeltalen = rawsizedistance // maxcomp
758
758
759 # check the absolute constraint on the delta size
759 # check the absolute constraint on the delta size
760 snapshotlimit = textlen >> snapshotdepth
760 snapshotlimit = textlen >> snapshotdepth
761 if snapshotlimit < lowestrealisticdeltalen:
761 if snapshotlimit < lowestrealisticdeltalen:
762 # delta lower bound is larger than accepted upper bound
762 # delta lower bound is larger than accepted upper bound
763 tested.add(rev)
763 tested.add(rev)
764 continue
764 continue
765
765
766 # check the relative constraint on the delta size
766 # check the relative constraint on the delta size
767 revlength = revlog.length(rev)
767 revlength = revlog.length(rev)
768 if revlength < lowestrealisticdeltalen:
768 if revlength < lowestrealisticdeltalen:
769 # delta probable lower bound is larger than target base
769 # delta probable lower bound is larger than target base
770 tested.add(rev)
770 tested.add(rev)
771 continue
771 continue
772
772
773 group.append(rev)
773 group.append(rev)
774 if group:
774 if group:
775 # When the candidate group is large, it can have a
775 # When the candidate group is large, it can have a
776 # quite significant performance impact. To reduce this, we can send
776 # quite significant performance impact. To reduce this, we can send
777 # the candidates in smaller batches until a new batch does not provide any
777 # the candidates in smaller batches until a new batch does not provide any
778 # improvements.
778 # improvements.
779 #
779 #
780 # This might reduce the overall efficiency of the compression in
780 # This might reduce the overall efficiency of the compression in
781 # some corner cases, but that should also prevent very pathological
781 # some corner cases, but that should also prevent very pathological
782 # cases from being an issue. (eg. 20 000 candidates).
782 # cases from being an issue. (eg. 20 000 candidates).
783 #
783 #
784 # XXX note that the ordering of the group becomes important as it
784 # XXX note that the ordering of the group becomes important as it
785 # now impacts the final result. The current order is unprocessed
785 # now impacts the final result. The current order is unprocessed
786 # and can be improved.
786 # and can be improved.
787 if group_chunk_size == 0:
787 if group_chunk_size == 0:
788 tested.update(group)
788 tested.update(group)
789 good = yield tuple(group)
789 good = yield tuple(group)
790 else:
790 else:
791 prev_good = good
791 prev_good = good
792 for start in range(0, len(group), group_chunk_size):
792 for start in range(0, len(group), group_chunk_size):
793 sub_group = group[start : start + group_chunk_size]
793 sub_group = group[start : start + group_chunk_size]
794 tested.update(sub_group)
794 tested.update(sub_group)
795 good = yield tuple(sub_group)
795 good = yield tuple(sub_group)
796 if prev_good == good:
796 if prev_good == good:
797 break
797 break
798
798
799 yield None
799 yield None
800
800
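A hedged illustration of the generator protocol above: a caller pulls candidate groups and feeds its current best base back through send(), which is what drives refinement. The names revlog, textlen, p1r and p2r stand for whatever the caller already has in scope; the real driver is finddeltainfo further down.

# minimal driver sketch (mirrors the loop in finddeltainfo below)
groups = _candidategroups(revlog, textlen, p1r, p2r, cachedelta=None)
best_base = None
candidates = next(groups)
while candidates is not None:
    # ... evaluate each rev in `candidates`, keep the winner in `best_base` ...
    candidates = groups.send(best_base)  # send(None) behaves like next()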
801
801
802 def _refinedgroups(revlog, p1, p2, cachedelta):
802 def _refinedgroups(revlog, p1, p2, cachedelta):
803 good = None
803 good = None
804 # First we try to reuse the delta contained in the bundle.
804 # First we try to reuse the delta contained in the bundle.
805 # (or from the source revlog)
805 # (or from the source revlog)
806 #
806 #
807 # This logic only applies to general delta repositories and can be disabled
807 # This logic only applies to general delta repositories and can be disabled
808 # through configuration. Disabling source-delta reuse is useful when
808 # through configuration. Disabling source-delta reuse is useful when
809 # we want to make sure we recompute "optimal" deltas.
809 # we want to make sure we recompute "optimal" deltas.
810 debug_info = None
810 debug_info = None
811 if cachedelta is not None and cachedelta[2] > DELTA_BASE_REUSE_NO:
811 if cachedelta is not None and cachedelta[2] > DELTA_BASE_REUSE_NO:
812 # Assume what we received from the server is a good choice
812 # Assume what we received from the server is a good choice
813 # the delta builder will reuse the cache
813 # the delta builder will reuse the cache
814 if debug_info is not None:
814 if debug_info is not None:
815 debug_info['cached-delta.tested'] += 1
815 debug_info['cached-delta.tested'] += 1
816 good = yield (cachedelta[0],)
816 good = yield (cachedelta[0],)
817 if good is not None:
817 if good is not None:
818 if debug_info is not None:
818 if debug_info is not None:
819 debug_info['cached-delta.accepted'] += 1
819 debug_info['cached-delta.accepted'] += 1
820 yield None
820 yield None
821 return
821 return
822 # XXX cache me higher
822 # XXX cache me higher
823 snapshot_cache = SnapshotCache()
823 snapshot_cache = SnapshotCache()
824 groups = _rawgroups(
824 groups = _rawgroups(
825 revlog,
825 revlog,
826 p1,
826 p1,
827 p2,
827 p2,
828 cachedelta,
828 cachedelta,
829 snapshot_cache,
829 snapshot_cache,
830 )
830 )
831 for candidates in groups:
831 for candidates in groups:
832 good = yield candidates
832 good = yield candidates
833 if good is not None:
833 if good is not None:
834 break
834 break
835
835
836 # If sparse revlog is enabled, we can try to refine the available deltas
836 # If sparse revlog is enabled, we can try to refine the available deltas
837 if not revlog._sparserevlog:
837 if not revlog._sparserevlog:
838 yield None
838 yield None
839 return
839 return
840
840
841 # if we have a refinable value, try to refine it
841 # if we have a refinable value, try to refine it
842 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
842 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
843 # refine snapshot down
843 # refine snapshot down
844 previous = None
844 previous = None
845 while previous != good:
845 while previous != good:
846 previous = good
846 previous = good
847 base = revlog.deltaparent(good)
847 base = revlog.deltaparent(good)
848 if base == nullrev:
848 if base == nullrev:
849 break
849 break
850 good = yield (base,)
850 good = yield (base,)
851 # refine snapshot up
851 # refine snapshot up
852 if not snapshot_cache.snapshots:
852 if not snapshot_cache.snapshots:
853 snapshot_cache.update(revlog, good + 1)
853 snapshot_cache.update(revlog, good + 1)
854 previous = None
854 previous = None
855 while good != previous:
855 while good != previous:
856 previous = good
856 previous = good
857 children = tuple(sorted(c for c in snapshot_cache.snapshots[good]))
857 children = tuple(sorted(c for c in snapshot_cache.snapshots[good]))
858 good = yield children
858 good = yield children
859
859
860 if debug_info is not None:
860 if debug_info is not None:
861 if good is None:
861 if good is None:
862 debug_info['no-solution'] += 1
862 debug_info['no-solution'] += 1
863
863
864 yield None
864 yield None
865
865
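A hedged sketch of the two refinement passes above, detached from the generator wiring: once a snapshot `good` has been accepted, the search first offers its delta parent ("refine down"), then the snapshots recorded as its children ("refine up"). `revlog`, `snapshot_cache` and `good` stand for the objects already in scope; this is an illustration, not the actual control flow.

# "down": offer the delta parent of the accepted snapshot as the next group
base = revlog.deltaparent(good)
if base != nullrev:
    candidate_down = (base,)
# "up": offer snapshots that already use `good` as their delta base
if not snapshot_cache.snapshots:
    snapshot_cache.update(revlog, good + 1)
candidate_up = tuple(sorted(snapshot_cache.snapshots[good]))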
866
866
867 def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
867 def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
868 """Provides group of revision to be tested as delta base
868 """Provides group of revision to be tested as delta base
869
869
870 This lower-level function focuses on emitting deltas that are theoretically
870 This lower-level function focuses on emitting deltas that are theoretically
871 interesting, without looking at any practical details.
871 interesting, without looking at any practical details.
872
872
873 The group order aims at providing fast or small candidates first.
873 The group order aims at providing fast or small candidates first.
874 """
874 """
875 gdelta = revlog._generaldelta
875 gdelta = revlog._generaldelta
876 # gate sparse behind general-delta because of issue6056
876 # gate sparse behind general-delta because of issue6056
877 sparse = gdelta and revlog._sparserevlog
877 sparse = gdelta and revlog._sparserevlog
878 curr = len(revlog)
878 curr = len(revlog)
879 prev = curr - 1
879 prev = curr - 1
880 deltachain = lambda rev: revlog._deltachain(rev)[0]
880 deltachain = lambda rev: revlog._deltachain(rev)[0]
881
881
882 if gdelta:
882 if gdelta:
883 # exclude already lazy tested base if any
883 # exclude already lazy tested base if any
884 parents = [p for p in (p1, p2) if p != nullrev]
884 parents = [p for p in (p1, p2) if p != nullrev]
885
885
886 if not revlog._deltabothparents and len(parents) == 2:
886 if not revlog._deltabothparents and len(parents) == 2:
887 parents.sort()
887 parents.sort()
888 # To minimize the chance of having to build a fulltext,
888 # To minimize the chance of having to build a fulltext,
889 # pick first whichever parent is closest to us (max rev)
889 # pick first whichever parent is closest to us (max rev)
890 yield (parents[1],)
890 yield (parents[1],)
891 # then the other one (min rev) if the first did not fit
891 # then the other one (min rev) if the first did not fit
892 yield (parents[0],)
892 yield (parents[0],)
893 elif len(parents) > 0:
893 elif len(parents) > 0:
894 # Test all parents (1 or 2), and keep the best candidate
894 # Test all parents (1 or 2), and keep the best candidate
895 yield parents
895 yield parents
896
896
897 if sparse and parents:
897 if sparse and parents:
898 if snapshot_cache is None:
898 if snapshot_cache is None:
899 # map: base-rev: [snapshot-revs]
899 # map: base-rev: [snapshot-revs]
900 snapshot_cache = SnapshotCache()
900 snapshot_cache = SnapshotCache()
901 # See if an existing snapshot in the parent chains can be used as
901 # See if an existing snapshot in the parent chains can be used as
902 # a base for a new intermediate snapshot
902 # a base for a new intermediate snapshot
903 #
903 #
904 # search for snapshot in parents delta chain
904 # search for snapshot in parents delta chain
905 # map: snapshot-level: snapshot-rev
905 # map: snapshot-level: snapshot-rev
906 parents_snaps = collections.defaultdict(set)
906 parents_snaps = collections.defaultdict(set)
907 candidate_chains = [deltachain(p) for p in parents]
907 candidate_chains = [deltachain(p) for p in parents]
908 for chain in candidate_chains:
908 for chain in candidate_chains:
909 for idx, s in enumerate(chain):
909 for idx, s in enumerate(chain):
910 if not revlog.issnapshot(s):
910 if not revlog.issnapshot(s):
911 break
911 break
912 parents_snaps[idx].add(s)
912 parents_snaps[idx].add(s)
913 snapfloor = min(parents_snaps[0]) + 1
913 snapfloor = min(parents_snaps[0]) + 1
914 snapshot_cache.update(revlog, snapfloor)
914 snapshot_cache.update(revlog, snapfloor)
915 # search for the highest "unrelated" revision
915 # search for the highest "unrelated" revision
916 #
916 #
917 # Adding snapshots used by "unrelated" revisions increases the odds that we
917 # Adding snapshots used by "unrelated" revisions increases the odds that we
918 # reuse an independent, yet better, snapshot chain.
918 # reuse an independent, yet better, snapshot chain.
919 #
919 #
920 # XXX instead of building a set of revisions, we could lazily enumerate
920 # XXX instead of building a set of revisions, we could lazily enumerate
921 # over the chains. That would be more efficient, however we stick to
921 # over the chains. That would be more efficient, however we stick to
922 # simple code for now.
922 # simple code for now.
923 all_revs = set()
923 all_revs = set()
924 for chain in candidate_chains:
924 for chain in candidate_chains:
925 all_revs.update(chain)
925 all_revs.update(chain)
926 other = None
926 other = None
927 for r in revlog.revs(prev, snapfloor):
927 for r in revlog.revs(prev, snapfloor):
928 if r not in all_revs:
928 if r not in all_revs:
929 other = r
929 other = r
930 break
930 break
931 if other is not None:
931 if other is not None:
932 # To avoid unfair competition, we won't use unrelated intermediate
932 # To avoid unfair competition, we won't use unrelated intermediate
933 # snapshots that are deeper than the ones from the parent delta
933 # snapshots that are deeper than the ones from the parent delta
934 # chain.
934 # chain.
935 max_depth = max(parents_snaps.keys())
935 max_depth = max(parents_snaps.keys())
936 chain = deltachain(other)
936 chain = deltachain(other)
937 for depth, s in enumerate(chain):
937 for depth, s in enumerate(chain):
938 if s < snapfloor:
938 if s < snapfloor:
939 continue
939 continue
940 if max_depth < depth:
940 if max_depth < depth:
941 break
941 break
942 if not revlog.issnapshot(s):
942 if not revlog.issnapshot(s):
943 break
943 break
944 parents_snaps[depth].add(s)
944 parents_snaps[depth].add(s)
945 # Test them as possible intermediate snapshot bases.
945 # Test them as possible intermediate snapshot bases.
946 # We test them from highest to lowest level. High level ones are more
946 # We test them from highest to lowest level. High level ones are more
947 # likely to result in small deltas
947 # likely to result in small deltas
948 floor = None
948 floor = None
949 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
949 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
950 siblings = set()
950 siblings = set()
951 for s in snaps:
951 for s in snaps:
952 siblings.update(snapshot_cache.snapshots[s])
952 siblings.update(snapshot_cache.snapshots[s])
953 # Before considering making a new intermediate snapshot, we check
953 # Before considering making a new intermediate snapshot, we check
954 # if an existing snapshot, a child of the base we consider, would be
954 # if an existing snapshot, a child of the base we consider, would be
955 # suitable.
955 # suitable.
956 #
956 #
957 # It gives a chance to reuse a delta chain "unrelated" to the
957 # It gives a chance to reuse a delta chain "unrelated" to the
958 # current revision instead of starting our own. Without such
958 # current revision instead of starting our own. Without such
959 # re-use, topological branches would keep reopening new chains,
959 # re-use, topological branches would keep reopening new chains,
960 # creating more and more snapshots as the repository grows.
960 # creating more and more snapshots as the repository grows.
961
961
962 if floor is not None:
962 if floor is not None:
963 # We only do this for siblings created after the one in our
963 # We only do this for siblings created after the one in our
964 # parent's delta chain. Those created before have less chance
964 # parent's delta chain. Those created before have less chance
965 # of being a valid base since our ancestors had to create a new
965 # of being a valid base since our ancestors had to create a new
966 # snapshot.
966 # snapshot.
967 siblings = [r for r in siblings if floor < r]
967 siblings = [r for r in siblings if floor < r]
968 yield tuple(sorted(siblings))
968 yield tuple(sorted(siblings))
969 # then test the base from our parent's delta chain.
969 # then test the base from our parent's delta chain.
970 yield tuple(sorted(snaps))
970 yield tuple(sorted(snaps))
971 floor = min(snaps)
971 floor = min(snaps)
972 # No suitable base found in the parent chain; search whether any full
972 # No suitable base found in the parent chain; search whether any full
973 # snapshot emitted since the parent's base would be a suitable base for an
973 # snapshot emitted since the parent's base would be a suitable base for an
974 # intermediate snapshot.
974 # intermediate snapshot.
975 #
975 #
976 # It give a chance to reuse a delta chain unrelated to the current
976 # It give a chance to reuse a delta chain unrelated to the current
977 # revisions instead of starting our own. Without such re-use,
977 # revisions instead of starting our own. Without such re-use,
978 # topological branches would keep reopening new full chains. Creating
978 # topological branches would keep reopening new full chains. Creating
979 # more and more snapshot as the repository grow.
979 # more and more snapshot as the repository grow.
980 yield tuple(snapshot_cache.snapshots[nullrev])
980 yield tuple(snapshot_cache.snapshots[nullrev])
981
981
982 if not sparse:
982 if not sparse:
983 # other approach failed try against prev to hopefully save us a
983 # other approach failed try against prev to hopefully save us a
984 # fulltext.
984 # fulltext.
985 yield (prev,)
985 yield (prev,)
986
986
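For orientation, a hedged sketch of the order in which _rawgroups tends to emit candidates for a sparse, general-delta revlog; the revision numbers are invented and the exact grouping depends on configuration.

for group in _rawgroups(revlog, 12, 7, None):   # p1=12, p2=7, no cached delta
    print(sorted(group))
# typically prints something like:
#   [12]            closest parent first
#   [7]             then the other parent
#   [...]           parent-chain snapshots, deepest level first
#   [...]           shallower snapshot levels
#   [0, 57, ...]    full snapshots seen since the parents' base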
987
987
988 class SnapshotCache:
988 class SnapshotCache:
989 __slots__ = ('snapshots', '_start_rev', '_end_rev')
989 __slots__ = ('snapshots', '_start_rev', '_end_rev')
990
990
991 def __init__(self):
991 def __init__(self):
992 # XXX should probably be a set ?
992 self.snapshots = collections.defaultdict(set)
993 self.snapshots = collections.defaultdict(list)
994 self._start_rev = None
993 self._start_rev = None
995 self._end_rev = None
994 self._end_rev = None
996
995
997 def update(self, revlog, start_rev=0):
996 def update(self, revlog, start_rev=0):
998 """find snapshots from start_rev to tip"""
997 """find snapshots from start_rev to tip"""
999 nb_revs = len(revlog)
998 nb_revs = len(revlog)
1000 end_rev = nb_revs - 1
999 end_rev = nb_revs - 1
1001 if start_rev > end_rev:
1000 if start_rev > end_rev:
1002 return # range is empty
1001 return # range is empty
1003
1002
1004 if self._start_rev is None:
1003 if self._start_rev is None:
1005 assert self._end_rev is None
1004 assert self._end_rev is None
1006 self._update(revlog, start_rev, end_rev)
1005 self._update(revlog, start_rev, end_rev)
1007 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1006 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1008 if start_rev < self._start_rev:
1007 if start_rev < self._start_rev:
1009 self._update(revlog, start_rev, self._start_rev - 1)
1008 self._update(revlog, start_rev, self._start_rev - 1)
1010 if self._end_rev < end_rev:
1009 if self._end_rev < end_rev:
1011 self._update(revlog, self._end_rev + 1, end_rev)
1010 self._update(revlog, self._end_rev + 1, end_rev)
1012
1011
1013 if self._start_rev is None:
1012 if self._start_rev is None:
1014 assert self._end_rev is None
1013 assert self._end_rev is None
1015 self._end_rev = end_rev
1014 self._end_rev = end_rev
1016 self._start_rev = start_rev
1015 self._start_rev = start_rev
1017 else:
1016 else:
1018 self._start_rev = min(self._start_rev, start_rev)
1017 self._start_rev = min(self._start_rev, start_rev)
1019 self._end_rev = max(self._end_rev, end_rev)
1018 self._end_rev = max(self._end_rev, end_rev)
1020 assert self._start_rev <= self._end_rev, (
1019 assert self._start_rev <= self._end_rev, (
1021 self._start_rev,
1020 self._start_rev,
1022 self._end_rev,
1021 self._end_rev,
1023 )
1022 )
1024
1023
1025 def _update(self, revlog, start_rev, end_rev):
1024 def _update(self, revlog, start_rev, end_rev):
1026 """internal method that actually do update content"""
1025 """internal method that actually do update content"""
1027 assert self._start_rev is None or (
1026 assert self._start_rev is None or (
1028 start_rev < self._start_rev or start_rev > self._end_rev
1027 start_rev < self._start_rev or start_rev > self._end_rev
1029 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1028 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1030 assert self._start_rev is None or (
1029 assert self._start_rev is None or (
1031 end_rev < self._start_rev or end_rev > self._end_rev
1030 end_rev < self._start_rev or end_rev > self._end_rev
1032 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1031 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1033 cache = self.snapshots
1032 cache = self.snapshots
1034 if util.safehasattr(revlog.index, b'findsnapshots'):
1033 if util.safehasattr(revlog.index, b'findsnapshots'):
1035 revlog.index.findsnapshots(cache, start_rev, end_rev)
1034 revlog.index.findsnapshots(cache, start_rev, end_rev)
1036 else:
1035 else:
1037 deltaparent = revlog.deltaparent
1036 deltaparent = revlog.deltaparent
1038 issnapshot = revlog.issnapshot
1037 issnapshot = revlog.issnapshot
1039 for rev in revlog.revs(start_rev, end_rev):
1038 for rev in revlog.revs(start_rev, end_rev):
1040 if issnapshot(rev):
1039 if issnapshot(rev):
1041 cache[deltaparent(rev)].append(rev)
1040 cache[deltaparent(rev)].add(rev)
1042
1041
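A hedged usage sketch for the cache above, reflecting this commit's switch from lists to sets: `snapshots` maps a delta-parent revision to the set of snapshot revisions built on top of it, so duplicates are collapsed for free and membership tests are cheap. The revision numbers below are invented.

cache = SnapshotCache()
cache.update(revlog)              # scan the whole revlog once
cache.snapshots[nullrev]          # e.g. {0, 57, 1203}: full snapshots
cache.snapshots[57]               # e.g. {58, 900}: snapshots whose delta parent is rev 57
cache.update(revlog, 1000)        # no-op if the covered range already includes it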
1043
1042
1044 class deltacomputer:
1043 class deltacomputer:
1045 def __init__(
1044 def __init__(
1046 self,
1045 self,
1047 revlog,
1046 revlog,
1048 write_debug=None,
1047 write_debug=None,
1049 debug_search=False,
1048 debug_search=False,
1050 debug_info=None,
1049 debug_info=None,
1051 ):
1050 ):
1052 self.revlog = revlog
1051 self.revlog = revlog
1053 self._write_debug = write_debug
1052 self._write_debug = write_debug
1054 self._debug_search = debug_search
1053 self._debug_search = debug_search
1055 self._debug_info = debug_info
1054 self._debug_info = debug_info
1056
1055
1057 def buildtext(self, revinfo, fh):
1056 def buildtext(self, revinfo, fh):
1058 """Builds a fulltext version of a revision
1057 """Builds a fulltext version of a revision
1059
1058
1060 revinfo: revisioninfo instance that contains all needed info
1059 revinfo: revisioninfo instance that contains all needed info
1061 fh: file handle to either the .i or the .d revlog file,
1060 fh: file handle to either the .i or the .d revlog file,
1062 depending on whether it is inlined or not
1061 depending on whether it is inlined or not
1063 """
1062 """
1064 btext = revinfo.btext
1063 btext = revinfo.btext
1065 if btext[0] is not None:
1064 if btext[0] is not None:
1066 return btext[0]
1065 return btext[0]
1067
1066
1068 revlog = self.revlog
1067 revlog = self.revlog
1069 cachedelta = revinfo.cachedelta
1068 cachedelta = revinfo.cachedelta
1070 baserev = cachedelta[0]
1069 baserev = cachedelta[0]
1071 delta = cachedelta[1]
1070 delta = cachedelta[1]
1072
1071
1073 fulltext = btext[0] = _textfromdelta(
1072 fulltext = btext[0] = _textfromdelta(
1074 fh,
1073 fh,
1075 revlog,
1074 revlog,
1076 baserev,
1075 baserev,
1077 delta,
1076 delta,
1078 revinfo.p1,
1077 revinfo.p1,
1079 revinfo.p2,
1078 revinfo.p2,
1080 revinfo.flags,
1079 revinfo.flags,
1081 revinfo.node,
1080 revinfo.node,
1082 )
1081 )
1083 return fulltext
1082 return fulltext
1084
1083
1085 def _builddeltadiff(self, base, revinfo, fh):
1084 def _builddeltadiff(self, base, revinfo, fh):
1086 revlog = self.revlog
1085 revlog = self.revlog
1087 t = self.buildtext(revinfo, fh)
1086 t = self.buildtext(revinfo, fh)
1088 if revlog.iscensored(base):
1087 if revlog.iscensored(base):
1089 # deltas based on a censored revision must replace the
1088 # deltas based on a censored revision must replace the
1090 # full content in one patch, so the delta works everywhere
1089 # full content in one patch, so the delta works everywhere
1091 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1090 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1092 delta = header + t
1091 delta = header + t
1093 else:
1092 else:
1094 ptext = revlog.rawdata(base, _df=fh)
1093 ptext = revlog.rawdata(base, _df=fh)
1095 delta = mdiff.textdiff(ptext, t)
1094 delta = mdiff.textdiff(ptext, t)
1096
1095
1097 return delta
1096 return delta
1098
1097
1099 def _builddeltainfo(self, revinfo, base, fh):
1098 def _builddeltainfo(self, revinfo, base, fh):
1100 # can we use the cached delta?
1099 # can we use the cached delta?
1101 revlog = self.revlog
1100 revlog = self.revlog
1102 debug_search = self._write_debug is not None and self._debug_search
1101 debug_search = self._write_debug is not None and self._debug_search
1103 chainbase = revlog.chainbase(base)
1102 chainbase = revlog.chainbase(base)
1104 if revlog._generaldelta:
1103 if revlog._generaldelta:
1105 deltabase = base
1104 deltabase = base
1106 else:
1105 else:
1107 deltabase = chainbase
1106 deltabase = chainbase
1108 snapshotdepth = None
1107 snapshotdepth = None
1109 if revlog._sparserevlog and deltabase == nullrev:
1108 if revlog._sparserevlog and deltabase == nullrev:
1110 snapshotdepth = 0
1109 snapshotdepth = 0
1111 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
1110 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
1112 # A delta chain should always be one full snapshot,
1111 # A delta chain should always be one full snapshot,
1113 # zero or more semi-snapshots, and zero or more deltas
1112 # zero or more semi-snapshots, and zero or more deltas
1114 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1113 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1115 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1114 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1116 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1115 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1117 delta = None
1116 delta = None
1118 if revinfo.cachedelta:
1117 if revinfo.cachedelta:
1119 cachebase = revinfo.cachedelta[0]
1118 cachebase = revinfo.cachedelta[0]
1120 # check if the diff still applies
1119 # check if the diff still applies
1121 currentbase = cachebase
1120 currentbase = cachebase
1122 while (
1121 while (
1123 currentbase != nullrev
1122 currentbase != nullrev
1124 and currentbase != base
1123 and currentbase != base
1125 and self.revlog.length(currentbase) == 0
1124 and self.revlog.length(currentbase) == 0
1126 ):
1125 ):
1127 currentbase = self.revlog.deltaparent(currentbase)
1126 currentbase = self.revlog.deltaparent(currentbase)
1128 if self.revlog._lazydelta and currentbase == base:
1127 if self.revlog._lazydelta and currentbase == base:
1129 delta = revinfo.cachedelta[1]
1128 delta = revinfo.cachedelta[1]
1130 if delta is None:
1129 if delta is None:
1131 delta = self._builddeltadiff(base, revinfo, fh)
1130 delta = self._builddeltadiff(base, revinfo, fh)
1132 if debug_search:
1131 if debug_search:
1133 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1132 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1134 msg %= len(delta)
1133 msg %= len(delta)
1135 self._write_debug(msg)
1134 self._write_debug(msg)
1136 # snapshotdepth needs to be neither None nor 0 (a level-0 snapshot)
1135 # snapshotdepth needs to be neither None nor 0 (a level-0 snapshot)
1137 if revlog.upperboundcomp is not None and snapshotdepth:
1136 if revlog.upperboundcomp is not None and snapshotdepth:
1138 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1137 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1139 snapshotlimit = revinfo.textlen >> snapshotdepth
1138 snapshotlimit = revinfo.textlen >> snapshotdepth
1140 if debug_search:
1139 if debug_search:
1141 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1140 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1142 msg %= lowestrealisticdeltalen
1141 msg %= lowestrealisticdeltalen
1143 self._write_debug(msg)
1142 self._write_debug(msg)
1144 if snapshotlimit < lowestrealisticdeltalen:
1143 if snapshotlimit < lowestrealisticdeltalen:
1145 if debug_search:
1144 if debug_search:
1146 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1145 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1147 self._write_debug(msg)
1146 self._write_debug(msg)
1148 return None
1147 return None
1149 if revlog.length(base) < lowestrealisticdeltalen:
1148 if revlog.length(base) < lowestrealisticdeltalen:
1150 if debug_search:
1149 if debug_search:
1151 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1150 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1152 self._write_debug(msg)
1151 self._write_debug(msg)
1153 return None
1152 return None
1154 header, data = revlog.compress(delta)
1153 header, data = revlog.compress(delta)
1155 deltalen = len(header) + len(data)
1154 deltalen = len(header) + len(data)
1156 offset = revlog.end(len(revlog) - 1)
1155 offset = revlog.end(len(revlog) - 1)
1157 dist = deltalen + offset - revlog.start(chainbase)
1156 dist = deltalen + offset - revlog.start(chainbase)
1158 chainlen, compresseddeltalen = revlog._chaininfo(base)
1157 chainlen, compresseddeltalen = revlog._chaininfo(base)
1159 chainlen += 1
1158 chainlen += 1
1160 compresseddeltalen += deltalen
1159 compresseddeltalen += deltalen
1161
1160
1162 return _deltainfo(
1161 return _deltainfo(
1163 dist,
1162 dist,
1164 deltalen,
1163 deltalen,
1165 (header, data),
1164 (header, data),
1166 deltabase,
1165 deltabase,
1167 chainbase,
1166 chainbase,
1168 chainlen,
1167 chainlen,
1169 compresseddeltalen,
1168 compresseddeltalen,
1170 snapshotdepth,
1169 snapshotdepth,
1171 )
1170 )
1172
1171
1173 def _fullsnapshotinfo(self, fh, revinfo, curr):
1172 def _fullsnapshotinfo(self, fh, revinfo, curr):
1174 rawtext = self.buildtext(revinfo, fh)
1173 rawtext = self.buildtext(revinfo, fh)
1175 data = self.revlog.compress(rawtext)
1174 data = self.revlog.compress(rawtext)
1176 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1175 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1177 deltabase = chainbase = curr
1176 deltabase = chainbase = curr
1178 snapshotdepth = 0
1177 snapshotdepth = 0
1179 chainlen = 1
1178 chainlen = 1
1180
1179
1181 return _deltainfo(
1180 return _deltainfo(
1182 dist,
1181 dist,
1183 deltalen,
1182 deltalen,
1184 data,
1183 data,
1185 deltabase,
1184 deltabase,
1186 chainbase,
1185 chainbase,
1187 chainlen,
1186 chainlen,
1188 compresseddeltalen,
1187 compresseddeltalen,
1189 snapshotdepth,
1188 snapshotdepth,
1190 )
1189 )
1191
1190
1192 def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
1191 def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
1193 """Find an acceptable delta against a candidate revision
1192 """Find an acceptable delta against a candidate revision
1194
1193
1195 revinfo: information about the revision (instance of _revisioninfo)
1194 revinfo: information about the revision (instance of _revisioninfo)
1196 fh: file handle to either the .i or the .d revlog file,
1195 fh: file handle to either the .i or the .d revlog file,
1197 depending on whether it is inlined or not
1196 depending on whether it is inlined or not
1198
1197
1199 Returns the first acceptable candidate revision, as ordered by
1198 Returns the first acceptable candidate revision, as ordered by
1200 _candidategroups
1199 _candidategroups
1201
1200
1202 If no suitable deltabase is found, we return delta info for a full
1201 If no suitable deltabase is found, we return delta info for a full
1203 snapshot.
1202 snapshot.
1204
1203
1205 `excluded_bases` is an optional set of revisions that cannot be used as
1204 `excluded_bases` is an optional set of revisions that cannot be used as
1206 delta bases. Use this to recompute deltas suitable in a censor or strip
1205 delta bases. Use this to recompute deltas suitable in a censor or strip
1207 context.
1206 context.
1208 """
1207 """
1209 if target_rev is None:
1208 if target_rev is None:
1210 target_rev = len(self.revlog)
1209 target_rev = len(self.revlog)
1211
1210
1212 if not revinfo.textlen:
1211 if not revinfo.textlen:
1213 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1212 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1214
1213
1215 if excluded_bases is None:
1214 if excluded_bases is None:
1216 excluded_bases = set()
1215 excluded_bases = set()
1217
1216
1218 # no delta for flag processor revision (see "candelta" for why)
1217 # no delta for flag processor revision (see "candelta" for why)
1219 # not calling candelta since only one revision needs test, also to
1218 # not calling candelta since only one revision needs test, also to
1220 # avoid overhead fetching flags again.
1219 # avoid overhead fetching flags again.
1221 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1220 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1222 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1221 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1223
1222
1224 gather_debug = (
1223 gather_debug = (
1225 self._write_debug is not None or self._debug_info is not None
1224 self._write_debug is not None or self._debug_info is not None
1226 )
1225 )
1227 debug_search = self._write_debug is not None and self._debug_search
1226 debug_search = self._write_debug is not None and self._debug_search
1228
1227
1229 if gather_debug:
1228 if gather_debug:
1230 start = util.timer()
1229 start = util.timer()
1231
1230
1232 # count the number of different deltas we tried (for debug purposes)
1231 # count the number of different deltas we tried (for debug purposes)
1233 dbg_try_count = 0
1232 dbg_try_count = 0
1234 # count the number of "search round" we did. (for debug purpose)
1233 # count the number of "search round" we did. (for debug purpose)
1235 dbg_try_rounds = 0
1234 dbg_try_rounds = 0
1236 dbg_type = b'unknown'
1235 dbg_type = b'unknown'
1237
1236
1238 cachedelta = revinfo.cachedelta
1237 cachedelta = revinfo.cachedelta
1239 p1 = revinfo.p1
1238 p1 = revinfo.p1
1240 p2 = revinfo.p2
1239 p2 = revinfo.p2
1241 revlog = self.revlog
1240 revlog = self.revlog
1242
1241
1243 deltainfo = None
1242 deltainfo = None
1244 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1243 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1245
1244
1246 if gather_debug:
1245 if gather_debug:
1247 if p1r != nullrev:
1246 if p1r != nullrev:
1248 p1_chain_len = revlog._chaininfo(p1r)[0]
1247 p1_chain_len = revlog._chaininfo(p1r)[0]
1249 else:
1248 else:
1250 p1_chain_len = -1
1249 p1_chain_len = -1
1251 if p2r != nullrev:
1250 if p2r != nullrev:
1252 p2_chain_len = revlog._chaininfo(p2r)[0]
1251 p2_chain_len = revlog._chaininfo(p2r)[0]
1253 else:
1252 else:
1254 p2_chain_len = -1
1253 p2_chain_len = -1
1255 if debug_search:
1254 if debug_search:
1256 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1255 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1257 msg %= target_rev
1256 msg %= target_rev
1258 self._write_debug(msg)
1257 self._write_debug(msg)
1259
1258
1260 groups = _candidategroups(
1259 groups = _candidategroups(
1261 self.revlog,
1260 self.revlog,
1262 revinfo.textlen,
1261 revinfo.textlen,
1263 p1r,
1262 p1r,
1264 p2r,
1263 p2r,
1265 cachedelta,
1264 cachedelta,
1266 excluded_bases,
1265 excluded_bases,
1267 target_rev,
1266 target_rev,
1268 )
1267 )
1269 candidaterevs = next(groups)
1268 candidaterevs = next(groups)
1270 while candidaterevs is not None:
1269 while candidaterevs is not None:
1271 dbg_try_rounds += 1
1270 dbg_try_rounds += 1
1272 if debug_search:
1271 if debug_search:
1273 prev = None
1272 prev = None
1274 if deltainfo is not None:
1273 if deltainfo is not None:
1275 prev = deltainfo.base
1274 prev = deltainfo.base
1276
1275
1277 if (
1276 if (
1278 cachedelta is not None
1277 cachedelta is not None
1279 and len(candidaterevs) == 1
1278 and len(candidaterevs) == 1
1280 and cachedelta[0] in candidaterevs
1279 and cachedelta[0] in candidaterevs
1281 ):
1280 ):
1282 round_type = b"cached-delta"
1281 round_type = b"cached-delta"
1283 elif p1 in candidaterevs or p2 in candidaterevs:
1282 elif p1 in candidaterevs or p2 in candidaterevs:
1284 round_type = b"parents"
1283 round_type = b"parents"
1285 elif prev is not None and all(c < prev for c in candidaterevs):
1284 elif prev is not None and all(c < prev for c in candidaterevs):
1286 round_type = b"refine-down"
1285 round_type = b"refine-down"
1287 elif prev is not None and all(c > prev for c in candidaterevs):
1286 elif prev is not None and all(c > prev for c in candidaterevs):
1288 round_type = b"refine-up"
1287 round_type = b"refine-up"
1289 else:
1288 else:
1290 round_type = b"search-down"
1289 round_type = b"search-down"
1291 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1290 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1292 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1291 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1293 self._write_debug(msg)
1292 self._write_debug(msg)
1294 nominateddeltas = []
1293 nominateddeltas = []
1295 if deltainfo is not None:
1294 if deltainfo is not None:
1296 if debug_search:
1295 if debug_search:
1297 msg = (
1296 msg = (
1298 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1297 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1299 )
1298 )
1300 msg %= (deltainfo.base, deltainfo.deltalen)
1299 msg %= (deltainfo.base, deltainfo.deltalen)
1301 self._write_debug(msg)
1300 self._write_debug(msg)
1302 # if we already found a good delta,
1301 # if we already found a good delta,
1303 # challenge it against refined candidates
1302 # challenge it against refined candidates
1304 nominateddeltas.append(deltainfo)
1303 nominateddeltas.append(deltainfo)
1305 for candidaterev in candidaterevs:
1304 for candidaterev in candidaterevs:
1306 if debug_search:
1305 if debug_search:
1307 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1306 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1308 msg %= candidaterev
1307 msg %= candidaterev
1309 self._write_debug(msg)
1308 self._write_debug(msg)
1310 candidate_type = None
1309 candidate_type = None
1311 if candidaterev == p1:
1310 if candidaterev == p1:
1312 candidate_type = b"p1"
1311 candidate_type = b"p1"
1313 elif candidaterev == p2:
1312 elif candidaterev == p2:
1314 candidate_type = b"p2"
1313 candidate_type = b"p2"
1315 elif self.revlog.issnapshot(candidaterev):
1314 elif self.revlog.issnapshot(candidaterev):
1316 candidate_type = b"snapshot-%d"
1315 candidate_type = b"snapshot-%d"
1317 candidate_type %= self.revlog.snapshotdepth(
1316 candidate_type %= self.revlog.snapshotdepth(
1318 candidaterev
1317 candidaterev
1319 )
1318 )
1320
1319
1321 if candidate_type is not None:
1320 if candidate_type is not None:
1322 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1321 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1323 msg %= candidate_type
1322 msg %= candidate_type
1324 self._write_debug(msg)
1323 self._write_debug(msg)
1325 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1324 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1326 msg %= self.revlog.length(candidaterev)
1325 msg %= self.revlog.length(candidaterev)
1327 self._write_debug(msg)
1326 self._write_debug(msg)
1328 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1327 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1329 msg %= self.revlog.deltaparent(candidaterev)
1328 msg %= self.revlog.deltaparent(candidaterev)
1330 self._write_debug(msg)
1329 self._write_debug(msg)
1331
1330
1332 dbg_try_count += 1
1331 dbg_try_count += 1
1333
1332
1334 if debug_search:
1333 if debug_search:
1335 delta_start = util.timer()
1334 delta_start = util.timer()
1336 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1335 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1337 if debug_search:
1336 if debug_search:
1338 delta_end = util.timer()
1337 delta_end = util.timer()
1339 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1338 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1340 msg %= delta_end - delta_start
1339 msg %= delta_end - delta_start
1341 self._write_debug(msg)
1340 self._write_debug(msg)
1342 if candidatedelta is not None:
1341 if candidatedelta is not None:
1343 if is_good_delta_info(self.revlog, candidatedelta, revinfo):
1342 if is_good_delta_info(self.revlog, candidatedelta, revinfo):
1344 if debug_search:
1343 if debug_search:
1345 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1344 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1346 msg %= candidatedelta.deltalen
1345 msg %= candidatedelta.deltalen
1347 self._write_debug(msg)
1346 self._write_debug(msg)
1348 nominateddeltas.append(candidatedelta)
1347 nominateddeltas.append(candidatedelta)
1349 elif debug_search:
1348 elif debug_search:
1350 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1349 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1351 msg %= candidatedelta.deltalen
1350 msg %= candidatedelta.deltalen
1352 self._write_debug(msg)
1351 self._write_debug(msg)
1353 elif debug_search:
1352 elif debug_search:
1354 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1353 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1355 self._write_debug(msg)
1354 self._write_debug(msg)
1356 if nominateddeltas:
1355 if nominateddeltas:
1357 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1356 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1358 if deltainfo is not None:
1357 if deltainfo is not None:
1359 candidaterevs = groups.send(deltainfo.base)
1358 candidaterevs = groups.send(deltainfo.base)
1360 else:
1359 else:
1361 candidaterevs = next(groups)
1360 candidaterevs = next(groups)
1362
1361
1363 if deltainfo is None:
1362 if deltainfo is None:
1364 dbg_type = b"full"
1363 dbg_type = b"full"
1365 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1364 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1366 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1365 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1367 dbg_type = b"snapshot"
1366 dbg_type = b"snapshot"
1368 else:
1367 else:
1369 dbg_type = b"delta"
1368 dbg_type = b"delta"
1370
1369
1371 if gather_debug:
1370 if gather_debug:
1372 end = util.timer()
1371 end = util.timer()
1373 used_cached = (
1372 used_cached = (
1374 cachedelta is not None
1373 cachedelta is not None
1375 and dbg_try_rounds == 1
1374 and dbg_try_rounds == 1
1376 and dbg_try_count == 1
1375 and dbg_try_count == 1
1377 and deltainfo.base == cachedelta[0]
1376 and deltainfo.base == cachedelta[0]
1378 )
1377 )
1379 dbg = {
1378 dbg = {
1380 'duration': end - start,
1379 'duration': end - start,
1381 'revision': target_rev,
1380 'revision': target_rev,
1382 'delta-base': deltainfo.base, # pytype: disable=attribute-error
1381 'delta-base': deltainfo.base, # pytype: disable=attribute-error
1383 'search_round_count': dbg_try_rounds,
1382 'search_round_count': dbg_try_rounds,
1384 'using-cached-base': used_cached,
1383 'using-cached-base': used_cached,
1385 'delta_try_count': dbg_try_count,
1384 'delta_try_count': dbg_try_count,
1386 'type': dbg_type,
1385 'type': dbg_type,
1387 'p1-chain-len': p1_chain_len,
1386 'p1-chain-len': p1_chain_len,
1388 'p2-chain-len': p2_chain_len,
1387 'p2-chain-len': p2_chain_len,
1389 }
1388 }
1390 if (
1389 if (
1391 deltainfo.snapshotdepth # pytype: disable=attribute-error
1390 deltainfo.snapshotdepth # pytype: disable=attribute-error
1392 is not None
1391 is not None
1393 ):
1392 ):
1394 dbg[
1393 dbg[
1395 'snapshot-depth'
1394 'snapshot-depth'
1396 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1395 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1397 else:
1396 else:
1398 dbg['snapshot-depth'] = 0
1397 dbg['snapshot-depth'] = 0
1399 target_revlog = b"UNKNOWN"
1398 target_revlog = b"UNKNOWN"
1400 target_type = self.revlog.target[0]
1399 target_type = self.revlog.target[0]
1401 target_key = self.revlog.target[1]
1400 target_key = self.revlog.target[1]
1402 if target_type == KIND_CHANGELOG:
1401 if target_type == KIND_CHANGELOG:
1403 target_revlog = b'CHANGELOG:'
1402 target_revlog = b'CHANGELOG:'
1404 elif target_type == KIND_MANIFESTLOG:
1403 elif target_type == KIND_MANIFESTLOG:
1405 target_revlog = b'MANIFESTLOG:'
1404 target_revlog = b'MANIFESTLOG:'
1406 if target_key:
1405 if target_key:
1407 target_revlog += b'%s:' % target_key
1406 target_revlog += b'%s:' % target_key
1408 elif target_type == KIND_FILELOG:
1407 elif target_type == KIND_FILELOG:
1409 target_revlog = b'FILELOG:'
1408 target_revlog = b'FILELOG:'
1410 if target_key:
1409 if target_key:
1411 target_revlog += b'%s:' % target_key
1410 target_revlog += b'%s:' % target_key
1412 dbg['target-revlog'] = target_revlog
1411 dbg['target-revlog'] = target_revlog
1413
1412
1414 if self._debug_info is not None:
1413 if self._debug_info is not None:
1415 self._debug_info.append(dbg)
1414 self._debug_info.append(dbg)
1416
1415
1417 if self._write_debug is not None:
1416 if self._write_debug is not None:
1418 msg = (
1417 msg = (
1419 b"DBG-DELTAS:"
1418 b"DBG-DELTAS:"
1420 b" %-12s"
1419 b" %-12s"
1421 b" rev=%d:"
1420 b" rev=%d:"
1422 b" delta-base=%d"
1421 b" delta-base=%d"
1423 b" is-cached=%d"
1422 b" is-cached=%d"
1424 b" - search-rounds=%d"
1423 b" - search-rounds=%d"
1425 b" try-count=%d"
1424 b" try-count=%d"
1426 b" - delta-type=%-6s"
1425 b" - delta-type=%-6s"
1427 b" snap-depth=%d"
1426 b" snap-depth=%d"
1428 b" - p1-chain-length=%d"
1427 b" - p1-chain-length=%d"
1429 b" p2-chain-length=%d"
1428 b" p2-chain-length=%d"
1430 b" - duration=%f"
1429 b" - duration=%f"
1431 b"\n"
1430 b"\n"
1432 )
1431 )
1433 msg %= (
1432 msg %= (
1434 dbg["target-revlog"],
1433 dbg["target-revlog"],
1435 dbg["revision"],
1434 dbg["revision"],
1436 dbg["delta-base"],
1435 dbg["delta-base"],
1437 dbg["using-cached-base"],
1436 dbg["using-cached-base"],
1438 dbg["search_round_count"],
1437 dbg["search_round_count"],
1439 dbg["delta_try_count"],
1438 dbg["delta_try_count"],
1440 dbg["type"],
1439 dbg["type"],
1441 dbg["snapshot-depth"],
1440 dbg["snapshot-depth"],
1442 dbg["p1-chain-len"],
1441 dbg["p1-chain-len"],
1443 dbg["p2-chain-len"],
1442 dbg["p2-chain-len"],
1444 dbg["duration"],
1443 dbg["duration"],
1445 )
1444 )
1446 self._write_debug(msg)
1445 self._write_debug(msg)
1447 return deltainfo
1446 return deltainfo
1448
1447
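A hedged sketch of how the class above is typically driven; constructing `revinfo` and the file handle is revlog-internal and omitted, so treat every name here as a placeholder rather than a working recipe.

dc = deltacomputer(revlog, write_debug=None, debug_search=False)
info = dc.finddeltainfo(
    revinfo, fh, excluded_bases={censored_rev}, target_rev=len(revlog)
)
# info.base is the chosen delta base; for a full snapshot it equals the new rev itself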
1449
1448
1450 def delta_compression(default_compression_header, deltainfo):
1449 def delta_compression(default_compression_header, deltainfo):
1451 """return (COMPRESSION_MODE, deltainfo)
1450 """return (COMPRESSION_MODE, deltainfo)
1452
1451
1453 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1452 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1454 compression.
1453 compression.
1455 """
1454 """
1456 h, d = deltainfo.data
1455 h, d = deltainfo.data
1457 compression_mode = COMP_MODE_INLINE
1456 compression_mode = COMP_MODE_INLINE
1458 if not h and not d:
1457 if not h and not d:
1459 # no data to store at all... declare it uncompressed
1458 # no data to store at all... declare it uncompressed
1460 compression_mode = COMP_MODE_PLAIN
1459 compression_mode = COMP_MODE_PLAIN
1461 elif not h:
1460 elif not h:
1462 t = d[0:1]
1461 t = d[0:1]
1463 if t == b'\0':
1462 if t == b'\0':
1464 compression_mode = COMP_MODE_PLAIN
1463 compression_mode = COMP_MODE_PLAIN
1465 elif t == default_compression_header:
1464 elif t == default_compression_header:
1466 compression_mode = COMP_MODE_DEFAULT
1465 compression_mode = COMP_MODE_DEFAULT
1467 elif h == b'u':
1466 elif h == b'u':
1468 # we have a more efficient way to declare uncompressed
1467 # we have a more efficient way to declare uncompressed
1469 h = b''
1468 h = b''
1470 compression_mode = COMP_MODE_PLAIN
1469 compression_mode = COMP_MODE_PLAIN
1471 deltainfo = drop_u_compression(deltainfo)
1470 deltainfo = drop_u_compression(deltainfo)
1472 return compression_mode, deltainfo
1471 return compression_mode, deltainfo
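Summarizing the dispatch above as a hedged example; the one-byte header b'x' is only an assumption of what a default compression header could look like, not a value mandated by the format.

# per the branches above (h = compression header, d = payload):
#   h == b'' and d == b''                     -> COMP_MODE_PLAIN
#   h == b'' and d starts with b'\0'          -> COMP_MODE_PLAIN
#   h == b'' and d starts with default header -> COMP_MODE_DEFAULT
#   h == b'u'                                 -> header dropped, COMP_MODE_PLAIN
#   anything else                             -> COMP_MODE_INLINE
mode, info = delta_compression(b'x', some_deltainfo)  # b'x' is a hypothetical default header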
@@ -1,523 +1,523 b''
1 # test revlog interaction with raw data (flagprocessor)
1 # test revlog interaction with raw data (flagprocessor)
2
2
3
3
4 import hashlib
4 import hashlib
5 import sys
5 import sys
6
6
7 from mercurial import (
7 from mercurial import (
8 encoding,
8 encoding,
9 revlog,
9 revlog,
10 transaction,
10 transaction,
11 vfs,
11 vfs,
12 )
12 )
13
13
14 from mercurial.revlogutils import (
14 from mercurial.revlogutils import (
15 constants,
15 constants,
16 deltas,
16 deltas,
17 flagutil,
17 flagutil,
18 )
18 )
19
19
20
20
21 class _NoTransaction:
21 class _NoTransaction:
22 """transaction like object to update the nodemap outside a transaction"""
22 """transaction like object to update the nodemap outside a transaction"""
23
23
24 def __init__(self):
24 def __init__(self):
25 self._postclose = {}
25 self._postclose = {}
26
26
27 def addpostclose(self, callback_id, callback_func):
27 def addpostclose(self, callback_id, callback_func):
28 self._postclose[callback_id] = callback_func
28 self._postclose[callback_id] = callback_func
29
29
30 def registertmp(self, *args, **kwargs):
30 def registertmp(self, *args, **kwargs):
31 pass
31 pass
32
32
33 def addbackup(self, *args, **kwargs):
33 def addbackup(self, *args, **kwargs):
34 pass
34 pass
35
35
36 def add(self, *args, **kwargs):
36 def add(self, *args, **kwargs):
37 pass
37 pass
38
38
39 def addabort(self, *args, **kwargs):
39 def addabort(self, *args, **kwargs):
40 pass
40 pass
41
41
42 def _report(self, *args):
42 def _report(self, *args):
43 pass
43 pass
44
44
45
45
46 # TESTTMP is optional. This makes it convenient to run without run-tests.py
46 # TESTTMP is optional. This makes it convenient to run without run-tests.py
47 tvfs = vfs.vfs(encoding.environ.get(b'TESTTMP', b'/tmp'))
47 tvfs = vfs.vfs(encoding.environ.get(b'TESTTMP', b'/tmp'))
48
48
49 # Enable generaldelta otherwise revlog won't use delta as expected by the test
49 # Enable generaldelta otherwise revlog won't use delta as expected by the test
50 tvfs.options = {
50 tvfs.options = {
51 b'generaldelta': True,
51 b'generaldelta': True,
52 b'revlogv1': True,
52 b'revlogv1': True,
53 b'sparse-revlog': True,
53 b'sparse-revlog': True,
54 }
54 }
55
55
56
56
57 def abort(msg):
57 def abort(msg):
58 print('abort: %s' % msg)
58 print('abort: %s' % msg)
59 # Return 0 so run-tests.py could compare the output.
59 # Return 0 so run-tests.py could compare the output.
60 sys.exit()
60 sys.exit()
61
61
62
62
63 # Register a revlog processor for flag EXTSTORED.
63 # Register a revlog processor for flag EXTSTORED.
64 #
64 #
65 # It simply prepends a fixed header, and replaces '1' with 'i'. So it has
65 # It simply prepends a fixed header, and replaces '1' with 'i'. So it has
66 # insertion and replacement, and may be interesting to test revlog's line-based
66 # insertion and replacement, and may be interesting to test revlog's line-based
67 # deltas.
67 # deltas.
68 _extheader = b'E\n'
68 _extheader = b'E\n'
69
69
70
70
71 def readprocessor(self, rawtext):
71 def readprocessor(self, rawtext):
72 # True: the returned text could be used to verify hash
72 # True: the returned text could be used to verify hash
73 text = rawtext[len(_extheader) :].replace(b'i', b'1')
73 text = rawtext[len(_extheader) :].replace(b'i', b'1')
74 return text, True
74 return text, True
75
75
76
76
77 def writeprocessor(self, text):
77 def writeprocessor(self, text):
78 # False: the returned rawtext shouldn't be used to verify hash
78 # False: the returned rawtext shouldn't be used to verify hash
79 rawtext = _extheader + text.replace(b'1', b'i')
79 rawtext = _extheader + text.replace(b'1', b'i')
80 return rawtext, False
80 return rawtext, False
81
81
82
82
83 def rawprocessor(self, rawtext):
83 def rawprocessor(self, rawtext):
84 # False: do not verify hash. Only the content returned by "readprocessor"
84 # False: do not verify hash. Only the content returned by "readprocessor"
85 # can be used to verify hash.
85 # can be used to verify hash.
86 return False
86 return False
87
87
88
88
89 flagutil.addflagprocessor(
89 flagutil.addflagprocessor(
90 revlog.REVIDX_EXTSTORED, (readprocessor, writeprocessor, rawprocessor)
90 revlog.REVIDX_EXTSTORED, (readprocessor, writeprocessor, rawprocessor)
91 )
91 )
92
92
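A hedged round-trip illustration of the two processors registered above; the sample text is invented and deliberately avoids the letter 'i' so the substitution is reversible.

text = b'a1b1\n'
rawtext, verify_raw = writeprocessor(None, text)   # -> b'E\naibi\n', False
assert rawtext == _extheader + b'aibi\n'
restored, verify = readprocessor(None, rawtext)    # strips header, 'i' -> '1'
assert restored == text and verify is True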
93 # Utilities about reading and appending revlog
93 # Utilities about reading and appending revlog
94
94
95
95
96 def newtransaction():
96 def newtransaction():
97 # A transaction is required to write revlogs
97 # A transaction is required to write revlogs
98 report = lambda msg: None
98 report = lambda msg: None
99 return transaction.transaction(report, tvfs, {'plain': tvfs}, b'journal')
99 return transaction.transaction(report, tvfs, {'plain': tvfs}, b'journal')
100
100
101
101
102 def newrevlog(name=b'_testrevlog', recreate=False):
102 def newrevlog(name=b'_testrevlog', recreate=False):
103 if recreate:
103 if recreate:
104 tvfs.tryunlink(name + b'.i')
104 tvfs.tryunlink(name + b'.i')
105 target = (constants.KIND_OTHER, b'test')
105 target = (constants.KIND_OTHER, b'test')
106 rlog = revlog.revlog(tvfs, target=target, radix=name)
106 rlog = revlog.revlog(tvfs, target=target, radix=name)
107 return rlog
107 return rlog
108
108
109
109
def appendrev(rlog, text, tr, isext=False, isdelta=True):
    """Append a revision. If isext is True, set the EXTSTORED flag so the
    flag processor will be used (and rawtext is different from text). If
    isdelta is True, force the revision to be a delta, otherwise it's a full
    text.
    """
    nextrev = len(rlog)
    p1 = rlog.node(nextrev - 1)
    p2 = rlog.nullid
    if isext:
        flags = revlog.REVIDX_EXTSTORED
    else:
        flags = revlog.REVIDX_DEFAULT_FLAGS
    # Change storedeltachains temporarily, to override revlog's delta decision
    rlog._storedeltachains = isdelta
    try:
        rlog.addrevision(text, tr, nextrev, p1, p2, flags=flags)
        return nextrev
    except Exception as ex:
        abort('rev %d: failed to append: %s' % (nextrev, ex))
    finally:
        # Restore storedeltachains. It is always True, see revlog.__init__
        rlog._storedeltachains = True

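# Usage sketch (illustrative, assuming an open transaction tr): the keyword
# flags pick the storage form of the new revision.
#
#     appendrev(rlog, b'a\n', tr, isdelta=False)  # stored as a full text
#     appendrev(rlog, b'a\nb\n', tr, isext=True)  # EXTSTORED, rawtext rewritten
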
def addgroupcopy(rlog, tr, destname=b'_destrevlog', optimaldelta=True):
    """Copy revlog to destname using revlog.addgroup. Return the copied revlog.

    This emulates push or pull. They use changegroups, and a changegroup
    requires a repo to work. We don't have a repo, so a dummy changegroup is
    used.

    If optimaldelta is True, use an optimal delta parent, so the destination
    revlog can probably reuse it. Otherwise build a sub-optimal delta, and the
    destination revlog needs more work to use it.

    This exercises some revlog.addgroup (and revlog._addrevision(text=None))
    code paths, which are not covered by "appendrev" alone.
    """

    class dummychangegroup:
        @staticmethod
        def deltachunk(pnode):
            pnode = pnode or rlog.nullid
            parentrev = rlog.rev(pnode)
            r = parentrev + 1
            if r >= len(rlog):
                return {}
            if optimaldelta:
                deltaparent = parentrev
            else:
                # suboptimal deltaparent
                deltaparent = min(0, parentrev)
            if not rlog.candelta(deltaparent, r):
                deltaparent = -1
            return {
                b'node': rlog.node(r),
                b'p1': pnode,
                b'p2': rlog.nullid,
                b'cs': rlog.node(rlog.linkrev(r)),
                b'flags': rlog.flags(r),
                b'deltabase': rlog.node(deltaparent),
                b'delta': rlog.revdiff(deltaparent, r),
                b'sidedata': rlog.sidedata(r),
            }

        def deltaiter(self):
            chain = None
            for chunkdata in iter(lambda: self.deltachunk(chain), {}):
                node = chunkdata[b'node']
                p1 = chunkdata[b'p1']
                p2 = chunkdata[b'p2']
                cs = chunkdata[b'cs']
                deltabase = chunkdata[b'deltabase']
                delta = chunkdata[b'delta']
                flags = chunkdata[b'flags']
                sidedata = chunkdata[b'sidedata']

                chain = node

                yield (node, p1, p2, cs, deltabase, delta, flags, sidedata)

    def linkmap(lnode):
        return rlog.rev(lnode)

    dlog = newrevlog(destname, recreate=True)
    dummydeltas = dummychangegroup().deltaiter()
    dlog.addgroup(dummydeltas, linkmap, tr)
    return dlog

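# Note on the idiom used in deltaiter above: iter(callable, sentinel) keeps
# calling the callable until it returns the sentinel ({} here). A tiny
# self-contained sketch of the same pattern:
def _iter_until_sentinel_sketch():
    chunks = [b'a', b'b', b'c']
    seen = []
    for chunk in iter(lambda: chunks.pop(0) if chunks else {}, {}):
        seen.append(chunk)
    assert seen == [b'a', b'b', b'c']
    return seen
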
def lowlevelcopy(rlog, tr, destname=b'_destrevlog'):
    """Like addgroupcopy, but use the low level revlog._addrevision directly.

    It exercises some code paths that are hard to reach otherwise.
    """
    dlog = newrevlog(destname, recreate=True)
    for r in rlog:
        p1 = rlog.node(r - 1)
        p2 = rlog.nullid
        if r == 0 or (rlog.flags(r) & revlog.REVIDX_EXTSTORED):
            text = rlog.rawdata(r)
            cachedelta = None
        else:
            # deltaparent cannot have EXTSTORED flag.
            deltaparent = max(
                [-1]
                + [
                    p
                    for p in range(r)
                    if rlog.flags(p) & revlog.REVIDX_EXTSTORED == 0
                ]
            )
            text = None
            cachedelta = (deltaparent, rlog.revdiff(deltaparent, r))
        flags = rlog.flags(r)
        with dlog._writing(_NoTransaction()):
            dlog._addrevision(
                rlog.node(r),
                text,
                tr,
                r,
                p1,
                p2,
                flags,
                cachedelta,
            )
    return dlog

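# Note (illustrative): the cachedelta handed to _addrevision above is a
# (baserev, delta) pair, letting the destination revlog store the delta as-is
# instead of recomputing one, e.g.:
#
#     cachedelta = (0, rlog.revdiff(0, 2))  # delta of rev 2 against rev 0
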
# Utilities to generate revisions for testing


def genbits(n):
    """Given a number n, generate (2 ** (n * 2) + 1) numbers in range(2 ** n),
    i.e. the generated numbers have a width of n bits.

    The combination of two adjacent numbers will cover all possible cases.
    That is to say, given any x, y where both x and y are in range(2 ** n),
    there is an x followed immediately by y in the generated sequence.
    """
    m = 2 ** n

    # Gray Code. See https://en.wikipedia.org/wiki/Gray_code
    gray = lambda x: x ^ (x >> 1)
    reversegray = {gray(i): i for i in range(m)}

    # Generate (n * 2) bit gray code, yield lower n bits as X, and look for
    # the next unused gray code where the higher n bits equal X.

    # For gray codes whose higher bits are X, a[X] of them have been used.
    a = [0] * m

    # Iterate from 0.
    x = 0
    yield x
    for i in range(m * m):
        x = reversegray[x]
        y = gray(a[x] + x * m) & (m - 1)
        assert a[x] < m
        a[x] += 1
        x = y
        yield x

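# Sanity-check sketch (editorial) for the property described in the genbits
# docstring: every ordered pair (x, y) with x, y in range(2 ** n) appears as
# two adjacent values somewhere in the generated sequence.
def _genbits_property_sketch(n=3):
    seq = list(genbits(n))
    pairs = {(a, b) for a, b in zip(seq, seq[1:])}
    assert len(seq) == 2 ** (n * 2) + 1
    assert len(pairs) == 2 ** (n * 2)
    return pairs
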
def gentext(rev):
    '''Given a revision number, generate dummy text'''
    return b''.join(b'%d\n' % j for j in range(-1, rev % 5))

def writecases(rlog, tr):
    """Write some revisions of interest to the test.

    The test is interested in 3 properties of a revision:

    - Is it a delta or a full text? (isdelta)
      This is to catch some delta application issues.
    - Does it have a flag of EXTSTORED? (isext)
      This is to catch some flag processor issues, especially when they
      interact with revlog deltas.
    - Is its text empty? (isempty)
      This is less important. It is intended to try to catch some careless
      checks like "if text" instead of "if text is None". Note: if a flag
      processor is involved, the raw text may not be empty.

    Write 65 revisions, so that all combinations of the above flags for
    adjacent revisions are covered. That is to say,

        len(set(
            (r.delta, r.ext, r.empty, (r+1).delta, (r+1).ext, (r+1).empty)
            for r in range(len(rlog) - 1)
        )) == 64

    where "r.delta", "r.ext", and "r.empty" are booleans matching the
    properties mentioned above.

    Return expected [(text, rawtext)].
    """
    result = []
    for i, x in enumerate(genbits(3)):
        isdelta, isext, isempty = bool(x & 1), bool(x & 2), bool(x & 4)
        if isempty:
            text = b''
        else:
            text = gentext(i)
        rev = appendrev(rlog, text, tr, isext=isext, isdelta=isdelta)

        # Verify text, rawtext, and rawsize
        if isext:
            rawtext = writeprocessor(None, text)[0]
        else:
            rawtext = text
        if rlog.rawsize(rev) != len(rawtext):
            abort('rev %d: wrong rawsize' % rev)
        if rlog.revision(rev) != text:
            abort('rev %d: wrong text' % rev)
        if rlog.rawdata(rev) != rawtext:
            abort('rev %d: wrong rawtext' % rev)
        result.append((text, rawtext))

        # Verify flags like isdelta, isext work as expected
        # isdelta can be overridden to False if this or p1 has isext set
        if bool(rlog.deltaparent(rev) > -1) and not isdelta:
            abort('rev %d: isdelta is unexpected' % rev)
        if bool(rlog.flags(rev)) != isext:
            abort('rev %d: isext is ineffective' % rev)
    return result

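# Example of the bit decoding used in writecases (illustrative): bit 0 maps to
# isdelta, bit 1 to isext, bit 2 to isempty, so x = 6 (0b110) describes a
# full-text, externally stored, empty revision.
def _decode_case_sketch(x=6):
    return bool(x & 1), bool(x & 2), bool(x & 4)  # (False, True, True) for 6
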
# Main test and checking


def checkrevlog(rlog, expected):
    '''Check if revlog has expected contents. expected is [(text, rawtext)]'''
    # Test using different access orders. This could expose some issues
    # depending on revlog caching (see revlog._cache).
    for r0 in range(len(rlog) - 1):
        r1 = r0 + 1
        for revorder in [[r0, r1], [r1, r0]]:
            for raworder in [[True], [False], [True, False], [False, True]]:
                nlog = newrevlog()
                for rev in revorder:
                    for raw in raworder:
                        if raw:
                            t = nlog.rawdata(rev)
                        else:
                            t = nlog.revision(rev)
                        if t != expected[rev][int(raw)]:
                            abort(
                                'rev %d: corrupted %stext'
                                % (rev, raw and 'raw' or '')
                            )

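# Note (illustrative): for every adjacent pair (r0, r1) the loops above try
# 2 revision orders x 4 raw/non-raw orders = 8 access patterns on a freshly
# opened revlog, e.g. reading r1's rawdata before r0's decoded text, to shake
# out cache-dependent bugs.
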
slicingdata = [
    ([0, 1, 2, 3, 55, 56, 58, 59, 60], [[0, 1], [2], [58], [59, 60]], 10),
    ([0, 1, 2, 3, 55, 56, 58, 59, 60], [[0, 1], [2], [58], [59, 60]], 10),
    (
        [-1, 0, 1, 2, 3, 55, 56, 58, 59, 60],
        [[-1, 0, 1], [2], [58], [59, 60]],
        10,
    ),
]


def slicingtest(rlog):
    oldmin = rlog._srmingapsize
    try:
        # the test revlog is small; remove the floor under which slicing is
        # disregarded.
        rlog._srmingapsize = 0
        for item in slicingdata:
            chain, expected, target = item
            result = deltas.slicechunk(rlog, chain, targetsize=target)
            result = list(result)
            if result != expected:
                print('slicing differ:')
                print(' chain: %s' % chain)
                print(' target: %s' % target)
                print(' expected: %s' % expected)
                print(' result: %s' % result)
    finally:
        rlog._srmingapsize = oldmin

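# Reading the test data above (illustrative): each slicingdata entry is a
# (chain, expected, targetsize) tuple; e.g. with targetsize=10 the chain
# [0, 1, 2, 3, 55, 56, 58, 59, 60] is expected to come back from
# deltas.slicechunk as the groups [[0, 1], [2], [58], [59, 60]].
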
def md5sum(s):
    return hashlib.md5(s).digest()


def _maketext(*coord):
    """Create a piece of text according to ranges of integers.

    The text returned uses an md5sum of each integer to make it less
    compressible."""
    pieces = []
    for start, size in coord:
        num = range(start, start + size)
        p = [md5sum(b'%d' % r) for r in num]
        pieces.append(b'\n'.join(p))
    return b'\n'.join(pieces) + b'\n'

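# Quick illustration: every integer becomes a 16-byte md5 digest, the digests
# of one (start, size) range are newline-joined, and a trailing newline is
# appended, so for instance:
#
#     len(_maketext((0, 2))) == 2 * 16 + 2
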
data = [
    _maketext((0, 120), (456, 60)),
    _maketext((0, 120), (345, 60)),
    _maketext((0, 120), (734, 60)),
    _maketext((0, 120), (734, 60), (923, 45)),
    _maketext((0, 120), (734, 60), (234, 45)),
    _maketext((0, 120), (734, 60), (564, 45)),
    _maketext((0, 120), (734, 60), (361, 45)),
    _maketext((0, 120), (734, 60), (489, 45)),
    _maketext((0, 120), (123, 60)),
    _maketext((0, 120), (145, 60)),
    _maketext((0, 120), (104, 60)),
    _maketext((0, 120), (430, 60)),
    _maketext((0, 120), (430, 60), (923, 45)),
    _maketext((0, 120), (430, 60), (234, 45)),
    _maketext((0, 120), (430, 60), (564, 45)),
    _maketext((0, 120), (430, 60), (361, 45)),
    _maketext((0, 120), (430, 60), (489, 45)),
    _maketext((0, 120), (249, 60)),
    _maketext((0, 120), (832, 60)),
    _maketext((0, 120), (891, 60)),
    _maketext((0, 120), (543, 60)),
    _maketext((0, 120), (120, 60)),
    _maketext((0, 120), (60, 60), (768, 30)),
    _maketext((0, 120), (60, 60), (260, 30)),
    _maketext((0, 120), (60, 60), (450, 30)),
    _maketext((0, 120), (60, 60), (361, 30)),
    _maketext((0, 120), (60, 60), (886, 30)),
    _maketext((0, 120), (60, 60), (116, 30)),
    _maketext((0, 120), (60, 60), (567, 30), (629, 40)),
    _maketext((0, 120), (60, 60), (569, 30), (745, 40)),
    _maketext((0, 120), (60, 60), (777, 30), (700, 40)),
    _maketext((0, 120), (60, 60), (618, 30), (398, 40), (158, 10)),
]

def makesnapshot(tr):
    rl = newrevlog(name=b'_snaprevlog3', recreate=True)
    for i in data:
        appendrev(rl, i, tr)
    return rl


snapshots = [-1, 0, 6, 8, 11, 17, 19, 21, 25, 30]


def issnapshottest(rlog):
    result = []
    if rlog.issnapshot(-1):
        result.append(-1)
    for rev in rlog:
        if rlog.issnapshot(rev):
            result.append(rev)
    if snapshots != result:
        print('snapshot differ:')
        print(' expected: %s' % snapshots)
        print(' got: %s' % result)


# Expected snapshot maps. The previous expectations used lists per delta base:
#     {0: [6, 8, 11, 17, 19, 25], 8: [21], -1: [0, 30]}
#     {0: [17, 19, 25], 8: [21], -1: [30]}
# SnapshotCache now stores a set of snapshot revisions per base:
snapshotmapall = {0: {6, 8, 11, 17, 19, 25}, 8: {21}, -1: {0, 30}}
snapshotmap15 = {0: {17, 19, 25}, 8: {21}, -1: {30}}


def findsnapshottest(rlog):
    cache = deltas.SnapshotCache()
    cache.update(rlog)
    resultall = dict(cache.snapshots)
    if resultall != snapshotmapall:
        print('snapshot map differ:')
        print(' expected: %s' % snapshotmapall)
        print(' got: %s' % resultall)
    cache15 = deltas.SnapshotCache()
    cache15.update(rlog, 15)
    result15 = dict(cache15.snapshots)
    if result15 != snapshotmap15:
        print('snapshot map differ:')
        print(' expected: %s' % snapshotmap15)
        print(' got: %s' % result15)

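# The maps above are keyed by the delta base of each snapshot (-1 for root
# snapshots) and, with this change, hold sets of snapshot revisions. A hedged
# sketch (editorial, not SnapshotCache itself) of rebuilding such a mapping
# from the issnapshot()/deltaparent() APIs used elsewhere in this test:
def _snapshotmap_sketch(rlog):
    from collections import defaultdict

    mapping = defaultdict(set)
    for rev in rlog:
        if rlog.issnapshot(rev):
            mapping[rlog.deltaparent(rev)].add(rev)
    return dict(mapping)
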
def maintest():
    with newtransaction() as tr:
        rl = newrevlog(recreate=True)
        expected = writecases(rl, tr)
        checkrevlog(rl, expected)
        print('local test passed')
        # Copy via revlog.addgroup
        rl1 = addgroupcopy(rl, tr)
        checkrevlog(rl1, expected)
        rl2 = addgroupcopy(rl, tr, optimaldelta=False)
        checkrevlog(rl2, expected)
        print('addgroupcopy test passed')
        # Copy via revlog.clone
        rl3 = newrevlog(name=b'_destrevlog3', recreate=True)
        rl.clone(tr, rl3)
        checkrevlog(rl3, expected)
        print('clone test passed')
        # Copy via low-level revlog._addrevision
        rl4 = lowlevelcopy(rl, tr)
        checkrevlog(rl4, expected)
        print('lowlevelcopy test passed')
        slicingtest(rl)
        print('slicing test passed')
        rl5 = makesnapshot(tr)
        issnapshottest(rl5)
        print('issnapshot test passed')
        findsnapshottest(rl5)
        print('findsnapshot test passed')


try:
    maintest()
except Exception as ex:
    abort('crashed: %s' % ex)