/*
 * Provenance (scraped-page metadata, preserved from the original capture):
 * mercurial/cext/parsers.c at changeset r43952:bd87114c (branch: default),
 * commit "index: add a `rev` method (API)" by marmoute.
 */
1 /*
1 /*
2 parsers.c - efficient content parsing
2 parsers.c - efficient content parsing
3
3
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8 */
8 */
9
9
10 #define PY_SSIZE_T_CLEAN
10 #define PY_SSIZE_T_CLEAN
11 #include <Python.h>
11 #include <Python.h>
12 #include <ctype.h>
12 #include <ctype.h>
13 #include <stddef.h>
13 #include <stddef.h>
14 #include <string.h>
14 #include <string.h>
15
15
16 #include "bitmanipulation.h"
16 #include "bitmanipulation.h"
17 #include "charencode.h"
17 #include "charencode.h"
18 #include "util.h"
18 #include "util.h"
19
19
20 #ifdef IS_PY3K
20 #ifdef IS_PY3K
21 /* The mapping of Python types is meant to be temporary to get Python
21 /* The mapping of Python types is meant to be temporary to get Python
22 * 3 to compile. We should remove this once Python 3 support is fully
22 * 3 to compile. We should remove this once Python 3 support is fully
23 * supported and proper types are used in the extensions themselves. */
23 * supported and proper types are used in the extensions themselves. */
24 #define PyInt_Check PyLong_Check
24 #define PyInt_Check PyLong_Check
25 #define PyInt_FromLong PyLong_FromLong
25 #define PyInt_FromLong PyLong_FromLong
26 #define PyInt_FromSsize_t PyLong_FromSsize_t
26 #define PyInt_FromSsize_t PyLong_FromSsize_t
27 #define PyInt_AsLong PyLong_AsLong
27 #define PyInt_AsLong PyLong_AsLong
28 #endif
28 #endif
29
29
30 static const char *const versionerrortext = "Python minor version mismatch";
30 static const char *const versionerrortext = "Python minor version mismatch";
31
31
32 static PyObject *dict_new_presized(PyObject *self, PyObject *args)
32 static PyObject *dict_new_presized(PyObject *self, PyObject *args)
33 {
33 {
34 Py_ssize_t expected_size;
34 Py_ssize_t expected_size;
35
35
36 if (!PyArg_ParseTuple(args, "n:make_presized_dict", &expected_size)) {
36 if (!PyArg_ParseTuple(args, "n:make_presized_dict", &expected_size)) {
37 return NULL;
37 return NULL;
38 }
38 }
39
39
40 return _dict_new_presized(expected_size);
40 return _dict_new_presized(expected_size);
41 }
41 }
42
42
43 static inline dirstateTupleObject *make_dirstate_tuple(char state, int mode,
43 static inline dirstateTupleObject *make_dirstate_tuple(char state, int mode,
44 int size, int mtime)
44 int size, int mtime)
45 {
45 {
46 dirstateTupleObject *t =
46 dirstateTupleObject *t =
47 PyObject_New(dirstateTupleObject, &dirstateTupleType);
47 PyObject_New(dirstateTupleObject, &dirstateTupleType);
48 if (!t) {
48 if (!t) {
49 return NULL;
49 return NULL;
50 }
50 }
51 t->state = state;
51 t->state = state;
52 t->mode = mode;
52 t->mode = mode;
53 t->size = size;
53 t->size = size;
54 t->mtime = mtime;
54 t->mtime = mtime;
55 return t;
55 return t;
56 }
56 }
57
57
58 static PyObject *dirstate_tuple_new(PyTypeObject *subtype, PyObject *args,
58 static PyObject *dirstate_tuple_new(PyTypeObject *subtype, PyObject *args,
59 PyObject *kwds)
59 PyObject *kwds)
60 {
60 {
61 /* We do all the initialization here and not a tp_init function because
61 /* We do all the initialization here and not a tp_init function because
62 * dirstate_tuple is immutable. */
62 * dirstate_tuple is immutable. */
63 dirstateTupleObject *t;
63 dirstateTupleObject *t;
64 char state;
64 char state;
65 int size, mode, mtime;
65 int size, mode, mtime;
66 if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime)) {
66 if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime)) {
67 return NULL;
67 return NULL;
68 }
68 }
69
69
70 t = (dirstateTupleObject *)subtype->tp_alloc(subtype, 1);
70 t = (dirstateTupleObject *)subtype->tp_alloc(subtype, 1);
71 if (!t) {
71 if (!t) {
72 return NULL;
72 return NULL;
73 }
73 }
74 t->state = state;
74 t->state = state;
75 t->mode = mode;
75 t->mode = mode;
76 t->size = size;
76 t->size = size;
77 t->mtime = mtime;
77 t->mtime = mtime;
78
78
79 return (PyObject *)t;
79 return (PyObject *)t;
80 }
80 }
81
81
82 static void dirstate_tuple_dealloc(PyObject *o)
82 static void dirstate_tuple_dealloc(PyObject *o)
83 {
83 {
84 PyObject_Del(o);
84 PyObject_Del(o);
85 }
85 }
86
86
87 static Py_ssize_t dirstate_tuple_length(PyObject *o)
87 static Py_ssize_t dirstate_tuple_length(PyObject *o)
88 {
88 {
89 return 4;
89 return 4;
90 }
90 }
91
91
92 static PyObject *dirstate_tuple_item(PyObject *o, Py_ssize_t i)
92 static PyObject *dirstate_tuple_item(PyObject *o, Py_ssize_t i)
93 {
93 {
94 dirstateTupleObject *t = (dirstateTupleObject *)o;
94 dirstateTupleObject *t = (dirstateTupleObject *)o;
95 switch (i) {
95 switch (i) {
96 case 0:
96 case 0:
97 return PyBytes_FromStringAndSize(&t->state, 1);
97 return PyBytes_FromStringAndSize(&t->state, 1);
98 case 1:
98 case 1:
99 return PyInt_FromLong(t->mode);
99 return PyInt_FromLong(t->mode);
100 case 2:
100 case 2:
101 return PyInt_FromLong(t->size);
101 return PyInt_FromLong(t->size);
102 case 3:
102 case 3:
103 return PyInt_FromLong(t->mtime);
103 return PyInt_FromLong(t->mtime);
104 default:
104 default:
105 PyErr_SetString(PyExc_IndexError, "index out of range");
105 PyErr_SetString(PyExc_IndexError, "index out of range");
106 return NULL;
106 return NULL;
107 }
107 }
108 }
108 }
109
109
110 static PySequenceMethods dirstate_tuple_sq = {
110 static PySequenceMethods dirstate_tuple_sq = {
111 dirstate_tuple_length, /* sq_length */
111 dirstate_tuple_length, /* sq_length */
112 0, /* sq_concat */
112 0, /* sq_concat */
113 0, /* sq_repeat */
113 0, /* sq_repeat */
114 dirstate_tuple_item, /* sq_item */
114 dirstate_tuple_item, /* sq_item */
115 0, /* sq_ass_item */
115 0, /* sq_ass_item */
116 0, /* sq_contains */
116 0, /* sq_contains */
117 0, /* sq_inplace_concat */
117 0, /* sq_inplace_concat */
118 0 /* sq_inplace_repeat */
118 0 /* sq_inplace_repeat */
119 };
119 };
120
120
121 PyTypeObject dirstateTupleType = {
121 PyTypeObject dirstateTupleType = {
122 PyVarObject_HEAD_INIT(NULL, 0) /* header */
122 PyVarObject_HEAD_INIT(NULL, 0) /* header */
123 "dirstate_tuple", /* tp_name */
123 "dirstate_tuple", /* tp_name */
124 sizeof(dirstateTupleObject), /* tp_basicsize */
124 sizeof(dirstateTupleObject), /* tp_basicsize */
125 0, /* tp_itemsize */
125 0, /* tp_itemsize */
126 (destructor)dirstate_tuple_dealloc, /* tp_dealloc */
126 (destructor)dirstate_tuple_dealloc, /* tp_dealloc */
127 0, /* tp_print */
127 0, /* tp_print */
128 0, /* tp_getattr */
128 0, /* tp_getattr */
129 0, /* tp_setattr */
129 0, /* tp_setattr */
130 0, /* tp_compare */
130 0, /* tp_compare */
131 0, /* tp_repr */
131 0, /* tp_repr */
132 0, /* tp_as_number */
132 0, /* tp_as_number */
133 &dirstate_tuple_sq, /* tp_as_sequence */
133 &dirstate_tuple_sq, /* tp_as_sequence */
134 0, /* tp_as_mapping */
134 0, /* tp_as_mapping */
135 0, /* tp_hash */
135 0, /* tp_hash */
136 0, /* tp_call */
136 0, /* tp_call */
137 0, /* tp_str */
137 0, /* tp_str */
138 0, /* tp_getattro */
138 0, /* tp_getattro */
139 0, /* tp_setattro */
139 0, /* tp_setattro */
140 0, /* tp_as_buffer */
140 0, /* tp_as_buffer */
141 Py_TPFLAGS_DEFAULT, /* tp_flags */
141 Py_TPFLAGS_DEFAULT, /* tp_flags */
142 "dirstate tuple", /* tp_doc */
142 "dirstate tuple", /* tp_doc */
143 0, /* tp_traverse */
143 0, /* tp_traverse */
144 0, /* tp_clear */
144 0, /* tp_clear */
145 0, /* tp_richcompare */
145 0, /* tp_richcompare */
146 0, /* tp_weaklistoffset */
146 0, /* tp_weaklistoffset */
147 0, /* tp_iter */
147 0, /* tp_iter */
148 0, /* tp_iternext */
148 0, /* tp_iternext */
149 0, /* tp_methods */
149 0, /* tp_methods */
150 0, /* tp_members */
150 0, /* tp_members */
151 0, /* tp_getset */
151 0, /* tp_getset */
152 0, /* tp_base */
152 0, /* tp_base */
153 0, /* tp_dict */
153 0, /* tp_dict */
154 0, /* tp_descr_get */
154 0, /* tp_descr_get */
155 0, /* tp_descr_set */
155 0, /* tp_descr_set */
156 0, /* tp_dictoffset */
156 0, /* tp_dictoffset */
157 0, /* tp_init */
157 0, /* tp_init */
158 0, /* tp_alloc */
158 0, /* tp_alloc */
159 dirstate_tuple_new, /* tp_new */
159 dirstate_tuple_new, /* tp_new */
160 };
160 };
161
161
162 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
162 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
163 {
163 {
164 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
164 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
165 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
165 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
166 char state, *cur, *str, *cpos;
166 char state, *cur, *str, *cpos;
167 int mode, size, mtime;
167 int mode, size, mtime;
168 unsigned int flen, pos = 40;
168 unsigned int flen, pos = 40;
169 Py_ssize_t len = 40;
169 Py_ssize_t len = 40;
170 Py_ssize_t readlen;
170 Py_ssize_t readlen;
171
171
172 if (!PyArg_ParseTuple(
172 if (!PyArg_ParseTuple(
173 args, PY23("O!O!s#:parse_dirstate", "O!O!y#:parse_dirstate"),
173 args, PY23("O!O!s#:parse_dirstate", "O!O!y#:parse_dirstate"),
174 &PyDict_Type, &dmap, &PyDict_Type, &cmap, &str, &readlen)) {
174 &PyDict_Type, &dmap, &PyDict_Type, &cmap, &str, &readlen)) {
175 goto quit;
175 goto quit;
176 }
176 }
177
177
178 len = readlen;
178 len = readlen;
179
179
180 /* read parents */
180 /* read parents */
181 if (len < 40) {
181 if (len < 40) {
182 PyErr_SetString(PyExc_ValueError,
182 PyErr_SetString(PyExc_ValueError,
183 "too little data for parents");
183 "too little data for parents");
184 goto quit;
184 goto quit;
185 }
185 }
186
186
187 parents = Py_BuildValue(PY23("s#s#", "y#y#"), str, (Py_ssize_t)20,
187 parents = Py_BuildValue(PY23("s#s#", "y#y#"), str, (Py_ssize_t)20,
188 str + 20, (Py_ssize_t)20);
188 str + 20, (Py_ssize_t)20);
189 if (!parents) {
189 if (!parents) {
190 goto quit;
190 goto quit;
191 }
191 }
192
192
193 /* read filenames */
193 /* read filenames */
194 while (pos >= 40 && pos < len) {
194 while (pos >= 40 && pos < len) {
195 if (pos + 17 > len) {
195 if (pos + 17 > len) {
196 PyErr_SetString(PyExc_ValueError,
196 PyErr_SetString(PyExc_ValueError,
197 "overflow in dirstate");
197 "overflow in dirstate");
198 goto quit;
198 goto quit;
199 }
199 }
200 cur = str + pos;
200 cur = str + pos;
201 /* unpack header */
201 /* unpack header */
202 state = *cur;
202 state = *cur;
203 mode = getbe32(cur + 1);
203 mode = getbe32(cur + 1);
204 size = getbe32(cur + 5);
204 size = getbe32(cur + 5);
205 mtime = getbe32(cur + 9);
205 mtime = getbe32(cur + 9);
206 flen = getbe32(cur + 13);
206 flen = getbe32(cur + 13);
207 pos += 17;
207 pos += 17;
208 cur += 17;
208 cur += 17;
209 if (flen > len - pos) {
209 if (flen > len - pos) {
210 PyErr_SetString(PyExc_ValueError,
210 PyErr_SetString(PyExc_ValueError,
211 "overflow in dirstate");
211 "overflow in dirstate");
212 goto quit;
212 goto quit;
213 }
213 }
214
214
215 entry =
215 entry =
216 (PyObject *)make_dirstate_tuple(state, mode, size, mtime);
216 (PyObject *)make_dirstate_tuple(state, mode, size, mtime);
217 cpos = memchr(cur, 0, flen);
217 cpos = memchr(cur, 0, flen);
218 if (cpos) {
218 if (cpos) {
219 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
219 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
220 cname = PyBytes_FromStringAndSize(
220 cname = PyBytes_FromStringAndSize(
221 cpos + 1, flen - (cpos - cur) - 1);
221 cpos + 1, flen - (cpos - cur) - 1);
222 if (!fname || !cname ||
222 if (!fname || !cname ||
223 PyDict_SetItem(cmap, fname, cname) == -1 ||
223 PyDict_SetItem(cmap, fname, cname) == -1 ||
224 PyDict_SetItem(dmap, fname, entry) == -1) {
224 PyDict_SetItem(dmap, fname, entry) == -1) {
225 goto quit;
225 goto quit;
226 }
226 }
227 Py_DECREF(cname);
227 Py_DECREF(cname);
228 } else {
228 } else {
229 fname = PyBytes_FromStringAndSize(cur, flen);
229 fname = PyBytes_FromStringAndSize(cur, flen);
230 if (!fname ||
230 if (!fname ||
231 PyDict_SetItem(dmap, fname, entry) == -1) {
231 PyDict_SetItem(dmap, fname, entry) == -1) {
232 goto quit;
232 goto quit;
233 }
233 }
234 }
234 }
235 Py_DECREF(fname);
235 Py_DECREF(fname);
236 Py_DECREF(entry);
236 Py_DECREF(entry);
237 fname = cname = entry = NULL;
237 fname = cname = entry = NULL;
238 pos += flen;
238 pos += flen;
239 }
239 }
240
240
241 ret = parents;
241 ret = parents;
242 Py_INCREF(ret);
242 Py_INCREF(ret);
243 quit:
243 quit:
244 Py_XDECREF(fname);
244 Py_XDECREF(fname);
245 Py_XDECREF(cname);
245 Py_XDECREF(cname);
246 Py_XDECREF(entry);
246 Py_XDECREF(entry);
247 Py_XDECREF(parents);
247 Py_XDECREF(parents);
248 return ret;
248 return ret;
249 }
249 }
250
250
251 /*
251 /*
252 * Build a set of non-normal and other parent entries from the dirstate dmap
252 * Build a set of non-normal and other parent entries from the dirstate dmap
253 */
253 */
254 static PyObject *nonnormalotherparententries(PyObject *self, PyObject *args)
254 static PyObject *nonnormalotherparententries(PyObject *self, PyObject *args)
255 {
255 {
256 PyObject *dmap, *fname, *v;
256 PyObject *dmap, *fname, *v;
257 PyObject *nonnset = NULL, *otherpset = NULL, *result = NULL;
257 PyObject *nonnset = NULL, *otherpset = NULL, *result = NULL;
258 Py_ssize_t pos;
258 Py_ssize_t pos;
259
259
260 if (!PyArg_ParseTuple(args, "O!:nonnormalentries", &PyDict_Type,
260 if (!PyArg_ParseTuple(args, "O!:nonnormalentries", &PyDict_Type,
261 &dmap)) {
261 &dmap)) {
262 goto bail;
262 goto bail;
263 }
263 }
264
264
265 nonnset = PySet_New(NULL);
265 nonnset = PySet_New(NULL);
266 if (nonnset == NULL) {
266 if (nonnset == NULL) {
267 goto bail;
267 goto bail;
268 }
268 }
269
269
270 otherpset = PySet_New(NULL);
270 otherpset = PySet_New(NULL);
271 if (otherpset == NULL) {
271 if (otherpset == NULL) {
272 goto bail;
272 goto bail;
273 }
273 }
274
274
275 pos = 0;
275 pos = 0;
276 while (PyDict_Next(dmap, &pos, &fname, &v)) {
276 while (PyDict_Next(dmap, &pos, &fname, &v)) {
277 dirstateTupleObject *t;
277 dirstateTupleObject *t;
278 if (!dirstate_tuple_check(v)) {
278 if (!dirstate_tuple_check(v)) {
279 PyErr_SetString(PyExc_TypeError,
279 PyErr_SetString(PyExc_TypeError,
280 "expected a dirstate tuple");
280 "expected a dirstate tuple");
281 goto bail;
281 goto bail;
282 }
282 }
283 t = (dirstateTupleObject *)v;
283 t = (dirstateTupleObject *)v;
284
284
285 if (t->state == 'n' && t->size == -2) {
285 if (t->state == 'n' && t->size == -2) {
286 if (PySet_Add(otherpset, fname) == -1) {
286 if (PySet_Add(otherpset, fname) == -1) {
287 goto bail;
287 goto bail;
288 }
288 }
289 }
289 }
290
290
291 if (t->state == 'n' && t->mtime != -1) {
291 if (t->state == 'n' && t->mtime != -1) {
292 continue;
292 continue;
293 }
293 }
294 if (PySet_Add(nonnset, fname) == -1) {
294 if (PySet_Add(nonnset, fname) == -1) {
295 goto bail;
295 goto bail;
296 }
296 }
297 }
297 }
298
298
299 result = Py_BuildValue("(OO)", nonnset, otherpset);
299 result = Py_BuildValue("(OO)", nonnset, otherpset);
300 if (result == NULL) {
300 if (result == NULL) {
301 goto bail;
301 goto bail;
302 }
302 }
303 Py_DECREF(nonnset);
303 Py_DECREF(nonnset);
304 Py_DECREF(otherpset);
304 Py_DECREF(otherpset);
305 return result;
305 return result;
306 bail:
306 bail:
307 Py_XDECREF(nonnset);
307 Py_XDECREF(nonnset);
308 Py_XDECREF(otherpset);
308 Py_XDECREF(otherpset);
309 Py_XDECREF(result);
309 Py_XDECREF(result);
310 return NULL;
310 return NULL;
311 }
311 }
312
312
313 /*
313 /*
314 * Efficiently pack a dirstate object into its on-disk format.
314 * Efficiently pack a dirstate object into its on-disk format.
315 */
315 */
316 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
316 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
317 {
317 {
318 PyObject *packobj = NULL;
318 PyObject *packobj = NULL;
319 PyObject *map, *copymap, *pl, *mtime_unset = NULL;
319 PyObject *map, *copymap, *pl, *mtime_unset = NULL;
320 Py_ssize_t nbytes, pos, l;
320 Py_ssize_t nbytes, pos, l;
321 PyObject *k, *v = NULL, *pn;
321 PyObject *k, *v = NULL, *pn;
322 char *p, *s;
322 char *p, *s;
323 int now;
323 int now;
324
324
325 if (!PyArg_ParseTuple(args, "O!O!O!i:pack_dirstate", &PyDict_Type, &map,
325 if (!PyArg_ParseTuple(args, "O!O!O!i:pack_dirstate", &PyDict_Type, &map,
326 &PyDict_Type, &copymap, &PyTuple_Type, &pl,
326 &PyDict_Type, &copymap, &PyTuple_Type, &pl,
327 &now)) {
327 &now)) {
328 return NULL;
328 return NULL;
329 }
329 }
330
330
331 if (PyTuple_Size(pl) != 2) {
331 if (PyTuple_Size(pl) != 2) {
332 PyErr_SetString(PyExc_TypeError, "expected 2-element tuple");
332 PyErr_SetString(PyExc_TypeError, "expected 2-element tuple");
333 return NULL;
333 return NULL;
334 }
334 }
335
335
336 /* Figure out how much we need to allocate. */
336 /* Figure out how much we need to allocate. */
337 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
337 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
338 PyObject *c;
338 PyObject *c;
339 if (!PyBytes_Check(k)) {
339 if (!PyBytes_Check(k)) {
340 PyErr_SetString(PyExc_TypeError, "expected string key");
340 PyErr_SetString(PyExc_TypeError, "expected string key");
341 goto bail;
341 goto bail;
342 }
342 }
343 nbytes += PyBytes_GET_SIZE(k) + 17;
343 nbytes += PyBytes_GET_SIZE(k) + 17;
344 c = PyDict_GetItem(copymap, k);
344 c = PyDict_GetItem(copymap, k);
345 if (c) {
345 if (c) {
346 if (!PyBytes_Check(c)) {
346 if (!PyBytes_Check(c)) {
347 PyErr_SetString(PyExc_TypeError,
347 PyErr_SetString(PyExc_TypeError,
348 "expected string key");
348 "expected string key");
349 goto bail;
349 goto bail;
350 }
350 }
351 nbytes += PyBytes_GET_SIZE(c) + 1;
351 nbytes += PyBytes_GET_SIZE(c) + 1;
352 }
352 }
353 }
353 }
354
354
355 packobj = PyBytes_FromStringAndSize(NULL, nbytes);
355 packobj = PyBytes_FromStringAndSize(NULL, nbytes);
356 if (packobj == NULL) {
356 if (packobj == NULL) {
357 goto bail;
357 goto bail;
358 }
358 }
359
359
360 p = PyBytes_AS_STRING(packobj);
360 p = PyBytes_AS_STRING(packobj);
361
361
362 pn = PyTuple_GET_ITEM(pl, 0);
362 pn = PyTuple_GET_ITEM(pl, 0);
363 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
363 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
364 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
364 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
365 goto bail;
365 goto bail;
366 }
366 }
367 memcpy(p, s, l);
367 memcpy(p, s, l);
368 p += 20;
368 p += 20;
369 pn = PyTuple_GET_ITEM(pl, 1);
369 pn = PyTuple_GET_ITEM(pl, 1);
370 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
370 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
371 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
371 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
372 goto bail;
372 goto bail;
373 }
373 }
374 memcpy(p, s, l);
374 memcpy(p, s, l);
375 p += 20;
375 p += 20;
376
376
377 for (pos = 0; PyDict_Next(map, &pos, &k, &v);) {
377 for (pos = 0; PyDict_Next(map, &pos, &k, &v);) {
378 dirstateTupleObject *tuple;
378 dirstateTupleObject *tuple;
379 char state;
379 char state;
380 int mode, size, mtime;
380 int mode, size, mtime;
381 Py_ssize_t len, l;
381 Py_ssize_t len, l;
382 PyObject *o;
382 PyObject *o;
383 char *t;
383 char *t;
384
384
385 if (!dirstate_tuple_check(v)) {
385 if (!dirstate_tuple_check(v)) {
386 PyErr_SetString(PyExc_TypeError,
386 PyErr_SetString(PyExc_TypeError,
387 "expected a dirstate tuple");
387 "expected a dirstate tuple");
388 goto bail;
388 goto bail;
389 }
389 }
390 tuple = (dirstateTupleObject *)v;
390 tuple = (dirstateTupleObject *)v;
391
391
392 state = tuple->state;
392 state = tuple->state;
393 mode = tuple->mode;
393 mode = tuple->mode;
394 size = tuple->size;
394 size = tuple->size;
395 mtime = tuple->mtime;
395 mtime = tuple->mtime;
396 if (state == 'n' && mtime == now) {
396 if (state == 'n' && mtime == now) {
397 /* See pure/parsers.py:pack_dirstate for why we do
397 /* See pure/parsers.py:pack_dirstate for why we do
398 * this. */
398 * this. */
399 mtime = -1;
399 mtime = -1;
400 mtime_unset = (PyObject *)make_dirstate_tuple(
400 mtime_unset = (PyObject *)make_dirstate_tuple(
401 state, mode, size, mtime);
401 state, mode, size, mtime);
402 if (!mtime_unset) {
402 if (!mtime_unset) {
403 goto bail;
403 goto bail;
404 }
404 }
405 if (PyDict_SetItem(map, k, mtime_unset) == -1) {
405 if (PyDict_SetItem(map, k, mtime_unset) == -1) {
406 goto bail;
406 goto bail;
407 }
407 }
408 Py_DECREF(mtime_unset);
408 Py_DECREF(mtime_unset);
409 mtime_unset = NULL;
409 mtime_unset = NULL;
410 }
410 }
411 *p++ = state;
411 *p++ = state;
412 putbe32((uint32_t)mode, p);
412 putbe32((uint32_t)mode, p);
413 putbe32((uint32_t)size, p + 4);
413 putbe32((uint32_t)size, p + 4);
414 putbe32((uint32_t)mtime, p + 8);
414 putbe32((uint32_t)mtime, p + 8);
415 t = p + 12;
415 t = p + 12;
416 p += 16;
416 p += 16;
417 len = PyBytes_GET_SIZE(k);
417 len = PyBytes_GET_SIZE(k);
418 memcpy(p, PyBytes_AS_STRING(k), len);
418 memcpy(p, PyBytes_AS_STRING(k), len);
419 p += len;
419 p += len;
420 o = PyDict_GetItem(copymap, k);
420 o = PyDict_GetItem(copymap, k);
421 if (o) {
421 if (o) {
422 *p++ = '\0';
422 *p++ = '\0';
423 l = PyBytes_GET_SIZE(o);
423 l = PyBytes_GET_SIZE(o);
424 memcpy(p, PyBytes_AS_STRING(o), l);
424 memcpy(p, PyBytes_AS_STRING(o), l);
425 p += l;
425 p += l;
426 len += l + 1;
426 len += l + 1;
427 }
427 }
428 putbe32((uint32_t)len, t);
428 putbe32((uint32_t)len, t);
429 }
429 }
430
430
431 pos = p - PyBytes_AS_STRING(packobj);
431 pos = p - PyBytes_AS_STRING(packobj);
432 if (pos != nbytes) {
432 if (pos != nbytes) {
433 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
433 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
434 (long)pos, (long)nbytes);
434 (long)pos, (long)nbytes);
435 goto bail;
435 goto bail;
436 }
436 }
437
437
438 return packobj;
438 return packobj;
439 bail:
439 bail:
440 Py_XDECREF(mtime_unset);
440 Py_XDECREF(mtime_unset);
441 Py_XDECREF(packobj);
441 Py_XDECREF(packobj);
442 Py_XDECREF(v);
442 Py_XDECREF(v);
443 return NULL;
443 return NULL;
444 }
444 }
445
445
446 #define BUMPED_FIX 1
446 #define BUMPED_FIX 1
447 #define USING_SHA_256 2
447 #define USING_SHA_256 2
448 #define FM1_HEADER_SIZE (4 + 8 + 2 + 2 + 1 + 1 + 1)
448 #define FM1_HEADER_SIZE (4 + 8 + 2 + 2 + 1 + 1 + 1)
449
449
450 static PyObject *readshas(const char *source, unsigned char num,
450 static PyObject *readshas(const char *source, unsigned char num,
451 Py_ssize_t hashwidth)
451 Py_ssize_t hashwidth)
452 {
452 {
453 int i;
453 int i;
454 PyObject *list = PyTuple_New(num);
454 PyObject *list = PyTuple_New(num);
455 if (list == NULL) {
455 if (list == NULL) {
456 return NULL;
456 return NULL;
457 }
457 }
458 for (i = 0; i < num; i++) {
458 for (i = 0; i < num; i++) {
459 PyObject *hash = PyBytes_FromStringAndSize(source, hashwidth);
459 PyObject *hash = PyBytes_FromStringAndSize(source, hashwidth);
460 if (hash == NULL) {
460 if (hash == NULL) {
461 Py_DECREF(list);
461 Py_DECREF(list);
462 return NULL;
462 return NULL;
463 }
463 }
464 PyTuple_SET_ITEM(list, i, hash);
464 PyTuple_SET_ITEM(list, i, hash);
465 source += hashwidth;
465 source += hashwidth;
466 }
466 }
467 return list;
467 return list;
468 }
468 }
469
469
470 static PyObject *fm1readmarker(const char *databegin, const char *dataend,
470 static PyObject *fm1readmarker(const char *databegin, const char *dataend,
471 uint32_t *msize)
471 uint32_t *msize)
472 {
472 {
473 const char *data = databegin;
473 const char *data = databegin;
474 const char *meta;
474 const char *meta;
475
475
476 double mtime;
476 double mtime;
477 int16_t tz;
477 int16_t tz;
478 uint16_t flags;
478 uint16_t flags;
479 unsigned char nsuccs, nparents, nmetadata;
479 unsigned char nsuccs, nparents, nmetadata;
480 Py_ssize_t hashwidth = 20;
480 Py_ssize_t hashwidth = 20;
481
481
482 PyObject *prec = NULL, *parents = NULL, *succs = NULL;
482 PyObject *prec = NULL, *parents = NULL, *succs = NULL;
483 PyObject *metadata = NULL, *ret = NULL;
483 PyObject *metadata = NULL, *ret = NULL;
484 int i;
484 int i;
485
485
486 if (data + FM1_HEADER_SIZE > dataend) {
486 if (data + FM1_HEADER_SIZE > dataend) {
487 goto overflow;
487 goto overflow;
488 }
488 }
489
489
490 *msize = getbe32(data);
490 *msize = getbe32(data);
491 data += 4;
491 data += 4;
492 mtime = getbefloat64(data);
492 mtime = getbefloat64(data);
493 data += 8;
493 data += 8;
494 tz = getbeint16(data);
494 tz = getbeint16(data);
495 data += 2;
495 data += 2;
496 flags = getbeuint16(data);
496 flags = getbeuint16(data);
497 data += 2;
497 data += 2;
498
498
499 if (flags & USING_SHA_256) {
499 if (flags & USING_SHA_256) {
500 hashwidth = 32;
500 hashwidth = 32;
501 }
501 }
502
502
503 nsuccs = (unsigned char)(*data++);
503 nsuccs = (unsigned char)(*data++);
504 nparents = (unsigned char)(*data++);
504 nparents = (unsigned char)(*data++);
505 nmetadata = (unsigned char)(*data++);
505 nmetadata = (unsigned char)(*data++);
506
506
507 if (databegin + *msize > dataend) {
507 if (databegin + *msize > dataend) {
508 goto overflow;
508 goto overflow;
509 }
509 }
510 dataend = databegin + *msize; /* narrow down to marker size */
510 dataend = databegin + *msize; /* narrow down to marker size */
511
511
512 if (data + hashwidth > dataend) {
512 if (data + hashwidth > dataend) {
513 goto overflow;
513 goto overflow;
514 }
514 }
515 prec = PyBytes_FromStringAndSize(data, hashwidth);
515 prec = PyBytes_FromStringAndSize(data, hashwidth);
516 data += hashwidth;
516 data += hashwidth;
517 if (prec == NULL) {
517 if (prec == NULL) {
518 goto bail;
518 goto bail;
519 }
519 }
520
520
521 if (data + nsuccs * hashwidth > dataend) {
521 if (data + nsuccs * hashwidth > dataend) {
522 goto overflow;
522 goto overflow;
523 }
523 }
524 succs = readshas(data, nsuccs, hashwidth);
524 succs = readshas(data, nsuccs, hashwidth);
525 if (succs == NULL) {
525 if (succs == NULL) {
526 goto bail;
526 goto bail;
527 }
527 }
528 data += nsuccs * hashwidth;
528 data += nsuccs * hashwidth;
529
529
530 if (nparents == 1 || nparents == 2) {
530 if (nparents == 1 || nparents == 2) {
531 if (data + nparents * hashwidth > dataend) {
531 if (data + nparents * hashwidth > dataend) {
532 goto overflow;
532 goto overflow;
533 }
533 }
534 parents = readshas(data, nparents, hashwidth);
534 parents = readshas(data, nparents, hashwidth);
535 if (parents == NULL) {
535 if (parents == NULL) {
536 goto bail;
536 goto bail;
537 }
537 }
538 data += nparents * hashwidth;
538 data += nparents * hashwidth;
539 } else {
539 } else {
540 parents = Py_None;
540 parents = Py_None;
541 Py_INCREF(parents);
541 Py_INCREF(parents);
542 }
542 }
543
543
544 if (data + 2 * nmetadata > dataend) {
544 if (data + 2 * nmetadata > dataend) {
545 goto overflow;
545 goto overflow;
546 }
546 }
547 meta = data + (2 * nmetadata);
547 meta = data + (2 * nmetadata);
548 metadata = PyTuple_New(nmetadata);
548 metadata = PyTuple_New(nmetadata);
549 if (metadata == NULL) {
549 if (metadata == NULL) {
550 goto bail;
550 goto bail;
551 }
551 }
552 for (i = 0; i < nmetadata; i++) {
552 for (i = 0; i < nmetadata; i++) {
553 PyObject *tmp, *left = NULL, *right = NULL;
553 PyObject *tmp, *left = NULL, *right = NULL;
554 Py_ssize_t leftsize = (unsigned char)(*data++);
554 Py_ssize_t leftsize = (unsigned char)(*data++);
555 Py_ssize_t rightsize = (unsigned char)(*data++);
555 Py_ssize_t rightsize = (unsigned char)(*data++);
556 if (meta + leftsize + rightsize > dataend) {
556 if (meta + leftsize + rightsize > dataend) {
557 goto overflow;
557 goto overflow;
558 }
558 }
559 left = PyBytes_FromStringAndSize(meta, leftsize);
559 left = PyBytes_FromStringAndSize(meta, leftsize);
560 meta += leftsize;
560 meta += leftsize;
561 right = PyBytes_FromStringAndSize(meta, rightsize);
561 right = PyBytes_FromStringAndSize(meta, rightsize);
562 meta += rightsize;
562 meta += rightsize;
563 tmp = PyTuple_New(2);
563 tmp = PyTuple_New(2);
564 if (!left || !right || !tmp) {
564 if (!left || !right || !tmp) {
565 Py_XDECREF(left);
565 Py_XDECREF(left);
566 Py_XDECREF(right);
566 Py_XDECREF(right);
567 Py_XDECREF(tmp);
567 Py_XDECREF(tmp);
568 goto bail;
568 goto bail;
569 }
569 }
570 PyTuple_SET_ITEM(tmp, 0, left);
570 PyTuple_SET_ITEM(tmp, 0, left);
571 PyTuple_SET_ITEM(tmp, 1, right);
571 PyTuple_SET_ITEM(tmp, 1, right);
572 PyTuple_SET_ITEM(metadata, i, tmp);
572 PyTuple_SET_ITEM(metadata, i, tmp);
573 }
573 }
574 ret = Py_BuildValue("(OOHO(di)O)", prec, succs, flags, metadata, mtime,
574 ret = Py_BuildValue("(OOHO(di)O)", prec, succs, flags, metadata, mtime,
575 (int)tz * 60, parents);
575 (int)tz * 60, parents);
576 goto bail; /* return successfully */
576 goto bail; /* return successfully */
577
577
578 overflow:
578 overflow:
579 PyErr_SetString(PyExc_ValueError, "overflow in obsstore");
579 PyErr_SetString(PyExc_ValueError, "overflow in obsstore");
580 bail:
580 bail:
581 Py_XDECREF(prec);
581 Py_XDECREF(prec);
582 Py_XDECREF(succs);
582 Py_XDECREF(succs);
583 Py_XDECREF(metadata);
583 Py_XDECREF(metadata);
584 Py_XDECREF(parents);
584 Py_XDECREF(parents);
585 return ret;
585 return ret;
586 }
586 }
587
587
588 static PyObject *fm1readmarkers(PyObject *self, PyObject *args)
588 static PyObject *fm1readmarkers(PyObject *self, PyObject *args)
589 {
589 {
590 const char *data, *dataend;
590 const char *data, *dataend;
591 Py_ssize_t datalen, offset, stop;
591 Py_ssize_t datalen, offset, stop;
592 PyObject *markers = NULL;
592 PyObject *markers = NULL;
593
593
594 if (!PyArg_ParseTuple(args, PY23("s#nn", "y#nn"), &data, &datalen,
594 if (!PyArg_ParseTuple(args, PY23("s#nn", "y#nn"), &data, &datalen,
595 &offset, &stop)) {
595 &offset, &stop)) {
596 return NULL;
596 return NULL;
597 }
597 }
598 if (offset < 0) {
598 if (offset < 0) {
599 PyErr_SetString(PyExc_ValueError,
599 PyErr_SetString(PyExc_ValueError,
600 "invalid negative offset in fm1readmarkers");
600 "invalid negative offset in fm1readmarkers");
601 return NULL;
601 return NULL;
602 }
602 }
603 if (stop > datalen) {
603 if (stop > datalen) {
604 PyErr_SetString(
604 PyErr_SetString(
605 PyExc_ValueError,
605 PyExc_ValueError,
606 "stop longer than data length in fm1readmarkers");
606 "stop longer than data length in fm1readmarkers");
607 return NULL;
607 return NULL;
608 }
608 }
609 dataend = data + datalen;
609 dataend = data + datalen;
610 data += offset;
610 data += offset;
611 markers = PyList_New(0);
611 markers = PyList_New(0);
612 if (!markers) {
612 if (!markers) {
613 return NULL;
613 return NULL;
614 }
614 }
615 while (offset < stop) {
615 while (offset < stop) {
616 uint32_t msize;
616 uint32_t msize;
617 int error;
617 int error;
618 PyObject *record = fm1readmarker(data, dataend, &msize);
618 PyObject *record = fm1readmarker(data, dataend, &msize);
619 if (!record) {
619 if (!record) {
620 goto bail;
620 goto bail;
621 }
621 }
622 error = PyList_Append(markers, record);
622 error = PyList_Append(markers, record);
623 Py_DECREF(record);
623 Py_DECREF(record);
624 if (error) {
624 if (error) {
625 goto bail;
625 goto bail;
626 }
626 }
627 data += msize;
627 data += msize;
628 offset += msize;
628 offset += msize;
629 }
629 }
630 return markers;
630 return markers;
631 bail:
631 bail:
632 Py_DECREF(markers);
632 Py_DECREF(markers);
633 return NULL;
633 return NULL;
634 }
634 }
635
635
636 static char parsers_doc[] = "Efficient content parsing.";
636 static char parsers_doc[] = "Efficient content parsing.";
637
637
638 PyObject *encodedir(PyObject *self, PyObject *args);
638 PyObject *encodedir(PyObject *self, PyObject *args);
639 PyObject *pathencode(PyObject *self, PyObject *args);
639 PyObject *pathencode(PyObject *self, PyObject *args);
640 PyObject *lowerencode(PyObject *self, PyObject *args);
640 PyObject *lowerencode(PyObject *self, PyObject *args);
641 PyObject *parse_index2(PyObject *self, PyObject *args);
641 PyObject *parse_index2(PyObject *self, PyObject *args);
642
642
643 static PyMethodDef methods[] = {
643 static PyMethodDef methods[] = {
644 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
644 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
645 {"nonnormalotherparententries", nonnormalotherparententries, METH_VARARGS,
645 {"nonnormalotherparententries", nonnormalotherparententries, METH_VARARGS,
646 "create a set containing non-normal and other parent entries of given "
646 "create a set containing non-normal and other parent entries of given "
647 "dirstate\n"},
647 "dirstate\n"},
648 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
648 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
649 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
649 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
650 {"isasciistr", isasciistr, METH_VARARGS, "check if an ASCII string\n"},
650 {"isasciistr", isasciistr, METH_VARARGS, "check if an ASCII string\n"},
651 {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
651 {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
652 {"asciiupper", asciiupper, METH_VARARGS, "uppercase an ASCII string\n"},
652 {"asciiupper", asciiupper, METH_VARARGS, "uppercase an ASCII string\n"},
653 {"dict_new_presized", dict_new_presized, METH_VARARGS,
653 {"dict_new_presized", dict_new_presized, METH_VARARGS,
654 "construct a dict with an expected size\n"},
654 "construct a dict with an expected size\n"},
655 {"make_file_foldmap", make_file_foldmap, METH_VARARGS,
655 {"make_file_foldmap", make_file_foldmap, METH_VARARGS,
656 "make file foldmap\n"},
656 "make file foldmap\n"},
657 {"jsonescapeu8fast", jsonescapeu8fast, METH_VARARGS,
657 {"jsonescapeu8fast", jsonescapeu8fast, METH_VARARGS,
658 "escape a UTF-8 byte string to JSON (fast path)\n"},
658 "escape a UTF-8 byte string to JSON (fast path)\n"},
659 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
659 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
660 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
660 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
661 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
661 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
662 {"fm1readmarkers", fm1readmarkers, METH_VARARGS,
662 {"fm1readmarkers", fm1readmarkers, METH_VARARGS,
663 "parse v1 obsolete markers\n"},
663 "parse v1 obsolete markers\n"},
664 {NULL, NULL}};
664 {NULL, NULL}};
665
665
666 void dirs_module_init(PyObject *mod);
666 void dirs_module_init(PyObject *mod);
667 void manifest_module_init(PyObject *mod);
667 void manifest_module_init(PyObject *mod);
668 void revlog_module_init(PyObject *mod);
668 void revlog_module_init(PyObject *mod);
669
669
670 static const int version = 14;
670 static const int version = 15;
671
671
672 static void module_init(PyObject *mod)
672 static void module_init(PyObject *mod)
673 {
673 {
674 PyObject *capsule = NULL;
674 PyObject *capsule = NULL;
675 PyModule_AddIntConstant(mod, "version", version);
675 PyModule_AddIntConstant(mod, "version", version);
676
676
677 /* This module constant has two purposes. First, it lets us unit test
677 /* This module constant has two purposes. First, it lets us unit test
678 * the ImportError raised without hard-coding any error text. This
678 * the ImportError raised without hard-coding any error text. This
679 * means we can change the text in the future without breaking tests,
679 * means we can change the text in the future without breaking tests,
680 * even across changesets without a recompile. Second, its presence
680 * even across changesets without a recompile. Second, its presence
681 * can be used to determine whether the version-checking logic is
681 * can be used to determine whether the version-checking logic is
682 * present, which also helps in testing across changesets without a
682 * present, which also helps in testing across changesets without a
683 * recompile. Note that this means the pure-Python version of parsers
683 * recompile. Note that this means the pure-Python version of parsers
684 * should not have this module constant. */
684 * should not have this module constant. */
685 PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
685 PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
686
686
687 dirs_module_init(mod);
687 dirs_module_init(mod);
688 manifest_module_init(mod);
688 manifest_module_init(mod);
689 revlog_module_init(mod);
689 revlog_module_init(mod);
690
690
691 capsule = PyCapsule_New(
691 capsule = PyCapsule_New(
692 make_dirstate_tuple,
692 make_dirstate_tuple,
693 "mercurial.cext.parsers.make_dirstate_tuple_CAPI", NULL);
693 "mercurial.cext.parsers.make_dirstate_tuple_CAPI", NULL);
694 if (capsule != NULL)
694 if (capsule != NULL)
695 PyModule_AddObject(mod, "make_dirstate_tuple_CAPI", capsule);
695 PyModule_AddObject(mod, "make_dirstate_tuple_CAPI", capsule);
696
696
697 if (PyType_Ready(&dirstateTupleType) < 0) {
697 if (PyType_Ready(&dirstateTupleType) < 0) {
698 return;
698 return;
699 }
699 }
700 Py_INCREF(&dirstateTupleType);
700 Py_INCREF(&dirstateTupleType);
701 PyModule_AddObject(mod, "dirstatetuple",
701 PyModule_AddObject(mod, "dirstatetuple",
702 (PyObject *)&dirstateTupleType);
702 (PyObject *)&dirstateTupleType);
703 }
703 }
704
704
705 static int check_python_version(void)
705 static int check_python_version(void)
706 {
706 {
707 PyObject *sys = PyImport_ImportModule("sys"), *ver;
707 PyObject *sys = PyImport_ImportModule("sys"), *ver;
708 long hexversion;
708 long hexversion;
709 if (!sys) {
709 if (!sys) {
710 return -1;
710 return -1;
711 }
711 }
712 ver = PyObject_GetAttrString(sys, "hexversion");
712 ver = PyObject_GetAttrString(sys, "hexversion");
713 Py_DECREF(sys);
713 Py_DECREF(sys);
714 if (!ver) {
714 if (!ver) {
715 return -1;
715 return -1;
716 }
716 }
717 hexversion = PyInt_AsLong(ver);
717 hexversion = PyInt_AsLong(ver);
718 Py_DECREF(ver);
718 Py_DECREF(ver);
719 /* sys.hexversion is a 32-bit number by default, so the -1 case
719 /* sys.hexversion is a 32-bit number by default, so the -1 case
720 * should only occur in unusual circumstances (e.g. if sys.hexversion
720 * should only occur in unusual circumstances (e.g. if sys.hexversion
721 * is manually set to an invalid value). */
721 * is manually set to an invalid value). */
722 if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
722 if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
723 PyErr_Format(PyExc_ImportError,
723 PyErr_Format(PyExc_ImportError,
724 "%s: The Mercurial extension "
724 "%s: The Mercurial extension "
725 "modules were compiled with Python " PY_VERSION
725 "modules were compiled with Python " PY_VERSION
726 ", but "
726 ", but "
727 "Mercurial is currently using Python with "
727 "Mercurial is currently using Python with "
728 "sys.hexversion=%ld: "
728 "sys.hexversion=%ld: "
729 "Python %s\n at: %s",
729 "Python %s\n at: %s",
730 versionerrortext, hexversion, Py_GetVersion(),
730 versionerrortext, hexversion, Py_GetVersion(),
731 Py_GetProgramFullPath());
731 Py_GetProgramFullPath());
732 return -1;
732 return -1;
733 }
733 }
734 return 0;
734 return 0;
735 }
735 }
736
736
737 #ifdef IS_PY3K
737 #ifdef IS_PY3K
738 static struct PyModuleDef parsers_module = {PyModuleDef_HEAD_INIT, "parsers",
738 static struct PyModuleDef parsers_module = {PyModuleDef_HEAD_INIT, "parsers",
739 parsers_doc, -1, methods};
739 parsers_doc, -1, methods};
740
740
741 PyMODINIT_FUNC PyInit_parsers(void)
741 PyMODINIT_FUNC PyInit_parsers(void)
742 {
742 {
743 PyObject *mod;
743 PyObject *mod;
744
744
745 if (check_python_version() == -1)
745 if (check_python_version() == -1)
746 return NULL;
746 return NULL;
747 mod = PyModule_Create(&parsers_module);
747 mod = PyModule_Create(&parsers_module);
748 module_init(mod);
748 module_init(mod);
749 return mod;
749 return mod;
750 }
750 }
751 #else
751 #else
752 PyMODINIT_FUNC initparsers(void)
752 PyMODINIT_FUNC initparsers(void)
753 {
753 {
754 PyObject *mod;
754 PyObject *mod;
755
755
756 if (check_python_version() == -1) {
756 if (check_python_version() == -1) {
757 return;
757 return;
758 }
758 }
759 mod = Py_InitModule3("parsers", methods, parsers_doc);
759 mod = Py_InitModule3("parsers", methods, parsers_doc);
760 module_init(mod);
760 module_init(mod);
761 }
761 }
762 #endif
762 #endif
@@ -1,3051 +1,3068 b''
1 /*
1 /*
2 parsers.c - efficient content parsing
2 parsers.c - efficient content parsing
3
3
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8 */
8 */
9
9
10 #define PY_SSIZE_T_CLEAN
10 #define PY_SSIZE_T_CLEAN
11 #include <Python.h>
11 #include <Python.h>
12 #include <assert.h>
12 #include <assert.h>
13 #include <ctype.h>
13 #include <ctype.h>
14 #include <limits.h>
14 #include <limits.h>
15 #include <stddef.h>
15 #include <stddef.h>
16 #include <stdlib.h>
16 #include <stdlib.h>
17 #include <string.h>
17 #include <string.h>
18
18
19 #include "bitmanipulation.h"
19 #include "bitmanipulation.h"
20 #include "charencode.h"
20 #include "charencode.h"
21 #include "revlog.h"
21 #include "revlog.h"
22 #include "util.h"
22 #include "util.h"
23
23
24 #ifdef IS_PY3K
24 #ifdef IS_PY3K
25 /* The mapping of Python types is meant to be temporary to get Python
25 /* The mapping of Python types is meant to be temporary to get Python
26 * 3 to compile. We should remove this once Python 3 support is fully
26 * 3 to compile. We should remove this once Python 3 support is fully
27 * supported and proper types are used in the extensions themselves. */
27 * supported and proper types are used in the extensions themselves. */
28 #define PyInt_Check PyLong_Check
28 #define PyInt_Check PyLong_Check
29 #define PyInt_FromLong PyLong_FromLong
29 #define PyInt_FromLong PyLong_FromLong
30 #define PyInt_FromSsize_t PyLong_FromSsize_t
30 #define PyInt_FromSsize_t PyLong_FromSsize_t
31 #define PyInt_AsLong PyLong_AsLong
31 #define PyInt_AsLong PyLong_AsLong
32 #endif
32 #endif
33
33
34 typedef struct indexObjectStruct indexObject;
34 typedef struct indexObjectStruct indexObject;
35
35
36 typedef struct {
36 typedef struct {
37 int children[16];
37 int children[16];
38 } nodetreenode;
38 } nodetreenode;
39
39
40 /*
40 /*
41 * A base-16 trie for fast node->rev mapping.
41 * A base-16 trie for fast node->rev mapping.
42 *
42 *
43 * Positive value is index of the next node in the trie
43 * Positive value is index of the next node in the trie
44 * Negative value is a leaf: -(rev + 2)
44 * Negative value is a leaf: -(rev + 2)
45 * Zero is empty
45 * Zero is empty
46 */
46 */
47 typedef struct {
47 typedef struct {
48 indexObject *index;
48 indexObject *index;
49 nodetreenode *nodes;
49 nodetreenode *nodes;
50 unsigned length; /* # nodes in use */
50 unsigned length; /* # nodes in use */
51 unsigned capacity; /* # nodes allocated */
51 unsigned capacity; /* # nodes allocated */
52 int depth; /* maximum depth of tree */
52 int depth; /* maximum depth of tree */
53 int splits; /* # splits performed */
53 int splits; /* # splits performed */
54 } nodetree;
54 } nodetree;
55
55
56 typedef struct {
56 typedef struct {
57 PyObject_HEAD /* ; */
57 PyObject_HEAD /* ; */
58 nodetree nt;
58 nodetree nt;
59 } nodetreeObject;
59 } nodetreeObject;
60
60
61 /*
61 /*
62 * This class has two behaviors.
62 * This class has two behaviors.
63 *
63 *
64 * When used in a list-like way (with integer keys), we decode an
64 * When used in a list-like way (with integer keys), we decode an
65 * entry in a RevlogNG index file on demand. Our last entry is a
65 * entry in a RevlogNG index file on demand. Our last entry is a
66 * sentinel, always a nullid. We have limited support for
66 * sentinel, always a nullid. We have limited support for
67 * integer-keyed insert and delete, only at elements right before the
67 * integer-keyed insert and delete, only at elements right before the
68 * sentinel.
68 * sentinel.
69 *
69 *
70 * With string keys, we lazily perform a reverse mapping from node to
70 * With string keys, we lazily perform a reverse mapping from node to
71 * rev, using a base-16 trie.
71 * rev, using a base-16 trie.
72 */
72 */
73 struct indexObjectStruct {
73 struct indexObjectStruct {
74 PyObject_HEAD
74 PyObject_HEAD
75 /* Type-specific fields go here. */
75 /* Type-specific fields go here. */
76 PyObject *data; /* raw bytes of index */
76 PyObject *data; /* raw bytes of index */
77 Py_buffer buf; /* buffer of data */
77 Py_buffer buf; /* buffer of data */
78 PyObject **cache; /* cached tuples */
78 PyObject **cache; /* cached tuples */
79 const char **offsets; /* populated on demand */
79 const char **offsets; /* populated on demand */
80 Py_ssize_t raw_length; /* original number of elements */
80 Py_ssize_t raw_length; /* original number of elements */
81 Py_ssize_t length; /* current number of elements */
81 Py_ssize_t length; /* current number of elements */
82 PyObject *added; /* populated on demand */
82 PyObject *added; /* populated on demand */
83 PyObject *headrevs; /* cache, invalidated on changes */
83 PyObject *headrevs; /* cache, invalidated on changes */
84 PyObject *filteredrevs; /* filtered revs set */
84 PyObject *filteredrevs; /* filtered revs set */
85 nodetree nt; /* base-16 trie */
85 nodetree nt; /* base-16 trie */
86 int ntinitialized; /* 0 or 1 */
86 int ntinitialized; /* 0 or 1 */
87 int ntrev; /* last rev scanned */
87 int ntrev; /* last rev scanned */
88 int ntlookups; /* # lookups */
88 int ntlookups; /* # lookups */
89 int ntmisses; /* # lookups that miss the cache */
89 int ntmisses; /* # lookups that miss the cache */
90 int inlined;
90 int inlined;
91 };
91 };
92
92
93 static Py_ssize_t index_length(const indexObject *self)
93 static Py_ssize_t index_length(const indexObject *self)
94 {
94 {
95 if (self->added == NULL)
95 if (self->added == NULL)
96 return self->length;
96 return self->length;
97 return self->length + PyList_GET_SIZE(self->added);
97 return self->length + PyList_GET_SIZE(self->added);
98 }
98 }
99
99
100 static PyObject *nullentry = NULL;
100 static PyObject *nullentry = NULL;
101 static const char nullid[20] = {0};
101 static const char nullid[20] = {0};
102 static const Py_ssize_t nullrev = -1;
102 static const Py_ssize_t nullrev = -1;
103
103
104 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
104 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
105
105
106 #if LONG_MAX == 0x7fffffffL
106 #if LONG_MAX == 0x7fffffffL
107 static const char *const tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#");
107 static const char *const tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#");
108 #else
108 #else
109 static const char *const tuple_format = PY23("kiiiiiis#", "kiiiiiiy#");
109 static const char *const tuple_format = PY23("kiiiiiis#", "kiiiiiiy#");
110 #endif
110 #endif
111
111
112 /* A RevlogNG v1 index entry is 64 bytes long. */
112 /* A RevlogNG v1 index entry is 64 bytes long. */
113 static const long v1_hdrsize = 64;
113 static const long v1_hdrsize = 64;
114
114
115 static void raise_revlog_error(void)
115 static void raise_revlog_error(void)
116 {
116 {
117 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
117 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
118
118
119 mod = PyImport_ImportModule("mercurial.error");
119 mod = PyImport_ImportModule("mercurial.error");
120 if (mod == NULL) {
120 if (mod == NULL) {
121 goto cleanup;
121 goto cleanup;
122 }
122 }
123
123
124 dict = PyModule_GetDict(mod);
124 dict = PyModule_GetDict(mod);
125 if (dict == NULL) {
125 if (dict == NULL) {
126 goto cleanup;
126 goto cleanup;
127 }
127 }
128 Py_INCREF(dict);
128 Py_INCREF(dict);
129
129
130 errclass = PyDict_GetItemString(dict, "RevlogError");
130 errclass = PyDict_GetItemString(dict, "RevlogError");
131 if (errclass == NULL) {
131 if (errclass == NULL) {
132 PyErr_SetString(PyExc_SystemError,
132 PyErr_SetString(PyExc_SystemError,
133 "could not find RevlogError");
133 "could not find RevlogError");
134 goto cleanup;
134 goto cleanup;
135 }
135 }
136
136
137 /* value of exception is ignored by callers */
137 /* value of exception is ignored by callers */
138 PyErr_SetString(errclass, "RevlogError");
138 PyErr_SetString(errclass, "RevlogError");
139
139
140 cleanup:
140 cleanup:
141 Py_XDECREF(dict);
141 Py_XDECREF(dict);
142 Py_XDECREF(mod);
142 Py_XDECREF(mod);
143 }
143 }
144
144
145 /*
145 /*
146 * Return a pointer to the beginning of a RevlogNG record.
146 * Return a pointer to the beginning of a RevlogNG record.
147 */
147 */
148 static const char *index_deref(indexObject *self, Py_ssize_t pos)
148 static const char *index_deref(indexObject *self, Py_ssize_t pos)
149 {
149 {
150 if (self->inlined && pos > 0) {
150 if (self->inlined && pos > 0) {
151 if (self->offsets == NULL) {
151 if (self->offsets == NULL) {
152 self->offsets = PyMem_Malloc(self->raw_length *
152 self->offsets = PyMem_Malloc(self->raw_length *
153 sizeof(*self->offsets));
153 sizeof(*self->offsets));
154 if (self->offsets == NULL)
154 if (self->offsets == NULL)
155 return (const char *)PyErr_NoMemory();
155 return (const char *)PyErr_NoMemory();
156 inline_scan(self, self->offsets);
156 inline_scan(self, self->offsets);
157 }
157 }
158 return self->offsets[pos];
158 return self->offsets[pos];
159 }
159 }
160
160
161 return (const char *)(self->buf.buf) + pos * v1_hdrsize;
161 return (const char *)(self->buf.buf) + pos * v1_hdrsize;
162 }
162 }
163
163
164 /*
164 /*
165 * Get parents of the given rev.
165 * Get parents of the given rev.
166 *
166 *
167 * The specified rev must be valid and must not be nullrev. A returned
167 * The specified rev must be valid and must not be nullrev. A returned
168 * parent revision may be nullrev, but is guaranteed to be in valid range.
168 * parent revision may be nullrev, but is guaranteed to be in valid range.
169 */
169 */
170 static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
170 static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
171 int maxrev)
171 int maxrev)
172 {
172 {
173 if (rev >= self->length) {
173 if (rev >= self->length) {
174 long tmp;
174 long tmp;
175 PyObject *tuple =
175 PyObject *tuple =
176 PyList_GET_ITEM(self->added, rev - self->length);
176 PyList_GET_ITEM(self->added, rev - self->length);
177 if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 5), &tmp)) {
177 if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 5), &tmp)) {
178 return -1;
178 return -1;
179 }
179 }
180 ps[0] = (int)tmp;
180 ps[0] = (int)tmp;
181 if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 6), &tmp)) {
181 if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 6), &tmp)) {
182 return -1;
182 return -1;
183 }
183 }
184 ps[1] = (int)tmp;
184 ps[1] = (int)tmp;
185 } else {
185 } else {
186 const char *data = index_deref(self, rev);
186 const char *data = index_deref(self, rev);
187 ps[0] = getbe32(data + 24);
187 ps[0] = getbe32(data + 24);
188 ps[1] = getbe32(data + 28);
188 ps[1] = getbe32(data + 28);
189 }
189 }
190 /* If index file is corrupted, ps[] may point to invalid revisions. So
190 /* If index file is corrupted, ps[] may point to invalid revisions. So
191 * there is a risk of buffer overflow to trust them unconditionally. */
191 * there is a risk of buffer overflow to trust them unconditionally. */
192 if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
192 if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
193 PyErr_SetString(PyExc_ValueError, "parent out of range");
193 PyErr_SetString(PyExc_ValueError, "parent out of range");
194 return -1;
194 return -1;
195 }
195 }
196 return 0;
196 return 0;
197 }
197 }
198
198
199 /*
199 /*
200 * Get parents of the given rev.
200 * Get parents of the given rev.
201 *
201 *
202 * If the specified rev is out of range, IndexError will be raised. If the
202 * If the specified rev is out of range, IndexError will be raised. If the
203 * revlog entry is corrupted, ValueError may be raised.
203 * revlog entry is corrupted, ValueError may be raised.
204 *
204 *
205 * Returns 0 on success or -1 on failure.
205 * Returns 0 on success or -1 on failure.
206 */
206 */
207 int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
207 int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
208 {
208 {
209 int tiprev;
209 int tiprev;
210 if (!op || !HgRevlogIndex_Check(op) || !ps) {
210 if (!op || !HgRevlogIndex_Check(op) || !ps) {
211 PyErr_BadInternalCall();
211 PyErr_BadInternalCall();
212 return -1;
212 return -1;
213 }
213 }
214 tiprev = (int)index_length((indexObject *)op) - 1;
214 tiprev = (int)index_length((indexObject *)op) - 1;
215 if (rev < -1 || rev > tiprev) {
215 if (rev < -1 || rev > tiprev) {
216 PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
216 PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
217 return -1;
217 return -1;
218 } else if (rev == -1) {
218 } else if (rev == -1) {
219 ps[0] = ps[1] = -1;
219 ps[0] = ps[1] = -1;
220 return 0;
220 return 0;
221 } else {
221 } else {
222 return index_get_parents((indexObject *)op, rev, ps, tiprev);
222 return index_get_parents((indexObject *)op, rev, ps, tiprev);
223 }
223 }
224 }
224 }
225
225
226 static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
226 static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
227 {
227 {
228 uint64_t offset;
228 uint64_t offset;
229 if (rev == nullrev) {
229 if (rev == nullrev) {
230 return 0;
230 return 0;
231 }
231 }
232 if (rev >= self->length) {
232 if (rev >= self->length) {
233 PyObject *tuple;
233 PyObject *tuple;
234 PyObject *pylong;
234 PyObject *pylong;
235 PY_LONG_LONG tmp;
235 PY_LONG_LONG tmp;
236 tuple = PyList_GET_ITEM(self->added, rev - self->length);
236 tuple = PyList_GET_ITEM(self->added, rev - self->length);
237 pylong = PyTuple_GET_ITEM(tuple, 0);
237 pylong = PyTuple_GET_ITEM(tuple, 0);
238 tmp = PyLong_AsLongLong(pylong);
238 tmp = PyLong_AsLongLong(pylong);
239 if (tmp == -1 && PyErr_Occurred()) {
239 if (tmp == -1 && PyErr_Occurred()) {
240 return -1;
240 return -1;
241 }
241 }
242 if (tmp < 0) {
242 if (tmp < 0) {
243 PyErr_Format(PyExc_OverflowError,
243 PyErr_Format(PyExc_OverflowError,
244 "revlog entry size out of bound (%lld)",
244 "revlog entry size out of bound (%lld)",
245 (long long)tmp);
245 (long long)tmp);
246 return -1;
246 return -1;
247 }
247 }
248 offset = (uint64_t)tmp;
248 offset = (uint64_t)tmp;
249 } else {
249 } else {
250 const char *data = index_deref(self, rev);
250 const char *data = index_deref(self, rev);
251 offset = getbe32(data + 4);
251 offset = getbe32(data + 4);
252 if (rev == 0) {
252 if (rev == 0) {
253 /* mask out version number for the first entry */
253 /* mask out version number for the first entry */
254 offset &= 0xFFFF;
254 offset &= 0xFFFF;
255 } else {
255 } else {
256 uint32_t offset_high = getbe32(data);
256 uint32_t offset_high = getbe32(data);
257 offset |= ((uint64_t)offset_high) << 32;
257 offset |= ((uint64_t)offset_high) << 32;
258 }
258 }
259 }
259 }
260 return (int64_t)(offset >> 16);
260 return (int64_t)(offset >> 16);
261 }
261 }
262
262
263 static inline int index_get_length(indexObject *self, Py_ssize_t rev)
263 static inline int index_get_length(indexObject *self, Py_ssize_t rev)
264 {
264 {
265 if (rev == nullrev) {
265 if (rev == nullrev) {
266 return 0;
266 return 0;
267 }
267 }
268 if (rev >= self->length) {
268 if (rev >= self->length) {
269 PyObject *tuple;
269 PyObject *tuple;
270 PyObject *pylong;
270 PyObject *pylong;
271 long ret;
271 long ret;
272 tuple = PyList_GET_ITEM(self->added, rev - self->length);
272 tuple = PyList_GET_ITEM(self->added, rev - self->length);
273 pylong = PyTuple_GET_ITEM(tuple, 1);
273 pylong = PyTuple_GET_ITEM(tuple, 1);
274 ret = PyInt_AsLong(pylong);
274 ret = PyInt_AsLong(pylong);
275 if (ret == -1 && PyErr_Occurred()) {
275 if (ret == -1 && PyErr_Occurred()) {
276 return -1;
276 return -1;
277 }
277 }
278 if (ret < 0 || ret > (long)INT_MAX) {
278 if (ret < 0 || ret > (long)INT_MAX) {
279 PyErr_Format(PyExc_OverflowError,
279 PyErr_Format(PyExc_OverflowError,
280 "revlog entry size out of bound (%ld)",
280 "revlog entry size out of bound (%ld)",
281 ret);
281 ret);
282 return -1;
282 return -1;
283 }
283 }
284 return (int)ret;
284 return (int)ret;
285 } else {
285 } else {
286 const char *data = index_deref(self, rev);
286 const char *data = index_deref(self, rev);
287 int tmp = (int)getbe32(data + 8);
287 int tmp = (int)getbe32(data + 8);
288 if (tmp < 0) {
288 if (tmp < 0) {
289 PyErr_Format(PyExc_OverflowError,
289 PyErr_Format(PyExc_OverflowError,
290 "revlog entry size out of bound (%d)",
290 "revlog entry size out of bound (%d)",
291 tmp);
291 tmp);
292 return -1;
292 return -1;
293 }
293 }
294 return tmp;
294 return tmp;
295 }
295 }
296 }
296 }
297
297
298 /*
298 /*
299 * RevlogNG format (all in big endian, data may be inlined):
299 * RevlogNG format (all in big endian, data may be inlined):
300 * 6 bytes: offset
300 * 6 bytes: offset
301 * 2 bytes: flags
301 * 2 bytes: flags
302 * 4 bytes: compressed length
302 * 4 bytes: compressed length
303 * 4 bytes: uncompressed length
303 * 4 bytes: uncompressed length
304 * 4 bytes: base revision
304 * 4 bytes: base revision
305 * 4 bytes: link revision
305 * 4 bytes: link revision
306 * 4 bytes: parent 1 revision
306 * 4 bytes: parent 1 revision
307 * 4 bytes: parent 2 revision
307 * 4 bytes: parent 2 revision
308 * 32 bytes: nodeid (only 20 bytes used)
308 * 32 bytes: nodeid (only 20 bytes used)
309 */
309 */
310 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
310 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
311 {
311 {
312 uint64_t offset_flags;
312 uint64_t offset_flags;
313 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
313 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
314 const char *c_node_id;
314 const char *c_node_id;
315 const char *data;
315 const char *data;
316 Py_ssize_t length = index_length(self);
316 Py_ssize_t length = index_length(self);
317 PyObject *entry;
317 PyObject *entry;
318
318
319 if (pos == nullrev) {
319 if (pos == nullrev) {
320 Py_INCREF(nullentry);
320 Py_INCREF(nullentry);
321 return nullentry;
321 return nullentry;
322 }
322 }
323
323
324 if (pos < 0 || pos >= length) {
324 if (pos < 0 || pos >= length) {
325 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
325 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
326 return NULL;
326 return NULL;
327 }
327 }
328
328
329 if (pos >= self->length) {
329 if (pos >= self->length) {
330 PyObject *obj;
330 PyObject *obj;
331 obj = PyList_GET_ITEM(self->added, pos - self->length);
331 obj = PyList_GET_ITEM(self->added, pos - self->length);
332 Py_INCREF(obj);
332 Py_INCREF(obj);
333 return obj;
333 return obj;
334 }
334 }
335
335
336 if (self->cache) {
336 if (self->cache) {
337 if (self->cache[pos]) {
337 if (self->cache[pos]) {
338 Py_INCREF(self->cache[pos]);
338 Py_INCREF(self->cache[pos]);
339 return self->cache[pos];
339 return self->cache[pos];
340 }
340 }
341 } else {
341 } else {
342 self->cache = calloc(self->raw_length, sizeof(PyObject *));
342 self->cache = calloc(self->raw_length, sizeof(PyObject *));
343 if (self->cache == NULL)
343 if (self->cache == NULL)
344 return PyErr_NoMemory();
344 return PyErr_NoMemory();
345 }
345 }
346
346
347 data = index_deref(self, pos);
347 data = index_deref(self, pos);
348 if (data == NULL)
348 if (data == NULL)
349 return NULL;
349 return NULL;
350
350
351 offset_flags = getbe32(data + 4);
351 offset_flags = getbe32(data + 4);
352 if (pos == 0) /* mask out version number for the first entry */
352 if (pos == 0) /* mask out version number for the first entry */
353 offset_flags &= 0xFFFF;
353 offset_flags &= 0xFFFF;
354 else {
354 else {
355 uint32_t offset_high = getbe32(data);
355 uint32_t offset_high = getbe32(data);
356 offset_flags |= ((uint64_t)offset_high) << 32;
356 offset_flags |= ((uint64_t)offset_high) << 32;
357 }
357 }
358
358
359 comp_len = getbe32(data + 8);
359 comp_len = getbe32(data + 8);
360 uncomp_len = getbe32(data + 12);
360 uncomp_len = getbe32(data + 12);
361 base_rev = getbe32(data + 16);
361 base_rev = getbe32(data + 16);
362 link_rev = getbe32(data + 20);
362 link_rev = getbe32(data + 20);
363 parent_1 = getbe32(data + 24);
363 parent_1 = getbe32(data + 24);
364 parent_2 = getbe32(data + 28);
364 parent_2 = getbe32(data + 28);
365 c_node_id = data + 32;
365 c_node_id = data + 32;
366
366
367 entry = Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
367 entry = Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
368 base_rev, link_rev, parent_1, parent_2, c_node_id,
368 base_rev, link_rev, parent_1, parent_2, c_node_id,
369 (Py_ssize_t)20);
369 (Py_ssize_t)20);
370
370
371 if (entry) {
371 if (entry) {
372 PyObject_GC_UnTrack(entry);
372 PyObject_GC_UnTrack(entry);
373 Py_INCREF(entry);
373 Py_INCREF(entry);
374 }
374 }
375
375
376 self->cache[pos] = entry;
376 self->cache[pos] = entry;
377
377
378 return entry;
378 return entry;
379 }
379 }
380
380
381 /*
381 /*
382 * Return the 20-byte SHA of the node corresponding to the given rev.
382 * Return the 20-byte SHA of the node corresponding to the given rev.
383 */
383 */
384 static const char *index_node(indexObject *self, Py_ssize_t pos)
384 static const char *index_node(indexObject *self, Py_ssize_t pos)
385 {
385 {
386 Py_ssize_t length = index_length(self);
386 Py_ssize_t length = index_length(self);
387 const char *data;
387 const char *data;
388
388
389 if (pos == nullrev)
389 if (pos == nullrev)
390 return nullid;
390 return nullid;
391
391
392 if (pos >= length)
392 if (pos >= length)
393 return NULL;
393 return NULL;
394
394
395 if (pos >= self->length) {
395 if (pos >= self->length) {
396 PyObject *tuple, *str;
396 PyObject *tuple, *str;
397 tuple = PyList_GET_ITEM(self->added, pos - self->length);
397 tuple = PyList_GET_ITEM(self->added, pos - self->length);
398 str = PyTuple_GetItem(tuple, 7);
398 str = PyTuple_GetItem(tuple, 7);
399 return str ? PyBytes_AS_STRING(str) : NULL;
399 return str ? PyBytes_AS_STRING(str) : NULL;
400 }
400 }
401
401
402 data = index_deref(self, pos);
402 data = index_deref(self, pos);
403 return data ? data + 32 : NULL;
403 return data ? data + 32 : NULL;
404 }
404 }
405
405
406 /*
406 /*
407 * Return the 20-byte SHA of the node corresponding to the given rev. The
407 * Return the 20-byte SHA of the node corresponding to the given rev. The
408 * rev is assumed to be existing. If not, an exception is set.
408 * rev is assumed to be existing. If not, an exception is set.
409 */
409 */
410 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
410 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
411 {
411 {
412 const char *node = index_node(self, pos);
412 const char *node = index_node(self, pos);
413 if (node == NULL) {
413 if (node == NULL) {
414 PyErr_Format(PyExc_IndexError, "could not access rev %d",
414 PyErr_Format(PyExc_IndexError, "could not access rev %d",
415 (int)pos);
415 (int)pos);
416 }
416 }
417 return node;
417 return node;
418 }
418 }
419
419
420 static int nt_insert(nodetree *self, const char *node, int rev);
420 static int nt_insert(nodetree *self, const char *node, int rev);
421
421
422 static int node_check(PyObject *obj, char **node)
422 static int node_check(PyObject *obj, char **node)
423 {
423 {
424 Py_ssize_t nodelen;
424 Py_ssize_t nodelen;
425 if (PyBytes_AsStringAndSize(obj, node, &nodelen) == -1)
425 if (PyBytes_AsStringAndSize(obj, node, &nodelen) == -1)
426 return -1;
426 return -1;
427 if (nodelen == 20)
427 if (nodelen == 20)
428 return 0;
428 return 0;
429 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
429 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
430 return -1;
430 return -1;
431 }
431 }
432
432
/*
 * index.append(entry): add one revision entry to the in-memory overlay.
 *
 * `entry` must be an 8-tuple whose slot 7 is the 20-byte node.  The new
 * entry receives the next revision number.  Returns None, or NULL with
 * an exception set on error.
 */
static PyObject *index_append(indexObject *self, PyObject *obj)
{
	char *node;
	Py_ssize_t len;

	if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
		PyErr_SetString(PyExc_TypeError, "8-tuple required");
		return NULL;
	}

	/* Reject malformed nodes before mutating any state. */
	if (node_check(PyTuple_GET_ITEM(obj, 7), &node) == -1)
		return NULL;

	/* The rev number of the appended entry. */
	len = index_length(self);

	/* Lazily create the overlay list for entries not yet on disk. */
	if (self->added == NULL) {
		self->added = PyList_New(0);
		if (self->added == NULL)
			return NULL;
	}

	if (PyList_Append(self->added, obj) == -1)
		return NULL;

	/* Keep the node trie in sync if it has been built.
	 * NOTE(review): nt_insert's return value is ignored; on failure the
	 * trie could silently miss this node — confirm this is intentional. */
	if (self->ntinitialized)
		nt_insert(&self->nt, node, (int)len);

	/* The cached head list is now stale. */
	Py_CLEAR(self->headrevs);
	Py_RETURN_NONE;
}
463
463
/*
 * index.stats(): return a dict of counters describing the index and its
 * node-trie cache (entry counts, lookups, misses, trie geometry).
 * Keys are bytes; values are integers.  Returns NULL on error.
 */
static PyObject *index_stats(indexObject *self)
{
	PyObject *obj = PyDict_New();
	PyObject *s = NULL;
	PyObject *t = NULL;

	if (obj == NULL)
		return NULL;

/* Store self-><__n> in `obj` under the label <__d>; jumps to `bail`
 * on allocation or insertion failure.  `s` and `t` are cleared so the
 * bail path can safely Py_XDECREF them. */
#define istat(__n, __d)                                                        \
	do {                                                                   \
		s = PyBytes_FromString(__d);                                   \
		t = PyInt_FromSsize_t(self->__n);                              \
		if (!s || !t)                                                  \
			goto bail;                                             \
		if (PyDict_SetItem(obj, s, t) == -1)                           \
			goto bail;                                             \
		Py_CLEAR(s);                                                   \
		Py_CLEAR(t);                                                   \
	} while (0)

	/* Pending (not yet written) entries are reported separately. */
	if (self->added) {
		Py_ssize_t len = PyList_GET_SIZE(self->added);
		s = PyBytes_FromString("index entries added");
		t = PyInt_FromSsize_t(len);
		if (!s || !t)
			goto bail;
		if (PyDict_SetItem(obj, s, t) == -1)
			goto bail;
		Py_CLEAR(s);
		Py_CLEAR(t);
	}

	if (self->raw_length != self->length)
		istat(raw_length, "revs on disk");
	istat(length, "revs in memory");
	istat(ntlookups, "node trie lookups");
	istat(ntmisses, "node trie misses");
	istat(ntrev, "node trie last rev scanned");
	/* Trie statistics only exist once the trie has been built. */
	if (self->ntinitialized) {
		istat(nt.capacity, "node trie capacity");
		istat(nt.depth, "node trie depth");
		istat(nt.length, "node trie count");
		istat(nt.splits, "node trie splits");
	}

#undef istat

	return obj;

bail:
	Py_XDECREF(obj);
	Py_XDECREF(s);
	Py_XDECREF(t);
	return NULL;
}
520
520
521 /*
521 /*
522 * When we cache a list, we want to be sure the caller can't mutate
522 * When we cache a list, we want to be sure the caller can't mutate
523 * the cached copy.
523 * the cached copy.
524 */
524 */
525 static PyObject *list_copy(PyObject *list)
525 static PyObject *list_copy(PyObject *list)
526 {
526 {
527 Py_ssize_t len = PyList_GET_SIZE(list);
527 Py_ssize_t len = PyList_GET_SIZE(list);
528 PyObject *newlist = PyList_New(len);
528 PyObject *newlist = PyList_New(len);
529 Py_ssize_t i;
529 Py_ssize_t i;
530
530
531 if (newlist == NULL)
531 if (newlist == NULL)
532 return NULL;
532 return NULL;
533
533
534 for (i = 0; i < len; i++) {
534 for (i = 0; i < len; i++) {
535 PyObject *obj = PyList_GET_ITEM(list, i);
535 PyObject *obj = PyList_GET_ITEM(list, i);
536 Py_INCREF(obj);
536 Py_INCREF(obj);
537 PyList_SET_ITEM(newlist, i, obj);
537 PyList_SET_ITEM(newlist, i, obj);
538 }
538 }
539
539
540 return newlist;
540 return newlist;
541 }
541 }
542
542
543 static int check_filter(PyObject *filter, Py_ssize_t arg)
543 static int check_filter(PyObject *filter, Py_ssize_t arg)
544 {
544 {
545 if (filter) {
545 if (filter) {
546 PyObject *arglist, *result;
546 PyObject *arglist, *result;
547 int isfiltered;
547 int isfiltered;
548
548
549 arglist = Py_BuildValue("(n)", arg);
549 arglist = Py_BuildValue("(n)", arg);
550 if (!arglist) {
550 if (!arglist) {
551 return -1;
551 return -1;
552 }
552 }
553
553
554 result = PyEval_CallObject(filter, arglist);
554 result = PyEval_CallObject(filter, arglist);
555 Py_DECREF(arglist);
555 Py_DECREF(arglist);
556 if (!result) {
556 if (!result) {
557 return -1;
557 return -1;
558 }
558 }
559
559
560 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
560 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
561 * same as this function, so we can just return it directly.*/
561 * same as this function, so we can just return it directly.*/
562 isfiltered = PyObject_IsTrue(result);
562 isfiltered = PyObject_IsTrue(result);
563 Py_DECREF(result);
563 Py_DECREF(result);
564 return isfiltered;
564 return isfiltered;
565 } else {
565 } else {
566 return 0;
566 return 0;
567 }
567 }
568 }
568 }
569
569
570 static Py_ssize_t add_roots_get_min(indexObject *self, PyObject *list,
570 static Py_ssize_t add_roots_get_min(indexObject *self, PyObject *list,
571 Py_ssize_t marker, char *phases)
571 Py_ssize_t marker, char *phases)
572 {
572 {
573 PyObject *iter = NULL;
573 PyObject *iter = NULL;
574 PyObject *iter_item = NULL;
574 PyObject *iter_item = NULL;
575 Py_ssize_t min_idx = index_length(self) + 2;
575 Py_ssize_t min_idx = index_length(self) + 2;
576 long iter_item_long;
576 long iter_item_long;
577
577
578 if (PyList_GET_SIZE(list) != 0) {
578 if (PyList_GET_SIZE(list) != 0) {
579 iter = PyObject_GetIter(list);
579 iter = PyObject_GetIter(list);
580 if (iter == NULL)
580 if (iter == NULL)
581 return -2;
581 return -2;
582 while ((iter_item = PyIter_Next(iter))) {
582 while ((iter_item = PyIter_Next(iter))) {
583 if (!pylong_to_long(iter_item, &iter_item_long)) {
583 if (!pylong_to_long(iter_item, &iter_item_long)) {
584 Py_DECREF(iter_item);
584 Py_DECREF(iter_item);
585 return -2;
585 return -2;
586 }
586 }
587 Py_DECREF(iter_item);
587 Py_DECREF(iter_item);
588 if (iter_item_long < min_idx)
588 if (iter_item_long < min_idx)
589 min_idx = iter_item_long;
589 min_idx = iter_item_long;
590 phases[iter_item_long] = (char)marker;
590 phases[iter_item_long] = (char)marker;
591 }
591 }
592 Py_DECREF(iter);
592 Py_DECREF(iter);
593 }
593 }
594
594
595 return min_idx;
595 return min_idx;
596 }
596 }
597
597
598 static inline void set_phase_from_parents(char *phases, int parent_1,
598 static inline void set_phase_from_parents(char *phases, int parent_1,
599 int parent_2, Py_ssize_t i)
599 int parent_2, Py_ssize_t i)
600 {
600 {
601 if (parent_1 >= 0 && phases[parent_1] > phases[i])
601 if (parent_1 >= 0 && phases[parent_1] > phases[i])
602 phases[i] = phases[parent_1];
602 phases[i] = phases[parent_1];
603 if (parent_2 >= 0 && phases[parent_2] > phases[i])
603 if (parent_2 >= 0 && phases[parent_2] > phases[i])
604 phases[i] = phases[parent_2];
604 phases[i] = phases[parent_2];
605 }
605 }
606
606
/*
 * index.reachableroots2(minroot, heads, roots, includepath):
 *
 * Walk ancestors of `heads` (never descending below rev `minroot`) and
 * return the list of members of `roots` that are reachable.  When
 * `includepath` is True, also return every revision on a path between a
 * reachable root and a head.
 *
 * All per-rev state arrays are indexed with rev+1 so that nullrev (-1)
 * maps to slot 0.
 */
static PyObject *reachableroots2(indexObject *self, PyObject *args)
{

	/* Input */
	long minroot;
	PyObject *includepatharg = NULL;
	int includepath = 0;
	/* heads and roots are lists */
	PyObject *heads = NULL;
	PyObject *roots = NULL;
	PyObject *reachable = NULL;

	PyObject *val;
	Py_ssize_t len = index_length(self);
	long revnum;
	Py_ssize_t k;
	Py_ssize_t i;
	Py_ssize_t l;
	int r;
	int parents[2];

	/* Internal data structure:
	 * tovisit: array of length len+1 (all revs + nullrev), filled upto
	 * lentovisit
	 *
	 * revstates: array of length len+1 (all revs + nullrev) */
	int *tovisit = NULL;
	long lentovisit = 0;
	enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
	char *revstates = NULL;

	/* Get arguments */
	if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
	                      &PyList_Type, &roots, &PyBool_Type,
	                      &includepatharg))
		goto bail;

	if (includepatharg == Py_True)
		includepath = 1;

	/* Initialize return set */
	reachable = PyList_New(0);
	if (reachable == NULL)
		goto bail;

	/* Initialize internal datastructures */
	tovisit = (int *)malloc((len + 1) * sizeof(int));
	if (tovisit == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	revstates = (char *)calloc(len + 1, 1);
	if (revstates == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	/* Flag every requested root so the walk below can recognize them. */
	l = PyList_GET_SIZE(roots);
	for (i = 0; i < l; i++) {
		revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
		if (revnum == -1 && PyErr_Occurred())
			goto bail;
		/* If root is out of range, e.g. wdir(), it must be unreachable
		 * from heads. So we can just ignore it. */
		if (revnum + 1 < 0 || revnum + 1 >= len + 1)
			continue;
		revstates[revnum + 1] |= RS_ROOT;
	}

	/* Populate tovisit with all the heads */
	l = PyList_GET_SIZE(heads);
	for (i = 0; i < l; i++) {
		revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
		if (revnum == -1 && PyErr_Occurred())
			goto bail;
		if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
			PyErr_SetString(PyExc_IndexError, "head out of range");
			goto bail;
		}
		if (!(revstates[revnum + 1] & RS_SEEN)) {
			tovisit[lentovisit++] = (int)revnum;
			revstates[revnum + 1] |= RS_SEEN;
		}
	}

	/* Visit the tovisit list and find the reachable roots */
	k = 0;
	while (k < lentovisit) {
		/* Add the node to reachable if it is a root*/
		revnum = tovisit[k++];
		if (revstates[revnum + 1] & RS_ROOT) {
			revstates[revnum + 1] |= RS_REACHABLE;
			val = PyInt_FromLong(revnum);
			if (val == NULL)
				goto bail;
			r = PyList_Append(reachable, val);
			Py_DECREF(val);
			if (r < 0)
				goto bail;
			/* Without includepath we can stop at the first root
			 * found on each path. */
			if (includepath == 0)
				continue;
		}

		/* Add its parents to the list of nodes to visit */
		if (revnum == nullrev)
			continue;
		r = index_get_parents(self, revnum, parents, (int)len - 1);
		if (r < 0)
			goto bail;
		for (i = 0; i < 2; i++) {
			if (!(revstates[parents[i] + 1] & RS_SEEN) &&
			    parents[i] >= minroot) {
				tovisit[lentovisit++] = parents[i];
				revstates[parents[i] + 1] |= RS_SEEN;
			}
		}
	}

	/* Find all the nodes in between the roots we found and the heads
	 * and add them to the reachable set */
	if (includepath == 1) {
		long minidx = minroot;
		if (minidx < 0)
			minidx = 0;
		/* Forward sweep: a seen rev whose parent is reachable is
		 * itself on a root-to-head path. */
		for (i = minidx; i < len; i++) {
			if (!(revstates[i + 1] & RS_SEEN))
				continue;
			r = index_get_parents(self, i, parents, (int)len - 1);
			/* Corrupted index file, error is set from
			 * index_get_parents */
			if (r < 0)
				goto bail;
			if (((revstates[parents[0] + 1] |
			      revstates[parents[1] + 1]) &
			     RS_REACHABLE) &&
			    !(revstates[i + 1] & RS_REACHABLE)) {
				revstates[i + 1] |= RS_REACHABLE;
				val = PyInt_FromSsize_t(i);
				if (val == NULL)
					goto bail;
				r = PyList_Append(reachable, val);
				Py_DECREF(val);
				if (r < 0)
					goto bail;
			}
		}
	}

	free(revstates);
	free(tovisit);
	return reachable;
bail:
	Py_XDECREF(reachable);
	free(revstates);
	free(tovisit);
	return NULL;
}
765
765
/*
 * index.computephasesmapsets(roots): given a list of per-phase root
 * lists, propagate phases down the DAG and return a 2-tuple
 * (len(index), [None, set(draft revs), set(secret revs), ...]).
 *
 * Slot 0 of the returned set list is None: public revisions are not
 * stored and are computed by the caller as a difference.  Returns NULL
 * with an exception set on error.
 */
static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
{
	PyObject *roots = Py_None;
	PyObject *ret = NULL;
	PyObject *phasessize = NULL;
	PyObject *phaseroots = NULL;
	PyObject *phaseset = NULL;
	PyObject *phasessetlist = NULL;
	PyObject *rev = NULL;
	Py_ssize_t len = index_length(self);
	Py_ssize_t numphase = 0;
	Py_ssize_t minrevallphases = 0;
	Py_ssize_t minrevphase = 0;
	Py_ssize_t i = 0;
	char *phases = NULL;
	long phase;

	if (!PyArg_ParseTuple(args, "O", &roots))
		goto done;
	if (roots == NULL || !PyList_Check(roots)) {
		PyErr_SetString(PyExc_TypeError, "roots must be a list");
		goto done;
	}

	phases = calloc(
	    len, 1); /* phase per rev: {0: public, 1: draft, 2: secret} */
	if (phases == NULL) {
		PyErr_NoMemory();
		goto done;
	}
	/* Put the phase information of all the roots in phases */
	numphase = PyList_GET_SIZE(roots) + 1;
	minrevallphases = len + 1;
	phasessetlist = PyList_New(numphase);
	if (phasessetlist == NULL)
		goto done;

	/* Slot 0 stands for the public phase and stays None. */
	PyList_SET_ITEM(phasessetlist, 0, Py_None);
	Py_INCREF(Py_None);

	for (i = 0; i < numphase - 1; i++) {
		phaseroots = PyList_GET_ITEM(roots, i);
		phaseset = PySet_New(NULL);
		if (phaseset == NULL)
			goto release;
		/* PyList_SET_ITEM steals the reference; phasessetlist now
		 * owns phaseset. */
		PyList_SET_ITEM(phasessetlist, i + 1, phaseset);
		if (!PyList_Check(phaseroots)) {
			PyErr_SetString(PyExc_TypeError,
			                "roots item must be a list");
			goto release;
		}
		minrevphase =
		    add_roots_get_min(self, phaseroots, i + 1, phases);
		if (minrevphase == -2) /* Error from add_roots_get_min */
			goto release;
		minrevallphases = MIN(minrevallphases, minrevphase);
	}
	/* Propagate the phase information from the roots to the revs */
	if (minrevallphases != -1) {
		int parents[2];
		for (i = minrevallphases; i < len; i++) {
			if (index_get_parents(self, i, parents, (int)len - 1) <
			    0)
				goto release;
			set_phase_from_parents(phases, parents[0], parents[1],
			                       i);
		}
	}
	/* Transform phase list to a python list */
	phasessize = PyInt_FromSsize_t(len);
	if (phasessize == NULL)
		goto release;
	for (i = 0; i < len; i++) {
		phase = phases[i];
		/* We only store the sets of phase for non public phase, the
		 * public phase is computed as a difference */
		if (phase != 0) {
			phaseset = PyList_GET_ITEM(phasessetlist, phase);
			rev = PyInt_FromSsize_t(i);
			if (rev == NULL)
				goto release;
			PySet_Add(phaseset, rev);
			Py_XDECREF(rev);
		}
	}
	ret = PyTuple_Pack(2, phasessize, phasessetlist);

release:
	/* PyTuple_Pack took its own references (or ret is NULL); drop
	 * ours either way. */
	Py_XDECREF(phasessize);
	Py_XDECREF(phasessetlist);
done:
	free(phases);
	return ret;
}
860
860
/*
 * index.headrevs([filteredrevs]): return the list of head revisions,
 * i.e. revisions with no unfiltered children.  `filteredrevs`, when
 * given and not None, must support __contains__ and names revisions to
 * ignore.
 *
 * The result is cached on self->headrevs (keyed by identity of
 * filteredrevs) and a defensive copy is returned so callers cannot
 * mutate the cache.  For an empty index the result is [-1] (nullrev).
 */
static PyObject *index_headrevs(indexObject *self, PyObject *args)
{
	Py_ssize_t i, j, len;
	char *nothead = NULL;
	PyObject *heads = NULL;
	PyObject *filter = NULL;
	PyObject *filteredrevs = Py_None;

	if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
		return NULL;
	}

	/* Cache hit: same filter object as last time. */
	if (self->headrevs && filteredrevs == self->filteredrevs)
		return list_copy(self->headrevs);

	Py_DECREF(self->filteredrevs);
	self->filteredrevs = filteredrevs;
	Py_INCREF(filteredrevs);

	if (filteredrevs != Py_None) {
		filter = PyObject_GetAttrString(filteredrevs, "__contains__");
		if (!filter) {
			PyErr_SetString(
			    PyExc_TypeError,
			    "filteredrevs has no attribute __contains__");
			goto bail;
		}
	}

	len = index_length(self);
	heads = PyList_New(0);
	if (heads == NULL)
		goto bail;
	/* Empty repository: nullrev is the only head. */
	if (len == 0) {
		PyObject *nullid = PyInt_FromLong(-1);
		if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
			Py_XDECREF(nullid);
			goto bail;
		}
		goto done;
	}

	/* nothead[r] == 1 once we know r has an unfiltered child (or is
	 * itself filtered), i.e. r is not a head. */
	nothead = calloc(len, 1);
	if (nothead == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	for (i = len - 1; i >= 0; i--) {
		int isfiltered;
		int parents[2];

		/* If nothead[i] == 1, it means we've seen an unfiltered child
		 * of this node already, and therefore this node is not
		 * filtered. So we can skip the expensive check_filter step.
		 */
		if (nothead[i] != 1) {
			isfiltered = check_filter(filter, i);
			if (isfiltered == -1) {
				PyErr_SetString(PyExc_TypeError,
				                "unable to check filter");
				goto bail;
			}

			if (isfiltered) {
				nothead[i] = 1;
				continue;
			}
		}

		/* i is unfiltered, so its parents cannot be heads. */
		if (index_get_parents(self, i, parents, (int)len - 1) < 0)
			goto bail;
		for (j = 0; j < 2; j++) {
			if (parents[j] >= 0)
				nothead[parents[j]] = 1;
		}
	}

	/* Collect every rev not marked above. */
	for (i = 0; i < len; i++) {
		PyObject *head;

		if (nothead[i])
			continue;
		head = PyInt_FromSsize_t(i);
		if (head == NULL || PyList_Append(heads, head) == -1) {
			Py_XDECREF(head);
			goto bail;
		}
	}

done:
	/* Store the list in the cache and hand back a copy. */
	self->headrevs = heads;
	Py_XDECREF(filter);
	free(nothead);
	return list_copy(self->headrevs);
bail:
	Py_XDECREF(filter);
	Py_XDECREF(heads);
	free(nothead);
	return NULL;
}
962
962
/**
 * Obtain the base revision index entry.
 *
 * Callers must ensure that rev >= 0 or illegal memory access may occur.
 *
 * Returns the base revision, or -2 with a Python exception set on
 * error (conversion failure, unreadable data, or corruption).
 */
static inline int index_baserev(indexObject *self, int rev)
{
	const char *data;
	int result;

	if (rev >= self->length) {
		/* Pending entry: tuple slot 3 holds the base revision. */
		PyObject *tuple =
		    PyList_GET_ITEM(self->added, rev - self->length);
		long ret;
		if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 3), &ret)) {
			return -2;
		}
		result = (int)ret;
	} else {
		data = index_deref(self, rev);
		if (data == NULL) {
			return -2;
		}

		/* On-disk entry: base rev is a big-endian 32-bit value at
		 * byte offset 16. */
		result = getbe32(data + 16);
	}
	/* Sanity-check against corruption: a delta base can never be a
	 * later revision, nor below nullrev. */
	if (result > rev) {
		PyErr_Format(
		    PyExc_ValueError,
		    "corrupted revlog, revision base above revision: %d, %d",
		    rev, result);
		return -2;
	}
	if (result < -1) {
		PyErr_Format(
		    PyExc_ValueError,
		    "corrupted revlog, revision base out of range: %d, %d", rev,
		    result);
		return -2;
	}
	return result;
}
1005
1005
/**
 * Find if a revision is a snapshot or not
 *
 * Only relevant for sparse-revlog case.
 * Callers must ensure that rev is in a valid range.
 *
 * Returns 1 if `rev` is a snapshot, 0 if it is not, and -1 with a
 * Python exception set on error.
 */
static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
{
	int ps[2];
	Py_ssize_t base;
	/* Walk the delta chain towards its base. */
	while (rev >= 0) {
		base = (Py_ssize_t)index_baserev(self, rev);
		/* A rev that is its own base stores a full text; normalize
		 * to -1 ("no delta base"). */
		if (base == rev) {
			base = -1;
		}
		if (base == -2) {
			assert(PyErr_Occurred());
			return -1;
		}
		if (base == -1) {
			return 1;
		}
		if (index_get_parents(self, rev, ps, (int)rev) < 0) {
			assert(PyErr_Occurred());
			return -1;
		};
		/* A delta against one of its own parents is a regular
		 * delta, not a snapshot. */
		if (base == ps[0] || base == ps[1]) {
			return 0;
		}
		rev = base;
	}
	return rev == -1;
}
1039
1039
1040 static PyObject *index_issnapshot(indexObject *self, PyObject *value)
1040 static PyObject *index_issnapshot(indexObject *self, PyObject *value)
1041 {
1041 {
1042 long rev;
1042 long rev;
1043 int issnap;
1043 int issnap;
1044 Py_ssize_t length = index_length(self);
1044 Py_ssize_t length = index_length(self);
1045
1045
1046 if (!pylong_to_long(value, &rev)) {
1046 if (!pylong_to_long(value, &rev)) {
1047 return NULL;
1047 return NULL;
1048 }
1048 }
1049 if (rev < -1 || rev >= length) {
1049 if (rev < -1 || rev >= length) {
1050 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
1050 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
1051 rev);
1051 rev);
1052 return NULL;
1052 return NULL;
1053 };
1053 };
1054 issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
1054 issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
1055 if (issnap < 0) {
1055 if (issnap < 0) {
1056 return NULL;
1056 return NULL;
1057 };
1057 };
1058 return PyBool_FromLong((long)issnap);
1058 return PyBool_FromLong((long)issnap);
1059 }
1059 }
1060
1060
1061 static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
1061 static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
1062 {
1062 {
1063 Py_ssize_t start_rev;
1063 Py_ssize_t start_rev;
1064 PyObject *cache;
1064 PyObject *cache;
1065 Py_ssize_t base;
1065 Py_ssize_t base;
1066 Py_ssize_t rev;
1066 Py_ssize_t rev;
1067 PyObject *key = NULL;
1067 PyObject *key = NULL;
1068 PyObject *value = NULL;
1068 PyObject *value = NULL;
1069 const Py_ssize_t length = index_length(self);
1069 const Py_ssize_t length = index_length(self);
1070 if (!PyArg_ParseTuple(args, "O!n", &PyDict_Type, &cache, &start_rev)) {
1070 if (!PyArg_ParseTuple(args, "O!n", &PyDict_Type, &cache, &start_rev)) {
1071 return NULL;
1071 return NULL;
1072 }
1072 }
1073 for (rev = start_rev; rev < length; rev++) {
1073 for (rev = start_rev; rev < length; rev++) {
1074 int issnap;
1074 int issnap;
1075 PyObject *allvalues = NULL;
1075 PyObject *allvalues = NULL;
1076 issnap = index_issnapshotrev(self, rev);
1076 issnap = index_issnapshotrev(self, rev);
1077 if (issnap < 0) {
1077 if (issnap < 0) {
1078 goto bail;
1078 goto bail;
1079 }
1079 }
1080 if (issnap == 0) {
1080 if (issnap == 0) {
1081 continue;
1081 continue;
1082 }
1082 }
1083 base = (Py_ssize_t)index_baserev(self, rev);
1083 base = (Py_ssize_t)index_baserev(self, rev);
1084 if (base == rev) {
1084 if (base == rev) {
1085 base = -1;
1085 base = -1;
1086 }
1086 }
1087 if (base == -2) {
1087 if (base == -2) {
1088 assert(PyErr_Occurred());
1088 assert(PyErr_Occurred());
1089 goto bail;
1089 goto bail;
1090 }
1090 }
1091 key = PyInt_FromSsize_t(base);
1091 key = PyInt_FromSsize_t(base);
1092 allvalues = PyDict_GetItem(cache, key);
1092 allvalues = PyDict_GetItem(cache, key);
1093 if (allvalues == NULL && PyErr_Occurred()) {
1093 if (allvalues == NULL && PyErr_Occurred()) {
1094 goto bail;
1094 goto bail;
1095 }
1095 }
1096 if (allvalues == NULL) {
1096 if (allvalues == NULL) {
1097 int r;
1097 int r;
1098 allvalues = PyList_New(0);
1098 allvalues = PyList_New(0);
1099 if (!allvalues) {
1099 if (!allvalues) {
1100 goto bail;
1100 goto bail;
1101 }
1101 }
1102 r = PyDict_SetItem(cache, key, allvalues);
1102 r = PyDict_SetItem(cache, key, allvalues);
1103 Py_DECREF(allvalues);
1103 Py_DECREF(allvalues);
1104 if (r < 0) {
1104 if (r < 0) {
1105 goto bail;
1105 goto bail;
1106 }
1106 }
1107 }
1107 }
1108 value = PyInt_FromSsize_t(rev);
1108 value = PyInt_FromSsize_t(rev);
1109 if (PyList_Append(allvalues, value)) {
1109 if (PyList_Append(allvalues, value)) {
1110 goto bail;
1110 goto bail;
1111 }
1111 }
1112 Py_CLEAR(key);
1112 Py_CLEAR(key);
1113 Py_CLEAR(value);
1113 Py_CLEAR(value);
1114 }
1114 }
1115 Py_RETURN_NONE;
1115 Py_RETURN_NONE;
1116 bail:
1116 bail:
1117 Py_XDECREF(key);
1117 Py_XDECREF(key);
1118 Py_XDECREF(value);
1118 Py_XDECREF(value);
1119 return NULL;
1119 return NULL;
1120 }
1120 }
1121
1121
1122 static PyObject *index_deltachain(indexObject *self, PyObject *args)
1122 static PyObject *index_deltachain(indexObject *self, PyObject *args)
1123 {
1123 {
1124 int rev, generaldelta;
1124 int rev, generaldelta;
1125 PyObject *stoparg;
1125 PyObject *stoparg;
1126 int stoprev, iterrev, baserev = -1;
1126 int stoprev, iterrev, baserev = -1;
1127 int stopped;
1127 int stopped;
1128 PyObject *chain = NULL, *result = NULL;
1128 PyObject *chain = NULL, *result = NULL;
1129 const Py_ssize_t length = index_length(self);
1129 const Py_ssize_t length = index_length(self);
1130
1130
1131 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
1131 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
1132 return NULL;
1132 return NULL;
1133 }
1133 }
1134
1134
1135 if (PyInt_Check(stoparg)) {
1135 if (PyInt_Check(stoparg)) {
1136 stoprev = (int)PyInt_AsLong(stoparg);
1136 stoprev = (int)PyInt_AsLong(stoparg);
1137 if (stoprev == -1 && PyErr_Occurred()) {
1137 if (stoprev == -1 && PyErr_Occurred()) {
1138 return NULL;
1138 return NULL;
1139 }
1139 }
1140 } else if (stoparg == Py_None) {
1140 } else if (stoparg == Py_None) {
1141 stoprev = -2;
1141 stoprev = -2;
1142 } else {
1142 } else {
1143 PyErr_SetString(PyExc_ValueError,
1143 PyErr_SetString(PyExc_ValueError,
1144 "stoprev must be integer or None");
1144 "stoprev must be integer or None");
1145 return NULL;
1145 return NULL;
1146 }
1146 }
1147
1147
1148 if (rev < 0 || rev >= length) {
1148 if (rev < 0 || rev >= length) {
1149 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1149 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1150 return NULL;
1150 return NULL;
1151 }
1151 }
1152
1152
1153 chain = PyList_New(0);
1153 chain = PyList_New(0);
1154 if (chain == NULL) {
1154 if (chain == NULL) {
1155 return NULL;
1155 return NULL;
1156 }
1156 }
1157
1157
1158 baserev = index_baserev(self, rev);
1158 baserev = index_baserev(self, rev);
1159
1159
1160 /* This should never happen. */
1160 /* This should never happen. */
1161 if (baserev <= -2) {
1161 if (baserev <= -2) {
1162 /* Error should be set by index_deref() */
1162 /* Error should be set by index_deref() */
1163 assert(PyErr_Occurred());
1163 assert(PyErr_Occurred());
1164 goto bail;
1164 goto bail;
1165 }
1165 }
1166
1166
1167 iterrev = rev;
1167 iterrev = rev;
1168
1168
1169 while (iterrev != baserev && iterrev != stoprev) {
1169 while (iterrev != baserev && iterrev != stoprev) {
1170 PyObject *value = PyInt_FromLong(iterrev);
1170 PyObject *value = PyInt_FromLong(iterrev);
1171 if (value == NULL) {
1171 if (value == NULL) {
1172 goto bail;
1172 goto bail;
1173 }
1173 }
1174 if (PyList_Append(chain, value)) {
1174 if (PyList_Append(chain, value)) {
1175 Py_DECREF(value);
1175 Py_DECREF(value);
1176 goto bail;
1176 goto bail;
1177 }
1177 }
1178 Py_DECREF(value);
1178 Py_DECREF(value);
1179
1179
1180 if (generaldelta) {
1180 if (generaldelta) {
1181 iterrev = baserev;
1181 iterrev = baserev;
1182 } else {
1182 } else {
1183 iterrev--;
1183 iterrev--;
1184 }
1184 }
1185
1185
1186 if (iterrev < 0) {
1186 if (iterrev < 0) {
1187 break;
1187 break;
1188 }
1188 }
1189
1189
1190 if (iterrev >= length) {
1190 if (iterrev >= length) {
1191 PyErr_SetString(PyExc_IndexError,
1191 PyErr_SetString(PyExc_IndexError,
1192 "revision outside index");
1192 "revision outside index");
1193 return NULL;
1193 return NULL;
1194 }
1194 }
1195
1195
1196 baserev = index_baserev(self, iterrev);
1196 baserev = index_baserev(self, iterrev);
1197
1197
1198 /* This should never happen. */
1198 /* This should never happen. */
1199 if (baserev <= -2) {
1199 if (baserev <= -2) {
1200 /* Error should be set by index_deref() */
1200 /* Error should be set by index_deref() */
1201 assert(PyErr_Occurred());
1201 assert(PyErr_Occurred());
1202 goto bail;
1202 goto bail;
1203 }
1203 }
1204 }
1204 }
1205
1205
1206 if (iterrev == stoprev) {
1206 if (iterrev == stoprev) {
1207 stopped = 1;
1207 stopped = 1;
1208 } else {
1208 } else {
1209 PyObject *value = PyInt_FromLong(iterrev);
1209 PyObject *value = PyInt_FromLong(iterrev);
1210 if (value == NULL) {
1210 if (value == NULL) {
1211 goto bail;
1211 goto bail;
1212 }
1212 }
1213 if (PyList_Append(chain, value)) {
1213 if (PyList_Append(chain, value)) {
1214 Py_DECREF(value);
1214 Py_DECREF(value);
1215 goto bail;
1215 goto bail;
1216 }
1216 }
1217 Py_DECREF(value);
1217 Py_DECREF(value);
1218
1218
1219 stopped = 0;
1219 stopped = 0;
1220 }
1220 }
1221
1221
1222 if (PyList_Reverse(chain)) {
1222 if (PyList_Reverse(chain)) {
1223 goto bail;
1223 goto bail;
1224 }
1224 }
1225
1225
1226 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
1226 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
1227 Py_DECREF(chain);
1227 Py_DECREF(chain);
1228 return result;
1228 return result;
1229
1229
1230 bail:
1230 bail:
1231 Py_DECREF(chain);
1231 Py_DECREF(chain);
1232 return NULL;
1232 return NULL;
1233 }
1233 }
1234
1234
1235 static inline int64_t
1235 static inline int64_t
1236 index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
1236 index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
1237 {
1237 {
1238 int64_t start_offset;
1238 int64_t start_offset;
1239 int64_t end_offset;
1239 int64_t end_offset;
1240 int end_size;
1240 int end_size;
1241 start_offset = index_get_start(self, start_rev);
1241 start_offset = index_get_start(self, start_rev);
1242 if (start_offset < 0) {
1242 if (start_offset < 0) {
1243 return -1;
1243 return -1;
1244 }
1244 }
1245 end_offset = index_get_start(self, end_rev);
1245 end_offset = index_get_start(self, end_rev);
1246 if (end_offset < 0) {
1246 if (end_offset < 0) {
1247 return -1;
1247 return -1;
1248 }
1248 }
1249 end_size = index_get_length(self, end_rev);
1249 end_size = index_get_length(self, end_rev);
1250 if (end_size < 0) {
1250 if (end_size < 0) {
1251 return -1;
1251 return -1;
1252 }
1252 }
1253 if (end_offset < start_offset) {
1253 if (end_offset < start_offset) {
1254 PyErr_Format(PyExc_ValueError,
1254 PyErr_Format(PyExc_ValueError,
1255 "corrupted revlog index: inconsistent offset "
1255 "corrupted revlog index: inconsistent offset "
1256 "between revisions (%zd) and (%zd)",
1256 "between revisions (%zd) and (%zd)",
1257 start_rev, end_rev);
1257 start_rev, end_rev);
1258 return -1;
1258 return -1;
1259 }
1259 }
1260 return (end_offset - start_offset) + (int64_t)end_size;
1260 return (end_offset - start_offset) + (int64_t)end_size;
1261 }
1261 }
1262
1262
1263 /* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
1263 /* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
1264 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1264 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1265 Py_ssize_t startidx, Py_ssize_t endidx)
1265 Py_ssize_t startidx, Py_ssize_t endidx)
1266 {
1266 {
1267 int length;
1267 int length;
1268 while (endidx > 1 && endidx > startidx) {
1268 while (endidx > 1 && endidx > startidx) {
1269 length = index_get_length(self, revs[endidx - 1]);
1269 length = index_get_length(self, revs[endidx - 1]);
1270 if (length < 0) {
1270 if (length < 0) {
1271 return -1;
1271 return -1;
1272 }
1272 }
1273 if (length != 0) {
1273 if (length != 0) {
1274 break;
1274 break;
1275 }
1275 }
1276 endidx -= 1;
1276 endidx -= 1;
1277 }
1277 }
1278 return endidx;
1278 return endidx;
1279 }
1279 }
1280
1280
/* A gap between interesting revision data inside a delta chain.
 * `size` is the width of the gap in bytes; `idx` is the position, in
 * the chain's revision array, of the revision that follows the gap. */
struct Gap {
	int64_t size;
	Py_ssize_t idx;
};
1285
1285
1286 static int gap_compare(const void *left, const void *right)
1286 static int gap_compare(const void *left, const void *right)
1287 {
1287 {
1288 const struct Gap *l_left = ((const struct Gap *)left);
1288 const struct Gap *l_left = ((const struct Gap *)left);
1289 const struct Gap *l_right = ((const struct Gap *)right);
1289 const struct Gap *l_right = ((const struct Gap *)right);
1290 if (l_left->size < l_right->size) {
1290 if (l_left->size < l_right->size) {
1291 return -1;
1291 return -1;
1292 } else if (l_left->size > l_right->size) {
1292 } else if (l_left->size > l_right->size) {
1293 return 1;
1293 return 1;
1294 }
1294 }
1295 return 0;
1295 return 0;
1296 }
1296 }
1297 static int Py_ssize_t_compare(const void *left, const void *right)
1297 static int Py_ssize_t_compare(const void *left, const void *right)
1298 {
1298 {
1299 const Py_ssize_t l_left = *(const Py_ssize_t *)left;
1299 const Py_ssize_t l_left = *(const Py_ssize_t *)left;
1300 const Py_ssize_t l_right = *(const Py_ssize_t *)right;
1300 const Py_ssize_t l_right = *(const Py_ssize_t *)right;
1301 if (l_left < l_right) {
1301 if (l_left < l_right) {
1302 return -1;
1302 return -1;
1303 } else if (l_left > l_right) {
1303 } else if (l_left > l_right) {
1304 return 1;
1304 return 1;
1305 }
1305 }
1306 return 0;
1306 return 0;
1307 }
1307 }
1308
1308
/* slicechunktodensity(revs, targetdensity, mingapsize) -> tuple of lists
 *
 * Slice a delta chain (a Python list of revision numbers) into chunks
 * so that, within each chunk, the ratio of useful payload bytes to
 * bytes read from disk is at least `targetdensity`.  Gaps narrower
 * than `mingapsize` bytes are never sliced on.  The chain is returned
 * unsliced (as a 1-tuple holding the original list) when it has at
 * most one element, spans no more than `mingapsize` bytes, already
 * meets the density target, or contains no notable gap.  Returns NULL
 * with an exception set on error. */
static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
{
	/* method arguments */
	PyObject *list_revs = NULL; /* revisions in the chain */
	double targetdensity = 0;   /* min density to achieve */
	Py_ssize_t mingapsize = 0;  /* threshold to ignore gaps */

	/* other core variables */
	Py_ssize_t idxlen = index_length(self);
	Py_ssize_t i;            /* used for various iteration */
	PyObject *result = NULL; /* the final return of the function */

	/* generic information about the delta chain being slice */
	Py_ssize_t num_revs = 0;    /* size of the full delta chain */
	Py_ssize_t *revs = NULL;    /* native array of revision in the chain */
	int64_t chainpayload = 0;   /* sum of all delta in the chain */
	int64_t deltachainspan = 0; /* distance from first byte to last byte */

	/* variable used for slicing the delta chain */
	int64_t readdata = 0; /* amount of data currently planned to be read */
	double density = 0;   /* ration of payload data compared to read ones */
	int64_t previous_end;
	struct Gap *gaps = NULL; /* array of notable gap in the chain */
	Py_ssize_t num_gaps =
	    0; /* total number of notable gap recorded so far */
	Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
	Py_ssize_t num_selected = 0;         /* number of gaps skipped */
	PyObject *chunk = NULL;              /* individual slice */
	PyObject *allchunks = NULL;          /* all slices */
	Py_ssize_t previdx;

	/* parsing argument */
	if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
	                      &targetdensity, &mingapsize)) {
		goto bail;
	}

	/* If the delta chain contains a single element, we do not need slicing
	 */
	num_revs = PyList_GET_SIZE(list_revs);
	if (num_revs <= 1) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}

	/* Turn the python list into a native integer array (for efficiency) */
	revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
	if (revs == NULL) {
		PyErr_NoMemory();
		goto bail;
	}
	for (i = 0; i < num_revs; i++) {
		Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
		if (revnum == -1 && PyErr_Occurred()) {
			goto bail;
		}
		if (revnum < nullrev || revnum >= idxlen) {
			PyErr_Format(PyExc_IndexError,
			             "index out of range: %zd", revnum);
			goto bail;
		}
		revs[i] = revnum;
	}

	/* Compute and check various property of the unsliced delta chain */
	deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
	if (deltachainspan < 0) {
		goto bail;
	}

	if (deltachainspan <= mingapsize) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}
	chainpayload = 0;
	for (i = 0; i < num_revs; i++) {
		int tmp = index_get_length(self, revs[i]);
		if (tmp < 0) {
			goto bail;
		}
		chainpayload += tmp;
	}

	readdata = deltachainspan;
	density = 1.0;

	if (0 < deltachainspan) {
		density = (double)chainpayload / (double)deltachainspan;
	}

	if (density >= targetdensity) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}

	/* if chain is too sparse, look for relevant gaps */
	gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
	if (gaps == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	previous_end = -1;
	for (i = 0; i < num_revs; i++) {
		int64_t revstart;
		int revsize;
		revstart = index_get_start(self, revs[i]);
		if (revstart < 0) {
			goto bail;
		};
		revsize = index_get_length(self, revs[i]);
		if (revsize < 0) {
			goto bail;
		};
		/* empty revisions do not count toward gaps */
		if (revsize == 0) {
			continue;
		}
		if (previous_end >= 0) {
			int64_t gapsize = revstart - previous_end;
			if (gapsize > mingapsize) {
				gaps[num_gaps].size = gapsize;
				gaps[num_gaps].idx = i;
				num_gaps += 1;
			}
		}
		previous_end = revstart + revsize;
	}
	if (num_gaps == 0) {
		result = PyTuple_Pack(1, list_revs);
		goto done;
	}
	qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);

	/* Slice the largest gap first, they improve the density the most */
	selected_indices =
	    (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
	if (selected_indices == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	for (i = num_gaps - 1; i >= 0; i--) {
		selected_indices[num_selected] = gaps[i].idx;
		readdata -= gaps[i].size;
		num_selected += 1;
		if (readdata <= 0) {
			density = 1.0;
		} else {
			density = (double)chainpayload / (double)readdata;
		}
		if (density >= targetdensity) {
			break;
		}
	}
	/* restore chain order before cutting it into slices */
	qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
	      &Py_ssize_t_compare);

	/* create the resulting slice */
	allchunks = PyList_New(0);
	if (allchunks == NULL) {
		goto bail;
	}
	previdx = 0;
	/* sentinel so the loop below emits the final chunk too */
	selected_indices[num_selected] = num_revs;
	for (i = 0; i <= num_selected; i++) {
		Py_ssize_t idx = selected_indices[i];
		Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
		if (endidx < 0) {
			goto bail;
		}
		if (previdx < endidx) {
			chunk = PyList_GetSlice(list_revs, previdx, endidx);
			if (chunk == NULL) {
				goto bail;
			}
			if (PyList_Append(allchunks, chunk) == -1) {
				goto bail;
			}
			Py_DECREF(chunk);
			chunk = NULL;
		}
		previdx = idx;
	}
	result = allchunks;
	goto done;

bail:
	Py_XDECREF(allchunks);
	Py_XDECREF(chunk);
done:
	free(revs);
	free(gaps);
	free(selected_indices);
	return result;
}
1504
1504
1505 static inline int nt_level(const char *node, Py_ssize_t level)
1505 static inline int nt_level(const char *node, Py_ssize_t level)
1506 {
1506 {
1507 int v = node[level >> 1];
1507 int v = node[level >> 1];
1508 if (!(level & 1))
1508 if (!(level & 1))
1509 v >>= 4;
1509 v >>= 4;
1510 return v & 0xf;
1510 return v & 0xf;
1511 }
1511 }
1512
1512
/*
 * Look up a node id (binary, or hex when `hex` is true), possibly a
 * prefix, in the trie.
 *
 * Return values:
 *
 * -4: match is ambiguous (multiple candidates)
 * -2: not found
 * -1: nodelen == 20 and node is the null id
 * rest: valid rev
 */
static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
                   int hex)
{
	int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
	int level, maxlevel, off;

	/* the null id is never stored in the tree */
	if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
		return -1;

	/* one trie level per nybble: at most 40 for a 20-byte node id */
	if (hex)
		maxlevel = nodelen > 40 ? 40 : (int)nodelen;
	else
		maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);

	for (level = off = 0; level < maxlevel; level++) {
		int k = getnybble(node, level);
		nodetreenode *n = &self->nodes[off];
		int v = n->children[k];

		if (v < 0) {
			/* leaf entry: v encodes a revision as -(rev + 2);
			 * verify the remaining nybbles against that
			 * revision's stored node id */
			const char *n;
			Py_ssize_t i;

			v = -(v + 2);
			n = index_node(self->index, v);
			if (n == NULL)
				return -2;
			for (i = level; i < maxlevel; i++)
				if (getnybble(node, i) != nt_level(n, i))
					return -2;
			return v;
		}
		if (v == 0)
			return -2; /* empty slot: no node with this prefix */
		off = v; /* interior node: descend */
	}
	/* multiple matches against an ambiguous prefix */
	return -4;
}
1559
1559
1560 static int nt_new(nodetree *self)
1560 static int nt_new(nodetree *self)
1561 {
1561 {
1562 if (self->length == self->capacity) {
1562 if (self->length == self->capacity) {
1563 unsigned newcapacity;
1563 unsigned newcapacity;
1564 nodetreenode *newnodes;
1564 nodetreenode *newnodes;
1565 newcapacity = self->capacity * 2;
1565 newcapacity = self->capacity * 2;
1566 if (newcapacity >= INT_MAX / sizeof(nodetreenode)) {
1566 if (newcapacity >= INT_MAX / sizeof(nodetreenode)) {
1567 PyErr_SetString(PyExc_MemoryError,
1567 PyErr_SetString(PyExc_MemoryError,
1568 "overflow in nt_new");
1568 "overflow in nt_new");
1569 return -1;
1569 return -1;
1570 }
1570 }
1571 newnodes =
1571 newnodes =
1572 realloc(self->nodes, newcapacity * sizeof(nodetreenode));
1572 realloc(self->nodes, newcapacity * sizeof(nodetreenode));
1573 if (newnodes == NULL) {
1573 if (newnodes == NULL) {
1574 PyErr_SetString(PyExc_MemoryError, "out of memory");
1574 PyErr_SetString(PyExc_MemoryError, "out of memory");
1575 return -1;
1575 return -1;
1576 }
1576 }
1577 self->capacity = newcapacity;
1577 self->capacity = newcapacity;
1578 self->nodes = newnodes;
1578 self->nodes = newnodes;
1579 memset(&self->nodes[self->length], 0,
1579 memset(&self->nodes[self->length], 0,
1580 sizeof(nodetreenode) * (self->capacity - self->length));
1580 sizeof(nodetreenode) * (self->capacity - self->length));
1581 }
1581 }
1582 return self->length++;
1582 return self->length++;
1583 }
1583 }
1584
1584
/* Insert a 20-byte binary node id into the trie, mapping it to `rev`.
 * Leaves are stored in child slots as -rev - 2; when two distinct
 * nodes collide on a prefix, the old leaf is pushed one level down
 * into a newly allocated interior node.  Inserting a node id already
 * present simply overwrites its rev.  Returns 0 on success, -1 on
 * error (exception set by the helpers). */
static int nt_insert(nodetree *self, const char *node, int rev)
{
	int level = 0;
	int off = 0;

	while (level < 40) {
		int k = nt_level(node, level);
		nodetreenode *n;
		int v;

		n = &self->nodes[off];
		v = n->children[k];

		if (v == 0) {
			/* empty slot: store the rev as a leaf */
			n->children[k] = -rev - 2;
			return 0;
		}
		if (v < 0) {
			/* slot already holds a leaf for some other rev */
			const char *oldnode =
			    index_node_existing(self->index, -(v + 2));
			int noff;

			if (oldnode == NULL)
				return -1;
			if (!memcmp(oldnode, node, 20)) {
				/* same node id: just update the rev */
				n->children[k] = -rev - 2;
				return 0;
			}
			/* split: push the old leaf one level down */
			noff = nt_new(self);
			if (noff == -1)
				return -1;
			/* self->nodes may have been changed by realloc */
			self->nodes[off].children[k] = noff;
			off = noff;
			n = &self->nodes[off];
			n->children[nt_level(oldnode, ++level)] = v;
			if (level > self->depth)
				self->depth = level;
			self->splits += 1;
		} else {
			/* interior node: descend */
			level += 1;
			off = v;
		}
	}

	return -1;
}
1632
1632
1633 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
1633 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
1634 {
1634 {
1635 Py_ssize_t rev;
1635 Py_ssize_t rev;
1636 const char *node;
1636 const char *node;
1637 Py_ssize_t length;
1637 Py_ssize_t length;
1638 if (!PyArg_ParseTuple(args, "n", &rev))
1638 if (!PyArg_ParseTuple(args, "n", &rev))
1639 return NULL;
1639 return NULL;
1640 length = index_length(self->nt.index);
1640 length = index_length(self->nt.index);
1641 if (rev < 0 || rev >= length) {
1641 if (rev < 0 || rev >= length) {
1642 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1642 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1643 return NULL;
1643 return NULL;
1644 }
1644 }
1645 node = index_node_existing(self->nt.index, rev);
1645 node = index_node_existing(self->nt.index, rev);
1646 if (nt_insert(&self->nt, node, (int)rev) == -1)
1646 if (nt_insert(&self->nt, node, (int)rev) == -1)
1647 return NULL;
1647 return NULL;
1648 Py_RETURN_NONE;
1648 Py_RETURN_NONE;
1649 }
1649 }
1650
1650
/* Remove `node` from the tree.  Returns 0 on success, -1 on error. */
static int nt_delete_node(nodetree *self, const char *node)
{
	/* rev==-2 happens to get encoded as 0, which is interpreted as not set
	 */
	return nt_insert(self, node, -2);
}
1657
1657
1658 static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
1658 static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
1659 {
1659 {
1660 /* Initialize before overflow-checking to avoid nt_dealloc() crash. */
1660 /* Initialize before overflow-checking to avoid nt_dealloc() crash. */
1661 self->nodes = NULL;
1661 self->nodes = NULL;
1662
1662
1663 self->index = index;
1663 self->index = index;
1664 /* The input capacity is in terms of revisions, while the field is in
1664 /* The input capacity is in terms of revisions, while the field is in
1665 * terms of nodetree nodes. */
1665 * terms of nodetree nodes. */
1666 self->capacity = (capacity < 4 ? 4 : capacity / 2);
1666 self->capacity = (capacity < 4 ? 4 : capacity / 2);
1667 self->depth = 0;
1667 self->depth = 0;
1668 self->splits = 0;
1668 self->splits = 0;
1669 if ((size_t)self->capacity > INT_MAX / sizeof(nodetreenode)) {
1669 if ((size_t)self->capacity > INT_MAX / sizeof(nodetreenode)) {
1670 PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
1670 PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
1671 return -1;
1671 return -1;
1672 }
1672 }
1673 self->nodes = calloc(self->capacity, sizeof(nodetreenode));
1673 self->nodes = calloc(self->capacity, sizeof(nodetreenode));
1674 if (self->nodes == NULL) {
1674 if (self->nodes == NULL) {
1675 PyErr_NoMemory();
1675 PyErr_NoMemory();
1676 return -1;
1676 return -1;
1677 }
1677 }
1678 self->length = 1;
1678 self->length = 1;
1679 return 0;
1679 return 0;
1680 }
1680 }
1681
1681
1682 static int ntobj_init(nodetreeObject *self, PyObject *args)
1682 static int ntobj_init(nodetreeObject *self, PyObject *args)
1683 {
1683 {
1684 PyObject *index;
1684 PyObject *index;
1685 unsigned capacity;
1685 unsigned capacity;
1686 if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
1686 if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
1687 &capacity))
1687 &capacity))
1688 return -1;
1688 return -1;
1689 Py_INCREF(index);
1689 Py_INCREF(index);
1690 return nt_init(&self->nt, (indexObject *)index, capacity);
1690 return nt_init(&self->nt, (indexObject *)index, capacity);
1691 }
1691 }
1692
1692
1693 static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
1693 static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
1694 {
1694 {
1695 return nt_find(self, node, nodelen, 1);
1695 return nt_find(self, node, nodelen, 1);
1696 }
1696 }
1697
1697
1698 /*
1698 /*
1699 * Find the length of the shortest unique prefix of node.
1699 * Find the length of the shortest unique prefix of node.
1700 *
1700 *
1701 * Return values:
1701 * Return values:
1702 *
1702 *
1703 * -3: error (exception set)
1703 * -3: error (exception set)
1704 * -2: not found (no exception set)
1704 * -2: not found (no exception set)
1705 * rest: length of shortest prefix
1705 * rest: length of shortest prefix
1706 */
1706 */
1707 static int nt_shortest(nodetree *self, const char *node)
1707 static int nt_shortest(nodetree *self, const char *node)
1708 {
1708 {
1709 int level, off;
1709 int level, off;
1710
1710
1711 for (level = off = 0; level < 40; level++) {
1711 for (level = off = 0; level < 40; level++) {
1712 int k, v;
1712 int k, v;
1713 nodetreenode *n = &self->nodes[off];
1713 nodetreenode *n = &self->nodes[off];
1714 k = nt_level(node, level);
1714 k = nt_level(node, level);
1715 v = n->children[k];
1715 v = n->children[k];
1716 if (v < 0) {
1716 if (v < 0) {
1717 const char *n;
1717 const char *n;
1718 v = -(v + 2);
1718 v = -(v + 2);
1719 n = index_node_existing(self->index, v);
1719 n = index_node_existing(self->index, v);
1720 if (n == NULL)
1720 if (n == NULL)
1721 return -3;
1721 return -3;
1722 if (memcmp(node, n, 20) != 0)
1722 if (memcmp(node, n, 20) != 0)
1723 /*
1723 /*
1724 * Found a unique prefix, but it wasn't for the
1724 * Found a unique prefix, but it wasn't for the
1725 * requested node (i.e the requested node does
1725 * requested node (i.e the requested node does
1726 * not exist).
1726 * not exist).
1727 */
1727 */
1728 return -2;
1728 return -2;
1729 return level + 1;
1729 return level + 1;
1730 }
1730 }
1731 if (v == 0)
1731 if (v == 0)
1732 return -2;
1732 return -2;
1733 off = v;
1733 off = v;
1734 }
1734 }
1735 /*
1735 /*
1736 * The node was still not unique after 40 hex digits, so this won't
1736 * The node was still not unique after 40 hex digits, so this won't
1737 * happen. Also, if we get here, then there's a programming error in
1737 * happen. Also, if we get here, then there's a programming error in
1738 * this file that made us insert a node longer than 40 hex digits.
1738 * this file that made us insert a node longer than 40 hex digits.
1739 */
1739 */
1740 PyErr_SetString(PyExc_Exception, "broken node tree");
1740 PyErr_SetString(PyExc_Exception, "broken node tree");
1741 return -3;
1741 return -3;
1742 }
1742 }
1743
1743
1744 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
1744 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
1745 {
1745 {
1746 PyObject *val;
1746 PyObject *val;
1747 char *node;
1747 char *node;
1748 int length;
1748 int length;
1749
1749
1750 if (!PyArg_ParseTuple(args, "O", &val))
1750 if (!PyArg_ParseTuple(args, "O", &val))
1751 return NULL;
1751 return NULL;
1752 if (node_check(val, &node) == -1)
1752 if (node_check(val, &node) == -1)
1753 return NULL;
1753 return NULL;
1754
1754
1755 length = nt_shortest(&self->nt, node);
1755 length = nt_shortest(&self->nt, node);
1756 if (length == -3)
1756 if (length == -3)
1757 return NULL;
1757 return NULL;
1758 if (length == -2) {
1758 if (length == -2) {
1759 raise_revlog_error();
1759 raise_revlog_error();
1760 return NULL;
1760 return NULL;
1761 }
1761 }
1762 return PyInt_FromLong(length);
1762 return PyInt_FromLong(length);
1763 }
1763 }
1764
1764
1765 static void nt_dealloc(nodetree *self)
1765 static void nt_dealloc(nodetree *self)
1766 {
1766 {
1767 free(self->nodes);
1767 free(self->nodes);
1768 self->nodes = NULL;
1768 self->nodes = NULL;
1769 }
1769 }
1770
1770
1771 static void ntobj_dealloc(nodetreeObject *self)
1771 static void ntobj_dealloc(nodetreeObject *self)
1772 {
1772 {
1773 Py_XDECREF(self->nt.index);
1773 Py_XDECREF(self->nt.index);
1774 nt_dealloc(&self->nt);
1774 nt_dealloc(&self->nt);
1775 PyObject_Del(self);
1775 PyObject_Del(self);
1776 }
1776 }
1777
1777
1778 static PyMethodDef ntobj_methods[] = {
1778 static PyMethodDef ntobj_methods[] = {
1779 {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
1779 {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
1780 "insert an index entry"},
1780 "insert an index entry"},
1781 {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
1781 {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
1782 "find length of shortest hex nodeid of a binary ID"},
1782 "find length of shortest hex nodeid of a binary ID"},
1783 {NULL} /* Sentinel */
1783 {NULL} /* Sentinel */
1784 };
1784 };
1785
1785
1786 static PyTypeObject nodetreeType = {
1786 static PyTypeObject nodetreeType = {
1787 PyVarObject_HEAD_INIT(NULL, 0) /* header */
1787 PyVarObject_HEAD_INIT(NULL, 0) /* header */
1788 "parsers.nodetree", /* tp_name */
1788 "parsers.nodetree", /* tp_name */
1789 sizeof(nodetreeObject), /* tp_basicsize */
1789 sizeof(nodetreeObject), /* tp_basicsize */
1790 0, /* tp_itemsize */
1790 0, /* tp_itemsize */
1791 (destructor)ntobj_dealloc, /* tp_dealloc */
1791 (destructor)ntobj_dealloc, /* tp_dealloc */
1792 0, /* tp_print */
1792 0, /* tp_print */
1793 0, /* tp_getattr */
1793 0, /* tp_getattr */
1794 0, /* tp_setattr */
1794 0, /* tp_setattr */
1795 0, /* tp_compare */
1795 0, /* tp_compare */
1796 0, /* tp_repr */
1796 0, /* tp_repr */
1797 0, /* tp_as_number */
1797 0, /* tp_as_number */
1798 0, /* tp_as_sequence */
1798 0, /* tp_as_sequence */
1799 0, /* tp_as_mapping */
1799 0, /* tp_as_mapping */
1800 0, /* tp_hash */
1800 0, /* tp_hash */
1801 0, /* tp_call */
1801 0, /* tp_call */
1802 0, /* tp_str */
1802 0, /* tp_str */
1803 0, /* tp_getattro */
1803 0, /* tp_getattro */
1804 0, /* tp_setattro */
1804 0, /* tp_setattro */
1805 0, /* tp_as_buffer */
1805 0, /* tp_as_buffer */
1806 Py_TPFLAGS_DEFAULT, /* tp_flags */
1806 Py_TPFLAGS_DEFAULT, /* tp_flags */
1807 "nodetree", /* tp_doc */
1807 "nodetree", /* tp_doc */
1808 0, /* tp_traverse */
1808 0, /* tp_traverse */
1809 0, /* tp_clear */
1809 0, /* tp_clear */
1810 0, /* tp_richcompare */
1810 0, /* tp_richcompare */
1811 0, /* tp_weaklistoffset */
1811 0, /* tp_weaklistoffset */
1812 0, /* tp_iter */
1812 0, /* tp_iter */
1813 0, /* tp_iternext */
1813 0, /* tp_iternext */
1814 ntobj_methods, /* tp_methods */
1814 ntobj_methods, /* tp_methods */
1815 0, /* tp_members */
1815 0, /* tp_members */
1816 0, /* tp_getset */
1816 0, /* tp_getset */
1817 0, /* tp_base */
1817 0, /* tp_base */
1818 0, /* tp_dict */
1818 0, /* tp_dict */
1819 0, /* tp_descr_get */
1819 0, /* tp_descr_get */
1820 0, /* tp_descr_set */
1820 0, /* tp_descr_set */
1821 0, /* tp_dictoffset */
1821 0, /* tp_dictoffset */
1822 (initproc)ntobj_init, /* tp_init */
1822 (initproc)ntobj_init, /* tp_init */
1823 0, /* tp_alloc */
1823 0, /* tp_alloc */
1824 };
1824 };
1825
1825
1826 static int index_init_nt(indexObject *self)
1826 static int index_init_nt(indexObject *self)
1827 {
1827 {
1828 if (!self->ntinitialized) {
1828 if (!self->ntinitialized) {
1829 if (nt_init(&self->nt, self, (int)self->raw_length) == -1) {
1829 if (nt_init(&self->nt, self, (int)self->raw_length) == -1) {
1830 nt_dealloc(&self->nt);
1830 nt_dealloc(&self->nt);
1831 return -1;
1831 return -1;
1832 }
1832 }
1833 if (nt_insert(&self->nt, nullid, -1) == -1) {
1833 if (nt_insert(&self->nt, nullid, -1) == -1) {
1834 nt_dealloc(&self->nt);
1834 nt_dealloc(&self->nt);
1835 return -1;
1835 return -1;
1836 }
1836 }
1837 self->ntinitialized = 1;
1837 self->ntinitialized = 1;
1838 self->ntrev = (int)index_length(self);
1838 self->ntrev = (int)index_length(self);
1839 self->ntlookups = 1;
1839 self->ntlookups = 1;
1840 self->ntmisses = 0;
1840 self->ntmisses = 0;
1841 }
1841 }
1842 return 0;
1842 return 0;
1843 }
1843 }
1844
1844
1845 /*
1845 /*
1846 * Return values:
1846 * Return values:
1847 *
1847 *
1848 * -3: error (exception set)
1848 * -3: error (exception set)
1849 * -2: not found (no exception set)
1849 * -2: not found (no exception set)
1850 * rest: valid rev
1850 * rest: valid rev
1851 */
1851 */
1852 static int index_find_node(indexObject *self, const char *node,
1852 static int index_find_node(indexObject *self, const char *node,
1853 Py_ssize_t nodelen)
1853 Py_ssize_t nodelen)
1854 {
1854 {
1855 int rev;
1855 int rev;
1856
1856
1857 if (index_init_nt(self) == -1)
1857 if (index_init_nt(self) == -1)
1858 return -3;
1858 return -3;
1859
1859
1860 self->ntlookups++;
1860 self->ntlookups++;
1861 rev = nt_find(&self->nt, node, nodelen, 0);
1861 rev = nt_find(&self->nt, node, nodelen, 0);
1862 if (rev >= -1)
1862 if (rev >= -1)
1863 return rev;
1863 return rev;
1864
1864
1865 /*
1865 /*
1866 * For the first handful of lookups, we scan the entire index,
1866 * For the first handful of lookups, we scan the entire index,
1867 * and cache only the matching nodes. This optimizes for cases
1867 * and cache only the matching nodes. This optimizes for cases
1868 * like "hg tip", where only a few nodes are accessed.
1868 * like "hg tip", where only a few nodes are accessed.
1869 *
1869 *
1870 * After that, we cache every node we visit, using a single
1870 * After that, we cache every node we visit, using a single
1871 * scan amortized over multiple lookups. This gives the best
1871 * scan amortized over multiple lookups. This gives the best
1872 * bulk performance, e.g. for "hg log".
1872 * bulk performance, e.g. for "hg log".
1873 */
1873 */
1874 if (self->ntmisses++ < 4) {
1874 if (self->ntmisses++ < 4) {
1875 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1875 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1876 const char *n = index_node_existing(self, rev);
1876 const char *n = index_node_existing(self, rev);
1877 if (n == NULL)
1877 if (n == NULL)
1878 return -3;
1878 return -3;
1879 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1879 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1880 if (nt_insert(&self->nt, n, rev) == -1)
1880 if (nt_insert(&self->nt, n, rev) == -1)
1881 return -3;
1881 return -3;
1882 break;
1882 break;
1883 }
1883 }
1884 }
1884 }
1885 } else {
1885 } else {
1886 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1886 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1887 const char *n = index_node_existing(self, rev);
1887 const char *n = index_node_existing(self, rev);
1888 if (n == NULL)
1888 if (n == NULL)
1889 return -3;
1889 return -3;
1890 if (nt_insert(&self->nt, n, rev) == -1) {
1890 if (nt_insert(&self->nt, n, rev) == -1) {
1891 self->ntrev = rev + 1;
1891 self->ntrev = rev + 1;
1892 return -3;
1892 return -3;
1893 }
1893 }
1894 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1894 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1895 break;
1895 break;
1896 }
1896 }
1897 }
1897 }
1898 self->ntrev = rev;
1898 self->ntrev = rev;
1899 }
1899 }
1900
1900
1901 if (rev >= 0)
1901 if (rev >= 0)
1902 return rev;
1902 return rev;
1903 return -2;
1903 return -2;
1904 }
1904 }
1905
1905
1906 static PyObject *index_getitem(indexObject *self, PyObject *value)
1906 static PyObject *index_getitem(indexObject *self, PyObject *value)
1907 {
1907 {
1908 char *node;
1908 char *node;
1909 int rev;
1909 int rev;
1910
1910
1911 if (PyInt_Check(value)) {
1911 if (PyInt_Check(value)) {
1912 long idx;
1912 long idx;
1913 if (!pylong_to_long(value, &idx)) {
1913 if (!pylong_to_long(value, &idx)) {
1914 return NULL;
1914 return NULL;
1915 }
1915 }
1916 return index_get(self, idx);
1916 return index_get(self, idx);
1917 }
1917 }
1918
1918
1919 if (node_check(value, &node) == -1)
1919 if (node_check(value, &node) == -1)
1920 return NULL;
1920 return NULL;
1921 rev = index_find_node(self, node, 20);
1921 rev = index_find_node(self, node, 20);
1922 if (rev >= -1)
1922 if (rev >= -1)
1923 return PyInt_FromLong(rev);
1923 return PyInt_FromLong(rev);
1924 if (rev == -2)
1924 if (rev == -2)
1925 raise_revlog_error();
1925 raise_revlog_error();
1926 return NULL;
1926 return NULL;
1927 }
1927 }
1928
1928
1929 /*
1929 /*
1930 * Fully populate the radix tree.
1930 * Fully populate the radix tree.
1931 */
1931 */
1932 static int index_populate_nt(indexObject *self)
1932 static int index_populate_nt(indexObject *self)
1933 {
1933 {
1934 int rev;
1934 int rev;
1935 if (self->ntrev > 0) {
1935 if (self->ntrev > 0) {
1936 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1936 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1937 const char *n = index_node_existing(self, rev);
1937 const char *n = index_node_existing(self, rev);
1938 if (n == NULL)
1938 if (n == NULL)
1939 return -1;
1939 return -1;
1940 if (nt_insert(&self->nt, n, rev) == -1)
1940 if (nt_insert(&self->nt, n, rev) == -1)
1941 return -1;
1941 return -1;
1942 }
1942 }
1943 self->ntrev = -1;
1943 self->ntrev = -1;
1944 }
1944 }
1945 return 0;
1945 return 0;
1946 }
1946 }
1947
1947
1948 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1948 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1949 {
1949 {
1950 const char *fullnode;
1950 const char *fullnode;
1951 Py_ssize_t nodelen;
1951 Py_ssize_t nodelen;
1952 char *node;
1952 char *node;
1953 int rev, i;
1953 int rev, i;
1954
1954
1955 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
1955 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
1956 return NULL;
1956 return NULL;
1957
1957
1958 if (nodelen < 1) {
1958 if (nodelen < 1) {
1959 PyErr_SetString(PyExc_ValueError, "key too short");
1959 PyErr_SetString(PyExc_ValueError, "key too short");
1960 return NULL;
1960 return NULL;
1961 }
1961 }
1962
1962
1963 if (nodelen > 40) {
1963 if (nodelen > 40) {
1964 PyErr_SetString(PyExc_ValueError, "key too long");
1964 PyErr_SetString(PyExc_ValueError, "key too long");
1965 return NULL;
1965 return NULL;
1966 }
1966 }
1967
1967
1968 for (i = 0; i < nodelen; i++)
1968 for (i = 0; i < nodelen; i++)
1969 hexdigit(node, i);
1969 hexdigit(node, i);
1970 if (PyErr_Occurred()) {
1970 if (PyErr_Occurred()) {
1971 /* input contains non-hex characters */
1971 /* input contains non-hex characters */
1972 PyErr_Clear();
1972 PyErr_Clear();
1973 Py_RETURN_NONE;
1973 Py_RETURN_NONE;
1974 }
1974 }
1975
1975
1976 if (index_init_nt(self) == -1)
1976 if (index_init_nt(self) == -1)
1977 return NULL;
1977 return NULL;
1978 if (index_populate_nt(self) == -1)
1978 if (index_populate_nt(self) == -1)
1979 return NULL;
1979 return NULL;
1980 rev = nt_partialmatch(&self->nt, node, nodelen);
1980 rev = nt_partialmatch(&self->nt, node, nodelen);
1981
1981
1982 switch (rev) {
1982 switch (rev) {
1983 case -4:
1983 case -4:
1984 raise_revlog_error();
1984 raise_revlog_error();
1985 return NULL;
1985 return NULL;
1986 case -2:
1986 case -2:
1987 Py_RETURN_NONE;
1987 Py_RETURN_NONE;
1988 case -1:
1988 case -1:
1989 return PyBytes_FromStringAndSize(nullid, 20);
1989 return PyBytes_FromStringAndSize(nullid, 20);
1990 }
1990 }
1991
1991
1992 fullnode = index_node_existing(self, rev);
1992 fullnode = index_node_existing(self, rev);
1993 if (fullnode == NULL) {
1993 if (fullnode == NULL) {
1994 return NULL;
1994 return NULL;
1995 }
1995 }
1996 return PyBytes_FromStringAndSize(fullnode, 20);
1996 return PyBytes_FromStringAndSize(fullnode, 20);
1997 }
1997 }
1998
1998
1999 static PyObject *index_shortest(indexObject *self, PyObject *args)
1999 static PyObject *index_shortest(indexObject *self, PyObject *args)
2000 {
2000 {
2001 PyObject *val;
2001 PyObject *val;
2002 char *node;
2002 char *node;
2003 int length;
2003 int length;
2004
2004
2005 if (!PyArg_ParseTuple(args, "O", &val))
2005 if (!PyArg_ParseTuple(args, "O", &val))
2006 return NULL;
2006 return NULL;
2007 if (node_check(val, &node) == -1)
2007 if (node_check(val, &node) == -1)
2008 return NULL;
2008 return NULL;
2009
2009
2010 self->ntlookups++;
2010 self->ntlookups++;
2011 if (index_init_nt(self) == -1)
2011 if (index_init_nt(self) == -1)
2012 return NULL;
2012 return NULL;
2013 if (index_populate_nt(self) == -1)
2013 if (index_populate_nt(self) == -1)
2014 return NULL;
2014 return NULL;
2015 length = nt_shortest(&self->nt, node);
2015 length = nt_shortest(&self->nt, node);
2016 if (length == -3)
2016 if (length == -3)
2017 return NULL;
2017 return NULL;
2018 if (length == -2) {
2018 if (length == -2) {
2019 raise_revlog_error();
2019 raise_revlog_error();
2020 return NULL;
2020 return NULL;
2021 }
2021 }
2022 return PyInt_FromLong(length);
2022 return PyInt_FromLong(length);
2023 }
2023 }
2024
2024
2025 static PyObject *index_m_get(indexObject *self, PyObject *args)
2025 static PyObject *index_m_get(indexObject *self, PyObject *args)
2026 {
2026 {
2027 PyObject *val;
2027 PyObject *val;
2028 char *node;
2028 char *node;
2029 int rev;
2029 int rev;
2030
2030
2031 if (!PyArg_ParseTuple(args, "O", &val))
2031 if (!PyArg_ParseTuple(args, "O", &val))
2032 return NULL;
2032 return NULL;
2033 if (node_check(val, &node) == -1)
2033 if (node_check(val, &node) == -1)
2034 return NULL;
2034 return NULL;
2035 rev = index_find_node(self, node, 20);
2035 rev = index_find_node(self, node, 20);
2036 if (rev == -3)
2036 if (rev == -3)
2037 return NULL;
2037 return NULL;
2038 if (rev == -2)
2038 if (rev == -2)
2039 Py_RETURN_NONE;
2039 Py_RETURN_NONE;
2040 return PyInt_FromLong(rev);
2040 return PyInt_FromLong(rev);
2041 }
2041 }
2042
2042
2043 static int index_contains(indexObject *self, PyObject *value)
2043 static int index_contains(indexObject *self, PyObject *value)
2044 {
2044 {
2045 char *node;
2045 char *node;
2046
2046
2047 if (PyInt_Check(value)) {
2047 if (PyInt_Check(value)) {
2048 long rev;
2048 long rev;
2049 if (!pylong_to_long(value, &rev)) {
2049 if (!pylong_to_long(value, &rev)) {
2050 return -1;
2050 return -1;
2051 }
2051 }
2052 return rev >= -1 && rev < index_length(self);
2052 return rev >= -1 && rev < index_length(self);
2053 }
2053 }
2054
2054
2055 if (node_check(value, &node) == -1)
2055 if (node_check(value, &node) == -1)
2056 return -1;
2056 return -1;
2057
2057
2058 switch (index_find_node(self, node, 20)) {
2058 switch (index_find_node(self, node, 20)) {
2059 case -3:
2059 case -3:
2060 return -1;
2060 return -1;
2061 case -2:
2061 case -2:
2062 return 0;
2062 return 0;
2063 default:
2063 default:
2064 return 1;
2064 return 1;
2065 }
2065 }
2066 }
2066 }
2067
2067
2068 static PyObject *index_m_has_node(indexObject *self, PyObject *args)
2068 static PyObject *index_m_has_node(indexObject *self, PyObject *args)
2069 {
2069 {
2070 int ret = index_contains(self, args);
2070 int ret = index_contains(self, args);
2071 if (ret < 0)
2071 if (ret < 0)
2072 return NULL;
2072 return NULL;
2073 return PyBool_FromLong((long)ret);
2073 return PyBool_FromLong((long)ret);
2074 }
2074 }
2075
2075
2076 static PyObject *index_m_rev(indexObject *self, PyObject *val)
2077 {
2078 char *node;
2079 int rev;
2080
2081 if (node_check(val, &node) == -1)
2082 return NULL;
2083 rev = index_find_node(self, node, 20);
2084 if (rev >= -1)
2085 return PyInt_FromLong(rev);
2086 if (rev == -2)
2087 raise_revlog_error();
2088 return NULL;
2089 }
2090
2076 typedef uint64_t bitmask;
2091 typedef uint64_t bitmask;
2077
2092
2078 /*
2093 /*
2079 * Given a disjoint set of revs, return all candidates for the
2094 * Given a disjoint set of revs, return all candidates for the
2080 * greatest common ancestor. In revset notation, this is the set
2095 * greatest common ancestor. In revset notation, this is the set
2081 * "heads(::a and ::b and ...)"
2096 * "heads(::a and ::b and ...)"
2082 */
2097 */
2083 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
2098 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
2084 int revcount)
2099 int revcount)
2085 {
2100 {
2086 const bitmask allseen = (1ull << revcount) - 1;
2101 const bitmask allseen = (1ull << revcount) - 1;
2087 const bitmask poison = 1ull << revcount;
2102 const bitmask poison = 1ull << revcount;
2088 PyObject *gca = PyList_New(0);
2103 PyObject *gca = PyList_New(0);
2089 int i, v, interesting;
2104 int i, v, interesting;
2090 int maxrev = -1;
2105 int maxrev = -1;
2091 bitmask sp;
2106 bitmask sp;
2092 bitmask *seen;
2107 bitmask *seen;
2093
2108
2094 if (gca == NULL)
2109 if (gca == NULL)
2095 return PyErr_NoMemory();
2110 return PyErr_NoMemory();
2096
2111
2097 for (i = 0; i < revcount; i++) {
2112 for (i = 0; i < revcount; i++) {
2098 if (revs[i] > maxrev)
2113 if (revs[i] > maxrev)
2099 maxrev = revs[i];
2114 maxrev = revs[i];
2100 }
2115 }
2101
2116
2102 seen = calloc(sizeof(*seen), maxrev + 1);
2117 seen = calloc(sizeof(*seen), maxrev + 1);
2103 if (seen == NULL) {
2118 if (seen == NULL) {
2104 Py_DECREF(gca);
2119 Py_DECREF(gca);
2105 return PyErr_NoMemory();
2120 return PyErr_NoMemory();
2106 }
2121 }
2107
2122
2108 for (i = 0; i < revcount; i++)
2123 for (i = 0; i < revcount; i++)
2109 seen[revs[i]] = 1ull << i;
2124 seen[revs[i]] = 1ull << i;
2110
2125
2111 interesting = revcount;
2126 interesting = revcount;
2112
2127
2113 for (v = maxrev; v >= 0 && interesting; v--) {
2128 for (v = maxrev; v >= 0 && interesting; v--) {
2114 bitmask sv = seen[v];
2129 bitmask sv = seen[v];
2115 int parents[2];
2130 int parents[2];
2116
2131
2117 if (!sv)
2132 if (!sv)
2118 continue;
2133 continue;
2119
2134
2120 if (sv < poison) {
2135 if (sv < poison) {
2121 interesting -= 1;
2136 interesting -= 1;
2122 if (sv == allseen) {
2137 if (sv == allseen) {
2123 PyObject *obj = PyInt_FromLong(v);
2138 PyObject *obj = PyInt_FromLong(v);
2124 if (obj == NULL)
2139 if (obj == NULL)
2125 goto bail;
2140 goto bail;
2126 if (PyList_Append(gca, obj) == -1) {
2141 if (PyList_Append(gca, obj) == -1) {
2127 Py_DECREF(obj);
2142 Py_DECREF(obj);
2128 goto bail;
2143 goto bail;
2129 }
2144 }
2130 sv |= poison;
2145 sv |= poison;
2131 for (i = 0; i < revcount; i++) {
2146 for (i = 0; i < revcount; i++) {
2132 if (revs[i] == v)
2147 if (revs[i] == v)
2133 goto done;
2148 goto done;
2134 }
2149 }
2135 }
2150 }
2136 }
2151 }
2137 if (index_get_parents(self, v, parents, maxrev) < 0)
2152 if (index_get_parents(self, v, parents, maxrev) < 0)
2138 goto bail;
2153 goto bail;
2139
2154
2140 for (i = 0; i < 2; i++) {
2155 for (i = 0; i < 2; i++) {
2141 int p = parents[i];
2156 int p = parents[i];
2142 if (p == -1)
2157 if (p == -1)
2143 continue;
2158 continue;
2144 sp = seen[p];
2159 sp = seen[p];
2145 if (sv < poison) {
2160 if (sv < poison) {
2146 if (sp == 0) {
2161 if (sp == 0) {
2147 seen[p] = sv;
2162 seen[p] = sv;
2148 interesting++;
2163 interesting++;
2149 } else if (sp != sv)
2164 } else if (sp != sv)
2150 seen[p] |= sv;
2165 seen[p] |= sv;
2151 } else {
2166 } else {
2152 if (sp && sp < poison)
2167 if (sp && sp < poison)
2153 interesting--;
2168 interesting--;
2154 seen[p] = sv;
2169 seen[p] = sv;
2155 }
2170 }
2156 }
2171 }
2157 }
2172 }
2158
2173
2159 done:
2174 done:
2160 free(seen);
2175 free(seen);
2161 return gca;
2176 return gca;
2162 bail:
2177 bail:
2163 free(seen);
2178 free(seen);
2164 Py_XDECREF(gca);
2179 Py_XDECREF(gca);
2165 return NULL;
2180 return NULL;
2166 }
2181 }
2167
2182
2168 /*
2183 /*
2169 * Given a disjoint set of revs, return the subset with the longest
2184 * Given a disjoint set of revs, return the subset with the longest
2170 * path to the root.
2185 * path to the root.
2171 */
2186 */
2172 static PyObject *find_deepest(indexObject *self, PyObject *revs)
2187 static PyObject *find_deepest(indexObject *self, PyObject *revs)
2173 {
2188 {
2174 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
2189 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
2175 static const Py_ssize_t capacity = 24;
2190 static const Py_ssize_t capacity = 24;
2176 int *depth, *interesting = NULL;
2191 int *depth, *interesting = NULL;
2177 int i, j, v, ninteresting;
2192 int i, j, v, ninteresting;
2178 PyObject *dict = NULL, *keys = NULL;
2193 PyObject *dict = NULL, *keys = NULL;
2179 long *seen = NULL;
2194 long *seen = NULL;
2180 int maxrev = -1;
2195 int maxrev = -1;
2181 long final;
2196 long final;
2182
2197
2183 if (revcount > capacity) {
2198 if (revcount > capacity) {
2184 PyErr_Format(PyExc_OverflowError,
2199 PyErr_Format(PyExc_OverflowError,
2185 "bitset size (%ld) > capacity (%ld)",
2200 "bitset size (%ld) > capacity (%ld)",
2186 (long)revcount, (long)capacity);
2201 (long)revcount, (long)capacity);
2187 return NULL;
2202 return NULL;
2188 }
2203 }
2189
2204
2190 for (i = 0; i < revcount; i++) {
2205 for (i = 0; i < revcount; i++) {
2191 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2206 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2192 if (n > maxrev)
2207 if (n > maxrev)
2193 maxrev = n;
2208 maxrev = n;
2194 }
2209 }
2195
2210
2196 depth = calloc(sizeof(*depth), maxrev + 1);
2211 depth = calloc(sizeof(*depth), maxrev + 1);
2197 if (depth == NULL)
2212 if (depth == NULL)
2198 return PyErr_NoMemory();
2213 return PyErr_NoMemory();
2199
2214
2200 seen = calloc(sizeof(*seen), maxrev + 1);
2215 seen = calloc(sizeof(*seen), maxrev + 1);
2201 if (seen == NULL) {
2216 if (seen == NULL) {
2202 PyErr_NoMemory();
2217 PyErr_NoMemory();
2203 goto bail;
2218 goto bail;
2204 }
2219 }
2205
2220
2206 interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
2221 interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
2207 if (interesting == NULL) {
2222 if (interesting == NULL) {
2208 PyErr_NoMemory();
2223 PyErr_NoMemory();
2209 goto bail;
2224 goto bail;
2210 }
2225 }
2211
2226
2212 if (PyList_Sort(revs) == -1)
2227 if (PyList_Sort(revs) == -1)
2213 goto bail;
2228 goto bail;
2214
2229
2215 for (i = 0; i < revcount; i++) {
2230 for (i = 0; i < revcount; i++) {
2216 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2231 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2217 long b = 1l << i;
2232 long b = 1l << i;
2218 depth[n] = 1;
2233 depth[n] = 1;
2219 seen[n] = b;
2234 seen[n] = b;
2220 interesting[b] = 1;
2235 interesting[b] = 1;
2221 }
2236 }
2222
2237
2223 /* invariant: ninteresting is the number of non-zero entries in
2238 /* invariant: ninteresting is the number of non-zero entries in
2224 * interesting. */
2239 * interesting. */
2225 ninteresting = (int)revcount;
2240 ninteresting = (int)revcount;
2226
2241
2227 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
2242 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
2228 int dv = depth[v];
2243 int dv = depth[v];
2229 int parents[2];
2244 int parents[2];
2230 long sv;
2245 long sv;
2231
2246
2232 if (dv == 0)
2247 if (dv == 0)
2233 continue;
2248 continue;
2234
2249
2235 sv = seen[v];
2250 sv = seen[v];
2236 if (index_get_parents(self, v, parents, maxrev) < 0)
2251 if (index_get_parents(self, v, parents, maxrev) < 0)
2237 goto bail;
2252 goto bail;
2238
2253
2239 for (i = 0; i < 2; i++) {
2254 for (i = 0; i < 2; i++) {
2240 int p = parents[i];
2255 int p = parents[i];
2241 long sp;
2256 long sp;
2242 int dp;
2257 int dp;
2243
2258
2244 if (p == -1)
2259 if (p == -1)
2245 continue;
2260 continue;
2246
2261
2247 dp = depth[p];
2262 dp = depth[p];
2248 sp = seen[p];
2263 sp = seen[p];
2249 if (dp <= dv) {
2264 if (dp <= dv) {
2250 depth[p] = dv + 1;
2265 depth[p] = dv + 1;
2251 if (sp != sv) {
2266 if (sp != sv) {
2252 interesting[sv] += 1;
2267 interesting[sv] += 1;
2253 seen[p] = sv;
2268 seen[p] = sv;
2254 if (sp) {
2269 if (sp) {
2255 interesting[sp] -= 1;
2270 interesting[sp] -= 1;
2256 if (interesting[sp] == 0)
2271 if (interesting[sp] == 0)
2257 ninteresting -= 1;
2272 ninteresting -= 1;
2258 }
2273 }
2259 }
2274 }
2260 } else if (dv == dp - 1) {
2275 } else if (dv == dp - 1) {
2261 long nsp = sp | sv;
2276 long nsp = sp | sv;
2262 if (nsp == sp)
2277 if (nsp == sp)
2263 continue;
2278 continue;
2264 seen[p] = nsp;
2279 seen[p] = nsp;
2265 interesting[sp] -= 1;
2280 interesting[sp] -= 1;
2266 if (interesting[sp] == 0)
2281 if (interesting[sp] == 0)
2267 ninteresting -= 1;
2282 ninteresting -= 1;
2268 if (interesting[nsp] == 0)
2283 if (interesting[nsp] == 0)
2269 ninteresting += 1;
2284 ninteresting += 1;
2270 interesting[nsp] += 1;
2285 interesting[nsp] += 1;
2271 }
2286 }
2272 }
2287 }
2273 interesting[sv] -= 1;
2288 interesting[sv] -= 1;
2274 if (interesting[sv] == 0)
2289 if (interesting[sv] == 0)
2275 ninteresting -= 1;
2290 ninteresting -= 1;
2276 }
2291 }
2277
2292
2278 final = 0;
2293 final = 0;
2279 j = ninteresting;
2294 j = ninteresting;
2280 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
2295 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
2281 if (interesting[i] == 0)
2296 if (interesting[i] == 0)
2282 continue;
2297 continue;
2283 final |= i;
2298 final |= i;
2284 j -= 1;
2299 j -= 1;
2285 }
2300 }
2286 if (final == 0) {
2301 if (final == 0) {
2287 keys = PyList_New(0);
2302 keys = PyList_New(0);
2288 goto bail;
2303 goto bail;
2289 }
2304 }
2290
2305
2291 dict = PyDict_New();
2306 dict = PyDict_New();
2292 if (dict == NULL)
2307 if (dict == NULL)
2293 goto bail;
2308 goto bail;
2294
2309
2295 for (i = 0; i < revcount; i++) {
2310 for (i = 0; i < revcount; i++) {
2296 PyObject *key;
2311 PyObject *key;
2297
2312
2298 if ((final & (1 << i)) == 0)
2313 if ((final & (1 << i)) == 0)
2299 continue;
2314 continue;
2300
2315
2301 key = PyList_GET_ITEM(revs, i);
2316 key = PyList_GET_ITEM(revs, i);
2302 Py_INCREF(key);
2317 Py_INCREF(key);
2303 Py_INCREF(Py_None);
2318 Py_INCREF(Py_None);
2304 if (PyDict_SetItem(dict, key, Py_None) == -1) {
2319 if (PyDict_SetItem(dict, key, Py_None) == -1) {
2305 Py_DECREF(key);
2320 Py_DECREF(key);
2306 Py_DECREF(Py_None);
2321 Py_DECREF(Py_None);
2307 goto bail;
2322 goto bail;
2308 }
2323 }
2309 }
2324 }
2310
2325
2311 keys = PyDict_Keys(dict);
2326 keys = PyDict_Keys(dict);
2312
2327
2313 bail:
2328 bail:
2314 free(depth);
2329 free(depth);
2315 free(seen);
2330 free(seen);
2316 free(interesting);
2331 free(interesting);
2317 Py_XDECREF(dict);
2332 Py_XDECREF(dict);
2318
2333
2319 return keys;
2334 return keys;
2320 }
2335 }
2321
2336
2322 /*
2337 /*
2323 * Given a (possibly overlapping) set of revs, return all the
2338 * Given a (possibly overlapping) set of revs, return all the
2324 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
2339 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
2325 */
2340 */
2326 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
2341 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
2327 {
2342 {
2328 PyObject *ret = NULL;
2343 PyObject *ret = NULL;
2329 Py_ssize_t argcount, i, len;
2344 Py_ssize_t argcount, i, len;
2330 bitmask repeat = 0;
2345 bitmask repeat = 0;
2331 int revcount = 0;
2346 int revcount = 0;
2332 int *revs;
2347 int *revs;
2333
2348
2334 argcount = PySequence_Length(args);
2349 argcount = PySequence_Length(args);
2335 revs = PyMem_Malloc(argcount * sizeof(*revs));
2350 revs = PyMem_Malloc(argcount * sizeof(*revs));
2336 if (argcount > 0 && revs == NULL)
2351 if (argcount > 0 && revs == NULL)
2337 return PyErr_NoMemory();
2352 return PyErr_NoMemory();
2338 len = index_length(self);
2353 len = index_length(self);
2339
2354
2340 for (i = 0; i < argcount; i++) {
2355 for (i = 0; i < argcount; i++) {
2341 static const int capacity = 24;
2356 static const int capacity = 24;
2342 PyObject *obj = PySequence_GetItem(args, i);
2357 PyObject *obj = PySequence_GetItem(args, i);
2343 bitmask x;
2358 bitmask x;
2344 long val;
2359 long val;
2345
2360
2346 if (!PyInt_Check(obj)) {
2361 if (!PyInt_Check(obj)) {
2347 PyErr_SetString(PyExc_TypeError,
2362 PyErr_SetString(PyExc_TypeError,
2348 "arguments must all be ints");
2363 "arguments must all be ints");
2349 Py_DECREF(obj);
2364 Py_DECREF(obj);
2350 goto bail;
2365 goto bail;
2351 }
2366 }
2352 val = PyInt_AsLong(obj);
2367 val = PyInt_AsLong(obj);
2353 Py_DECREF(obj);
2368 Py_DECREF(obj);
2354 if (val == -1) {
2369 if (val == -1) {
2355 ret = PyList_New(0);
2370 ret = PyList_New(0);
2356 goto done;
2371 goto done;
2357 }
2372 }
2358 if (val < 0 || val >= len) {
2373 if (val < 0 || val >= len) {
2359 PyErr_SetString(PyExc_IndexError, "index out of range");
2374 PyErr_SetString(PyExc_IndexError, "index out of range");
2360 goto bail;
2375 goto bail;
2361 }
2376 }
2362 /* this cheesy bloom filter lets us avoid some more
2377 /* this cheesy bloom filter lets us avoid some more
2363 * expensive duplicate checks in the common set-is-disjoint
2378 * expensive duplicate checks in the common set-is-disjoint
2364 * case */
2379 * case */
2365 x = 1ull << (val & 0x3f);
2380 x = 1ull << (val & 0x3f);
2366 if (repeat & x) {
2381 if (repeat & x) {
2367 int k;
2382 int k;
2368 for (k = 0; k < revcount; k++) {
2383 for (k = 0; k < revcount; k++) {
2369 if (val == revs[k])
2384 if (val == revs[k])
2370 goto duplicate;
2385 goto duplicate;
2371 }
2386 }
2372 } else
2387 } else
2373 repeat |= x;
2388 repeat |= x;
2374 if (revcount >= capacity) {
2389 if (revcount >= capacity) {
2375 PyErr_Format(PyExc_OverflowError,
2390 PyErr_Format(PyExc_OverflowError,
2376 "bitset size (%d) > capacity (%d)",
2391 "bitset size (%d) > capacity (%d)",
2377 revcount, capacity);
2392 revcount, capacity);
2378 goto bail;
2393 goto bail;
2379 }
2394 }
2380 revs[revcount++] = (int)val;
2395 revs[revcount++] = (int)val;
2381 duplicate:;
2396 duplicate:;
2382 }
2397 }
2383
2398
2384 if (revcount == 0) {
2399 if (revcount == 0) {
2385 ret = PyList_New(0);
2400 ret = PyList_New(0);
2386 goto done;
2401 goto done;
2387 }
2402 }
2388 if (revcount == 1) {
2403 if (revcount == 1) {
2389 PyObject *obj;
2404 PyObject *obj;
2390 ret = PyList_New(1);
2405 ret = PyList_New(1);
2391 if (ret == NULL)
2406 if (ret == NULL)
2392 goto bail;
2407 goto bail;
2393 obj = PyInt_FromLong(revs[0]);
2408 obj = PyInt_FromLong(revs[0]);
2394 if (obj == NULL)
2409 if (obj == NULL)
2395 goto bail;
2410 goto bail;
2396 PyList_SET_ITEM(ret, 0, obj);
2411 PyList_SET_ITEM(ret, 0, obj);
2397 goto done;
2412 goto done;
2398 }
2413 }
2399
2414
2400 ret = find_gca_candidates(self, revs, revcount);
2415 ret = find_gca_candidates(self, revs, revcount);
2401 if (ret == NULL)
2416 if (ret == NULL)
2402 goto bail;
2417 goto bail;
2403
2418
2404 done:
2419 done:
2405 PyMem_Free(revs);
2420 PyMem_Free(revs);
2406 return ret;
2421 return ret;
2407
2422
2408 bail:
2423 bail:
2409 PyMem_Free(revs);
2424 PyMem_Free(revs);
2410 Py_XDECREF(ret);
2425 Py_XDECREF(ret);
2411 return NULL;
2426 return NULL;
2412 }
2427 }
2413
2428
2414 /*
2429 /*
2415 * Given a (possibly overlapping) set of revs, return the greatest
2430 * Given a (possibly overlapping) set of revs, return the greatest
2416 * common ancestors: those with the longest path to the root.
2431 * common ancestors: those with the longest path to the root.
2417 */
2432 */
2418 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2433 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2419 {
2434 {
2420 PyObject *ret;
2435 PyObject *ret;
2421 PyObject *gca = index_commonancestorsheads(self, args);
2436 PyObject *gca = index_commonancestorsheads(self, args);
2422 if (gca == NULL)
2437 if (gca == NULL)
2423 return NULL;
2438 return NULL;
2424
2439
2425 if (PyList_GET_SIZE(gca) <= 1) {
2440 if (PyList_GET_SIZE(gca) <= 1) {
2426 return gca;
2441 return gca;
2427 }
2442 }
2428
2443
2429 ret = find_deepest(self, gca);
2444 ret = find_deepest(self, gca);
2430 Py_DECREF(gca);
2445 Py_DECREF(gca);
2431 return ret;
2446 return ret;
2432 }
2447 }
2433
2448
2434 /*
2449 /*
2435 * Invalidate any trie entries introduced by added revs.
2450 * Invalidate any trie entries introduced by added revs.
2436 */
2451 */
2437 static void index_invalidate_added(indexObject *self, Py_ssize_t start)
2452 static void index_invalidate_added(indexObject *self, Py_ssize_t start)
2438 {
2453 {
2439 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
2454 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
2440
2455
2441 for (i = start; i < len; i++) {
2456 for (i = start; i < len; i++) {
2442 PyObject *tuple = PyList_GET_ITEM(self->added, i);
2457 PyObject *tuple = PyList_GET_ITEM(self->added, i);
2443 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
2458 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
2444
2459
2445 nt_delete_node(&self->nt, PyBytes_AS_STRING(node));
2460 nt_delete_node(&self->nt, PyBytes_AS_STRING(node));
2446 }
2461 }
2447
2462
2448 if (start == 0)
2463 if (start == 0)
2449 Py_CLEAR(self->added);
2464 Py_CLEAR(self->added);
2450 }
2465 }
2451
2466
2452 /*
2467 /*
2453 * Delete a numeric range of revs, which must be at the end of the
2468 * Delete a numeric range of revs, which must be at the end of the
2454 * range, but exclude the sentinel nullid entry.
2469 * range, but exclude the sentinel nullid entry.
2455 */
2470 */
2456 static int index_slice_del(indexObject *self, PyObject *item)
2471 static int index_slice_del(indexObject *self, PyObject *item)
2457 {
2472 {
2458 Py_ssize_t start, stop, step, slicelength;
2473 Py_ssize_t start, stop, step, slicelength;
2459 Py_ssize_t length = index_length(self) + 1;
2474 Py_ssize_t length = index_length(self) + 1;
2460 int ret = 0;
2475 int ret = 0;
2461
2476
2462 /* Argument changed from PySliceObject* to PyObject* in Python 3. */
2477 /* Argument changed from PySliceObject* to PyObject* in Python 3. */
2463 #ifdef IS_PY3K
2478 #ifdef IS_PY3K
2464 if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
2479 if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
2465 &slicelength) < 0)
2480 &slicelength) < 0)
2466 #else
2481 #else
2467 if (PySlice_GetIndicesEx((PySliceObject *)item, length, &start, &stop,
2482 if (PySlice_GetIndicesEx((PySliceObject *)item, length, &start, &stop,
2468 &step, &slicelength) < 0)
2483 &step, &slicelength) < 0)
2469 #endif
2484 #endif
2470 return -1;
2485 return -1;
2471
2486
2472 if (slicelength <= 0)
2487 if (slicelength <= 0)
2473 return 0;
2488 return 0;
2474
2489
2475 if ((step < 0 && start < stop) || (step > 0 && start > stop))
2490 if ((step < 0 && start < stop) || (step > 0 && start > stop))
2476 stop = start;
2491 stop = start;
2477
2492
2478 if (step < 0) {
2493 if (step < 0) {
2479 stop = start + 1;
2494 stop = start + 1;
2480 start = stop + step * (slicelength - 1) - 1;
2495 start = stop + step * (slicelength - 1) - 1;
2481 step = -step;
2496 step = -step;
2482 }
2497 }
2483
2498
2484 if (step != 1) {
2499 if (step != 1) {
2485 PyErr_SetString(PyExc_ValueError,
2500 PyErr_SetString(PyExc_ValueError,
2486 "revlog index delete requires step size of 1");
2501 "revlog index delete requires step size of 1");
2487 return -1;
2502 return -1;
2488 }
2503 }
2489
2504
2490 if (stop != length - 1) {
2505 if (stop != length - 1) {
2491 PyErr_SetString(PyExc_IndexError,
2506 PyErr_SetString(PyExc_IndexError,
2492 "revlog index deletion indices are invalid");
2507 "revlog index deletion indices are invalid");
2493 return -1;
2508 return -1;
2494 }
2509 }
2495
2510
2496 if (start < self->length) {
2511 if (start < self->length) {
2497 if (self->ntinitialized) {
2512 if (self->ntinitialized) {
2498 Py_ssize_t i;
2513 Py_ssize_t i;
2499
2514
2500 for (i = start; i < self->length; i++) {
2515 for (i = start; i < self->length; i++) {
2501 const char *node = index_node_existing(self, i);
2516 const char *node = index_node_existing(self, i);
2502 if (node == NULL)
2517 if (node == NULL)
2503 return -1;
2518 return -1;
2504
2519
2505 nt_delete_node(&self->nt, node);
2520 nt_delete_node(&self->nt, node);
2506 }
2521 }
2507 if (self->added)
2522 if (self->added)
2508 index_invalidate_added(self, 0);
2523 index_invalidate_added(self, 0);
2509 if (self->ntrev > start)
2524 if (self->ntrev > start)
2510 self->ntrev = (int)start;
2525 self->ntrev = (int)start;
2511 }
2526 }
2512 self->length = start;
2527 self->length = start;
2513 if (start < self->raw_length) {
2528 if (start < self->raw_length) {
2514 if (self->cache) {
2529 if (self->cache) {
2515 Py_ssize_t i;
2530 Py_ssize_t i;
2516 for (i = start; i < self->raw_length; i++)
2531 for (i = start; i < self->raw_length; i++)
2517 Py_CLEAR(self->cache[i]);
2532 Py_CLEAR(self->cache[i]);
2518 }
2533 }
2519 self->raw_length = start;
2534 self->raw_length = start;
2520 }
2535 }
2521 goto done;
2536 goto done;
2522 }
2537 }
2523
2538
2524 if (self->ntinitialized) {
2539 if (self->ntinitialized) {
2525 index_invalidate_added(self, start - self->length);
2540 index_invalidate_added(self, start - self->length);
2526 if (self->ntrev > start)
2541 if (self->ntrev > start)
2527 self->ntrev = (int)start;
2542 self->ntrev = (int)start;
2528 }
2543 }
2529 if (self->added)
2544 if (self->added)
2530 ret = PyList_SetSlice(self->added, start - self->length,
2545 ret = PyList_SetSlice(self->added, start - self->length,
2531 PyList_GET_SIZE(self->added), NULL);
2546 PyList_GET_SIZE(self->added), NULL);
2532 done:
2547 done:
2533 Py_CLEAR(self->headrevs);
2548 Py_CLEAR(self->headrevs);
2534 return ret;
2549 return ret;
2535 }
2550 }
2536
2551
2537 /*
2552 /*
2538 * Supported ops:
2553 * Supported ops:
2539 *
2554 *
2540 * slice deletion
2555 * slice deletion
2541 * string assignment (extend node->rev mapping)
2556 * string assignment (extend node->rev mapping)
2542 * string deletion (shrink node->rev mapping)
2557 * string deletion (shrink node->rev mapping)
2543 */
2558 */
2544 static int index_assign_subscript(indexObject *self, PyObject *item,
2559 static int index_assign_subscript(indexObject *self, PyObject *item,
2545 PyObject *value)
2560 PyObject *value)
2546 {
2561 {
2547 char *node;
2562 char *node;
2548 long rev;
2563 long rev;
2549
2564
2550 if (PySlice_Check(item) && value == NULL)
2565 if (PySlice_Check(item) && value == NULL)
2551 return index_slice_del(self, item);
2566 return index_slice_del(self, item);
2552
2567
2553 if (node_check(item, &node) == -1)
2568 if (node_check(item, &node) == -1)
2554 return -1;
2569 return -1;
2555
2570
2556 if (value == NULL)
2571 if (value == NULL)
2557 return self->ntinitialized ? nt_delete_node(&self->nt, node)
2572 return self->ntinitialized ? nt_delete_node(&self->nt, node)
2558 : 0;
2573 : 0;
2559 rev = PyInt_AsLong(value);
2574 rev = PyInt_AsLong(value);
2560 if (rev > INT_MAX || rev < 0) {
2575 if (rev > INT_MAX || rev < 0) {
2561 if (!PyErr_Occurred())
2576 if (!PyErr_Occurred())
2562 PyErr_SetString(PyExc_ValueError, "rev out of range");
2577 PyErr_SetString(PyExc_ValueError, "rev out of range");
2563 return -1;
2578 return -1;
2564 }
2579 }
2565
2580
2566 if (index_init_nt(self) == -1)
2581 if (index_init_nt(self) == -1)
2567 return -1;
2582 return -1;
2568 return nt_insert(&self->nt, node, (int)rev);
2583 return nt_insert(&self->nt, node, (int)rev);
2569 }
2584 }
2570
2585
2571 /*
2586 /*
2572 * Find all RevlogNG entries in an index that has inline data. Update
2587 * Find all RevlogNG entries in an index that has inline data. Update
2573 * the optional "offsets" table with those entries.
2588 * the optional "offsets" table with those entries.
2574 */
2589 */
2575 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
2590 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
2576 {
2591 {
2577 const char *data = (const char *)self->buf.buf;
2592 const char *data = (const char *)self->buf.buf;
2578 Py_ssize_t pos = 0;
2593 Py_ssize_t pos = 0;
2579 Py_ssize_t end = self->buf.len;
2594 Py_ssize_t end = self->buf.len;
2580 long incr = v1_hdrsize;
2595 long incr = v1_hdrsize;
2581 Py_ssize_t len = 0;
2596 Py_ssize_t len = 0;
2582
2597
2583 while (pos + v1_hdrsize <= end && pos >= 0) {
2598 while (pos + v1_hdrsize <= end && pos >= 0) {
2584 uint32_t comp_len;
2599 uint32_t comp_len;
2585 /* 3rd element of header is length of compressed inline data */
2600 /* 3rd element of header is length of compressed inline data */
2586 comp_len = getbe32(data + pos + 8);
2601 comp_len = getbe32(data + pos + 8);
2587 incr = v1_hdrsize + comp_len;
2602 incr = v1_hdrsize + comp_len;
2588 if (offsets)
2603 if (offsets)
2589 offsets[len] = data + pos;
2604 offsets[len] = data + pos;
2590 len++;
2605 len++;
2591 pos += incr;
2606 pos += incr;
2592 }
2607 }
2593
2608
2594 if (pos != end) {
2609 if (pos != end) {
2595 if (!PyErr_Occurred())
2610 if (!PyErr_Occurred())
2596 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2611 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2597 return -1;
2612 return -1;
2598 }
2613 }
2599
2614
2600 return len;
2615 return len;
2601 }
2616 }
2602
2617
/*
 * tp_init for the index type: index(data, inlined).
 *
 * "data" must support the buffer protocol (the raw index file
 * contents); "inlined" is a truthy flag indicating whether revision
 * data is stored inline with the index records. Returns 0 on success,
 * -1 with an exception set on failure.
 */
static int index_init(indexObject *self, PyObject *args)
{
	PyObject *data_obj, *inlined_obj;
	Py_ssize_t size;

	/* Initialize before argument-checking to avoid index_dealloc() crash.
	 */
	self->raw_length = 0;
	self->added = NULL;
	self->cache = NULL;
	self->data = NULL;
	memset(&self->buf, 0, sizeof(self->buf));
	self->headrevs = NULL;
	self->filteredrevs = Py_None;
	Py_INCREF(Py_None);
	self->ntinitialized = 0;
	self->offsets = NULL;

	if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
		return -1;
	if (!PyObject_CheckBuffer(data_obj)) {
		PyErr_SetString(PyExc_TypeError,
		                "data does not support buffer interface");
		return -1;
	}

	/* the buffer is held (and released) via self->buf for the life of
	 * the index */
	if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
		return -1;
	size = self->buf.len;

	self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
	self->data = data_obj;

	self->ntlookups = self->ntmisses = 0;
	self->ntrev = -1;
	Py_INCREF(self->data);

	if (self->inlined) {
		/* records are variable-length; walk them to count */
		Py_ssize_t len = inline_scan(self, NULL);
		if (len == -1)
			goto bail;
		self->raw_length = len;
		self->length = len;
	} else {
		/* fixed-size records: the size must be an exact multiple */
		if (size % v1_hdrsize) {
			PyErr_SetString(PyExc_ValueError, "corrupt index file");
			goto bail;
		}
		self->raw_length = size / v1_hdrsize;
		self->length = self->raw_length;
	}

	return 0;
bail:
	/* acquired references/buffers are released by index_dealloc() */
	return -1;
}
2659
2674
/*
 * Getter for the "nodemap" attribute: the index object doubles as its
 * own node->rev mapping, so return a new reference to self.
 */
static PyObject *index_nodemap(indexObject *self)
{
	Py_INCREF(self);
	return (PyObject *)self;
}
2665
2680
/*
 * Release every derived cache held by the index: the per-rev entry
 * tuple cache, the inline offsets table, the node trie, and the cached
 * head revisions. Shared by clearcaches() and index_dealloc().
 */
static void _index_clearcaches(indexObject *self)
{
	if (self->cache) {
		Py_ssize_t i;

		for (i = 0; i < self->raw_length; i++)
			Py_CLEAR(self->cache[i]);
		free(self->cache);
		self->cache = NULL;
	}
	if (self->offsets) {
		PyMem_Free((void *)self->offsets);
		self->offsets = NULL;
	}
	if (self->ntinitialized) {
		nt_dealloc(&self->nt);
	}
	self->ntinitialized = 0;
	Py_CLEAR(self->headrevs);
}
2686
2701
2687 static PyObject *index_clearcaches(indexObject *self)
2702 static PyObject *index_clearcaches(indexObject *self)
2688 {
2703 {
2689 _index_clearcaches(self);
2704 _index_clearcaches(self);
2690 self->ntrev = -1;
2705 self->ntrev = -1;
2691 self->ntlookups = self->ntmisses = 0;
2706 self->ntlookups = self->ntmisses = 0;
2692 Py_RETURN_NONE;
2707 Py_RETURN_NONE;
2693 }
2708 }
2694
2709
/*
 * tp_dealloc: release caches, the borrowed buffer, and every owned
 * reference, then free the object itself.
 */
static void index_dealloc(indexObject *self)
{
	_index_clearcaches(self);
	Py_XDECREF(self->filteredrevs);
	if (self->buf.buf) {
		PyBuffer_Release(&self->buf);
		memset(&self->buf, 0, sizeof(self->buf));
	}
	Py_XDECREF(self->data);
	Py_XDECREF(self->added);
	PyObject_Del(self);
}
2707
2722
/* Sequence protocol: len(index), index[rev], and "node in index". */
static PySequenceMethods index_sequence_methods = {
    (lenfunc)index_length,      /* sq_length */
    0,                          /* sq_concat */
    0,                          /* sq_repeat */
    (ssizeargfunc)index_get,    /* sq_item */
    0,                          /* sq_slice */
    0,                          /* sq_ass_item */
    0,                          /* sq_ass_slice */
    (objobjproc)index_contains, /* sq_contains */
};
2718
2733
/* Mapping protocol: node->rev lookup, assignment, and slice deletion. */
static PyMappingMethods index_mapping_methods = {
    (lenfunc)index_length,                 /* mp_length */
    (binaryfunc)index_getitem,             /* mp_subscript */
    (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
};
2724
2739
/* Methods exposed on the parsers.index type. */
static PyMethodDef index_methods[] = {
    {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
     "return the gca set of the given revs"},
    {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
     METH_VARARGS,
     "return the heads of the common ancestors of the given revs"},
    {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
     "clear the index caches"},
    {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
    {"has_node", (PyCFunction)index_m_has_node, METH_O,
     "return True if the node exist in the index"},
    {"rev", (PyCFunction)index_m_rev, METH_O,
     "return `rev` associated with a node or raise RevlogError"},
    {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
     "compute phases"},
    {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
     "reachableroots"},
    {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
     "get head revisions"}, /* Can do filtering since 3.2 */
    {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
     "get filtered head revisions"}, /* Can always do filtering */
    {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
     "True if the object is a snapshot"},
    {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS,
     "Gather snapshot data in a cache dict"},
    {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
     "determine revisions with deltas to reconstruct fulltext"},
    {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
     METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"},
    {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
    {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
     "match a potentially ambiguous node ID"},
    {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
     "find length of shortest hex nodeid of a binary ID"},
    {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
    {NULL} /* Sentinel */
};
2760
2777
/* Attribute getters: "nodemap" returns the index itself (see
 * index_nodemap). */
static PyGetSetDef index_getset[] = {
    {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
    {NULL} /* Sentinel */
};
2765
2782
/* Type object for parsers.index: a lazily-parsed RevlogNG index with
 * sequence, mapping and method protocols wired in above. */
PyTypeObject HgRevlogIndex_Type = {
    PyVarObject_HEAD_INIT(NULL, 0) /* header */
    "parsers.index",               /* tp_name */
    sizeof(indexObject),           /* tp_basicsize */
    0,                             /* tp_itemsize */
    (destructor)index_dealloc,     /* tp_dealloc */
    0,                             /* tp_print */
    0,                             /* tp_getattr */
    0,                             /* tp_setattr */
    0,                             /* tp_compare */
    0,                             /* tp_repr */
    0,                             /* tp_as_number */
    &index_sequence_methods,       /* tp_as_sequence */
    &index_mapping_methods,        /* tp_as_mapping */
    0,                             /* tp_hash */
    0,                             /* tp_call */
    0,                             /* tp_str */
    0,                             /* tp_getattro */
    0,                             /* tp_setattro */
    0,                             /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,            /* tp_flags */
    "revlog index",                /* tp_doc */
    0,                             /* tp_traverse */
    0,                             /* tp_clear */
    0,                             /* tp_richcompare */
    0,                             /* tp_weaklistoffset */
    0,                             /* tp_iter */
    0,                             /* tp_iternext */
    index_methods,                 /* tp_methods */
    0,                             /* tp_members */
    index_getset,                  /* tp_getset */
    0,                             /* tp_base */
    0,                             /* tp_dict */
    0,                             /* tp_descr_get */
    0,                             /* tp_descr_set */
    0,                             /* tp_dictoffset */
    (initproc)index_init,          /* tp_init */
    0,                             /* tp_alloc */
};
2805
2822
2806 /*
2823 /*
2807 * returns a tuple of the form (index, index, cache) with elements as
2824 * returns a tuple of the form (index, index, cache) with elements as
2808 * follows:
2825 * follows:
2809 *
2826 *
2810 * index: an index object that lazily parses RevlogNG records
2827 * index: an index object that lazily parses RevlogNG records
2811 * cache: if data is inlined, a tuple (0, index_file_content), else None
2828 * cache: if data is inlined, a tuple (0, index_file_content), else None
2812 * index_file_content could be a string, or a buffer
2829 * index_file_content could be a string, or a buffer
2813 *
2830 *
2814 * added complications are for backwards compatibility
2831 * added complications are for backwards compatibility
2815 */
2832 */
PyObject *parse_index2(PyObject *self, PyObject *args)
{
	PyObject *tuple = NULL, *cache = NULL;
	indexObject *idx;
	int ret;

	idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
	if (idx == NULL)
		goto bail;

	/* index_init takes the same (data, inlined) argument tuple */
	ret = index_init(idx, args);
	if (ret == -1)
		goto bail;

	if (idx->inlined) {
		/* inline revlogs keep a (0, file_content) cache tuple so
		 * callers can reuse the already-read data */
		cache = Py_BuildValue("iO", 0, idx->data);
		if (cache == NULL)
			goto bail;
	} else {
		cache = Py_None;
		Py_INCREF(cache);
	}

	/* "N" steals our references to idx and cache on success.
	 * NOTE(review): if Py_BuildValue("NN", ...) itself fails, the
	 * stolen-reference semantics on the failure path are subtle and
	 * the bail decrefs below presumably rely on tuple being NULL —
	 * confirm against the Py_BuildValue documentation if touching
	 * this. */
	tuple = Py_BuildValue("NN", idx, cache);
	if (!tuple)
		goto bail;
	return tuple;

bail:
	Py_XDECREF(idx);
	Py_XDECREF(cache);
	Py_XDECREF(tuple);
	return NULL;
}
2850
2867
#ifdef WITH_RUST

/* rustlazyancestors: iteration over ancestors implemented in Rust
 *
 * This class holds a reference to an index and to the Rust iterator.
 */
typedef struct rustlazyancestorsObjectStruct rustlazyancestorsObject;

struct rustlazyancestorsObjectStruct {
	PyObject_HEAD
	/* Type-specific fields go here. */
	indexObject *index; /* Ref kept to avoid GC'ing the index */
	void *iter;         /* Rust iterator */
};

/* FFI exposed from Rust code.
 * rustlazyancestors_init returns NULL on error (the Python exception
 * is expected to already be set by the parents callback); the returned
 * iterator must be released with rustlazyancestors_drop. */
rustlazyancestorsObject *rustlazyancestors_init(indexObject *index,
                                                /* intrevs vector */
                                                Py_ssize_t initrevslen,
                                                long *initrevs, long stoprev,
                                                int inclusive);
void rustlazyancestors_drop(rustlazyancestorsObject *self);
int rustlazyancestors_next(rustlazyancestorsObject *self);
int rustlazyancestors_contains(rustlazyancestorsObject *self, long rev);
2875
2892
2876 /* CPython instance methods */
2893 /* CPython instance methods */
2877 static int rustla_init(rustlazyancestorsObject *self, PyObject *args)
2894 static int rustla_init(rustlazyancestorsObject *self, PyObject *args)
2878 {
2895 {
2879 PyObject *initrevsarg = NULL;
2896 PyObject *initrevsarg = NULL;
2880 PyObject *inclusivearg = NULL;
2897 PyObject *inclusivearg = NULL;
2881 long stoprev = 0;
2898 long stoprev = 0;
2882 long *initrevs = NULL;
2899 long *initrevs = NULL;
2883 int inclusive = 0;
2900 int inclusive = 0;
2884 Py_ssize_t i;
2901 Py_ssize_t i;
2885
2902
2886 indexObject *index;
2903 indexObject *index;
2887 if (!PyArg_ParseTuple(args, "O!O!lO!", &HgRevlogIndex_Type, &index,
2904 if (!PyArg_ParseTuple(args, "O!O!lO!", &HgRevlogIndex_Type, &index,
2888 &PyList_Type, &initrevsarg, &stoprev,
2905 &PyList_Type, &initrevsarg, &stoprev,
2889 &PyBool_Type, &inclusivearg))
2906 &PyBool_Type, &inclusivearg))
2890 return -1;
2907 return -1;
2891
2908
2892 Py_INCREF(index);
2909 Py_INCREF(index);
2893 self->index = index;
2910 self->index = index;
2894
2911
2895 if (inclusivearg == Py_True)
2912 if (inclusivearg == Py_True)
2896 inclusive = 1;
2913 inclusive = 1;
2897
2914
2898 Py_ssize_t linit = PyList_GET_SIZE(initrevsarg);
2915 Py_ssize_t linit = PyList_GET_SIZE(initrevsarg);
2899
2916
2900 initrevs = (long *)calloc(linit, sizeof(long));
2917 initrevs = (long *)calloc(linit, sizeof(long));
2901
2918
2902 if (initrevs == NULL) {
2919 if (initrevs == NULL) {
2903 PyErr_NoMemory();
2920 PyErr_NoMemory();
2904 goto bail;
2921 goto bail;
2905 }
2922 }
2906
2923
2907 for (i = 0; i < linit; i++) {
2924 for (i = 0; i < linit; i++) {
2908 initrevs[i] = PyInt_AsLong(PyList_GET_ITEM(initrevsarg, i));
2925 initrevs[i] = PyInt_AsLong(PyList_GET_ITEM(initrevsarg, i));
2909 }
2926 }
2910 if (PyErr_Occurred())
2927 if (PyErr_Occurred())
2911 goto bail;
2928 goto bail;
2912
2929
2913 self->iter =
2930 self->iter =
2914 rustlazyancestors_init(index, linit, initrevs, stoprev, inclusive);
2931 rustlazyancestors_init(index, linit, initrevs, stoprev, inclusive);
2915 if (self->iter == NULL) {
2932 if (self->iter == NULL) {
2916 /* if this is because of GraphError::ParentOutOfRange
2933 /* if this is because of GraphError::ParentOutOfRange
2917 * HgRevlogIndex_GetParents() has already set the proper
2934 * HgRevlogIndex_GetParents() has already set the proper
2918 * exception */
2935 * exception */
2919 goto bail;
2936 goto bail;
2920 }
2937 }
2921
2938
2922 free(initrevs);
2939 free(initrevs);
2923 return 0;
2940 return 0;
2924
2941
2925 bail:
2942 bail:
2926 free(initrevs);
2943 free(initrevs);
2927 return -1;
2944 return -1;
2928 };
2945 };
2929
2946
2930 static void rustla_dealloc(rustlazyancestorsObject *self)
2947 static void rustla_dealloc(rustlazyancestorsObject *self)
2931 {
2948 {
2932 Py_XDECREF(self->index);
2949 Py_XDECREF(self->index);
2933 if (self->iter != NULL) { /* can happen if rustla_init failed */
2950 if (self->iter != NULL) { /* can happen if rustla_init failed */
2934 rustlazyancestors_drop(self->iter);
2951 rustlazyancestors_drop(self->iter);
2935 }
2952 }
2936 PyObject_Del(self);
2953 PyObject_Del(self);
2937 }
2954 }
2938
2955
2939 static PyObject *rustla_next(rustlazyancestorsObject *self)
2956 static PyObject *rustla_next(rustlazyancestorsObject *self)
2940 {
2957 {
2941 int res = rustlazyancestors_next(self->iter);
2958 int res = rustlazyancestors_next(self->iter);
2942 if (res == -1) {
2959 if (res == -1) {
2943 /* Setting an explicit exception seems unnecessary
2960 /* Setting an explicit exception seems unnecessary
2944 * as examples from Python source code (Objects/rangeobjets.c
2961 * as examples from Python source code (Objects/rangeobjets.c
2945 * and Modules/_io/stringio.c) seem to demonstrate.
2962 * and Modules/_io/stringio.c) seem to demonstrate.
2946 */
2963 */
2947 return NULL;
2964 return NULL;
2948 }
2965 }
2949 return PyInt_FromLong(res);
2966 return PyInt_FromLong(res);
2950 }
2967 }
2951
2968
2952 static int rustla_contains(rustlazyancestorsObject *self, PyObject *rev)
2969 static int rustla_contains(rustlazyancestorsObject *self, PyObject *rev)
2953 {
2970 {
2954 long lrev;
2971 long lrev;
2955 if (!pylong_to_long(rev, &lrev)) {
2972 if (!pylong_to_long(rev, &lrev)) {
2956 PyErr_Clear();
2973 PyErr_Clear();
2957 return 0;
2974 return 0;
2958 }
2975 }
2959 return rustlazyancestors_contains(self->iter, lrev);
2976 return rustlazyancestors_contains(self->iter, lrev);
2960 }
2977 }
2961
2978
2962 static PySequenceMethods rustla_sequence_methods = {
2979 static PySequenceMethods rustla_sequence_methods = {
2963 0, /* sq_length */
2980 0, /* sq_length */
2964 0, /* sq_concat */
2981 0, /* sq_concat */
2965 0, /* sq_repeat */
2982 0, /* sq_repeat */
2966 0, /* sq_item */
2983 0, /* sq_item */
2967 0, /* sq_slice */
2984 0, /* sq_slice */
2968 0, /* sq_ass_item */
2985 0, /* sq_ass_item */
2969 0, /* sq_ass_slice */
2986 0, /* sq_ass_slice */
2970 (objobjproc)rustla_contains, /* sq_contains */
2987 (objobjproc)rustla_contains, /* sq_contains */
2971 };
2988 };
2972
2989
2973 static PyTypeObject rustlazyancestorsType = {
2990 static PyTypeObject rustlazyancestorsType = {
2974 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2991 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2975 "parsers.rustlazyancestors", /* tp_name */
2992 "parsers.rustlazyancestors", /* tp_name */
2976 sizeof(rustlazyancestorsObject), /* tp_basicsize */
2993 sizeof(rustlazyancestorsObject), /* tp_basicsize */
2977 0, /* tp_itemsize */
2994 0, /* tp_itemsize */
2978 (destructor)rustla_dealloc, /* tp_dealloc */
2995 (destructor)rustla_dealloc, /* tp_dealloc */
2979 0, /* tp_print */
2996 0, /* tp_print */
2980 0, /* tp_getattr */
2997 0, /* tp_getattr */
2981 0, /* tp_setattr */
2998 0, /* tp_setattr */
2982 0, /* tp_compare */
2999 0, /* tp_compare */
2983 0, /* tp_repr */
3000 0, /* tp_repr */
2984 0, /* tp_as_number */
3001 0, /* tp_as_number */
2985 &rustla_sequence_methods, /* tp_as_sequence */
3002 &rustla_sequence_methods, /* tp_as_sequence */
2986 0, /* tp_as_mapping */
3003 0, /* tp_as_mapping */
2987 0, /* tp_hash */
3004 0, /* tp_hash */
2988 0, /* tp_call */
3005 0, /* tp_call */
2989 0, /* tp_str */
3006 0, /* tp_str */
2990 0, /* tp_getattro */
3007 0, /* tp_getattro */
2991 0, /* tp_setattro */
3008 0, /* tp_setattro */
2992 0, /* tp_as_buffer */
3009 0, /* tp_as_buffer */
2993 Py_TPFLAGS_DEFAULT, /* tp_flags */
3010 Py_TPFLAGS_DEFAULT, /* tp_flags */
2994 "Iterator over ancestors, implemented in Rust", /* tp_doc */
3011 "Iterator over ancestors, implemented in Rust", /* tp_doc */
2995 0, /* tp_traverse */
3012 0, /* tp_traverse */
2996 0, /* tp_clear */
3013 0, /* tp_clear */
2997 0, /* tp_richcompare */
3014 0, /* tp_richcompare */
2998 0, /* tp_weaklistoffset */
3015 0, /* tp_weaklistoffset */
2999 0, /* tp_iter */
3016 0, /* tp_iter */
3000 (iternextfunc)rustla_next, /* tp_iternext */
3017 (iternextfunc)rustla_next, /* tp_iternext */
3001 0, /* tp_methods */
3018 0, /* tp_methods */
3002 0, /* tp_members */
3019 0, /* tp_members */
3003 0, /* tp_getset */
3020 0, /* tp_getset */
3004 0, /* tp_base */
3021 0, /* tp_base */
3005 0, /* tp_dict */
3022 0, /* tp_dict */
3006 0, /* tp_descr_get */
3023 0, /* tp_descr_get */
3007 0, /* tp_descr_set */
3024 0, /* tp_descr_set */
3008 0, /* tp_dictoffset */
3025 0, /* tp_dictoffset */
3009 (initproc)rustla_init, /* tp_init */
3026 (initproc)rustla_init, /* tp_init */
3010 0, /* tp_alloc */
3027 0, /* tp_alloc */
3011 };
3028 };
3012 #endif /* WITH_RUST */
3029 #endif /* WITH_RUST */
3013
3030
3014 void revlog_module_init(PyObject *mod)
3031 void revlog_module_init(PyObject *mod)
3015 {
3032 {
3016 PyObject *caps = NULL;
3033 PyObject *caps = NULL;
3017 HgRevlogIndex_Type.tp_new = PyType_GenericNew;
3034 HgRevlogIndex_Type.tp_new = PyType_GenericNew;
3018 if (PyType_Ready(&HgRevlogIndex_Type) < 0)
3035 if (PyType_Ready(&HgRevlogIndex_Type) < 0)
3019 return;
3036 return;
3020 Py_INCREF(&HgRevlogIndex_Type);
3037 Py_INCREF(&HgRevlogIndex_Type);
3021 PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);
3038 PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);
3022
3039
3023 nodetreeType.tp_new = PyType_GenericNew;
3040 nodetreeType.tp_new = PyType_GenericNew;
3024 if (PyType_Ready(&nodetreeType) < 0)
3041 if (PyType_Ready(&nodetreeType) < 0)
3025 return;
3042 return;
3026 Py_INCREF(&nodetreeType);
3043 Py_INCREF(&nodetreeType);
3027 PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);
3044 PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);
3028
3045
3029 if (!nullentry) {
3046 if (!nullentry) {
3030 nullentry =
3047 nullentry =
3031 Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0, 0, -1,
3048 Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0, 0, -1,
3032 -1, -1, -1, nullid, (Py_ssize_t)20);
3049 -1, -1, -1, nullid, (Py_ssize_t)20);
3033 }
3050 }
3034 if (nullentry)
3051 if (nullentry)
3035 PyObject_GC_UnTrack(nullentry);
3052 PyObject_GC_UnTrack(nullentry);
3036
3053
3037 caps = PyCapsule_New(HgRevlogIndex_GetParents,
3054 caps = PyCapsule_New(HgRevlogIndex_GetParents,
3038 "mercurial.cext.parsers.index_get_parents_CAPI",
3055 "mercurial.cext.parsers.index_get_parents_CAPI",
3039 NULL);
3056 NULL);
3040 if (caps != NULL)
3057 if (caps != NULL)
3041 PyModule_AddObject(mod, "index_get_parents_CAPI", caps);
3058 PyModule_AddObject(mod, "index_get_parents_CAPI", caps);
3042
3059
3043 #ifdef WITH_RUST
3060 #ifdef WITH_RUST
3044 rustlazyancestorsType.tp_new = PyType_GenericNew;
3061 rustlazyancestorsType.tp_new = PyType_GenericNew;
3045 if (PyType_Ready(&rustlazyancestorsType) < 0)
3062 if (PyType_Ready(&rustlazyancestorsType) < 0)
3046 return;
3063 return;
3047 Py_INCREF(&rustlazyancestorsType);
3064 Py_INCREF(&rustlazyancestorsType);
3048 PyModule_AddObject(mod, "rustlazyancestors",
3065 PyModule_AddObject(mod, "rustlazyancestors",
3049 (PyObject *)&rustlazyancestorsType);
3066 (PyObject *)&rustlazyancestorsType);
3050 #endif
3067 #endif
3051 }
3068 }
@@ -1,157 +1,157 b''
1 # policy.py - module policy logic for Mercurial.
1 # policy.py - module policy logic for Mercurial.
2 #
2 #
3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import os
10 import os
11 import sys
11 import sys
12
12
13 from .pycompat import getattr
13 from .pycompat import getattr
14
14
15 # Rules for how modules can be loaded. Values are:
15 # Rules for how modules can be loaded. Values are:
16 #
16 #
17 # c - require C extensions
17 # c - require C extensions
18 # rust+c - require Rust and C extensions
18 # rust+c - require Rust and C extensions
19 # rust+c-allow - allow Rust and C extensions with fallback to pure Python
19 # rust+c-allow - allow Rust and C extensions with fallback to pure Python
20 # for each
20 # for each
21 # allow - allow pure Python implementation when C loading fails
21 # allow - allow pure Python implementation when C loading fails
22 # cffi - required cffi versions (implemented within pure module)
22 # cffi - required cffi versions (implemented within pure module)
23 # cffi-allow - allow pure Python implementation if cffi version is missing
23 # cffi-allow - allow pure Python implementation if cffi version is missing
24 # py - only load pure Python modules
24 # py - only load pure Python modules
25 #
25 #
26 # By default, fall back to the pure modules so the in-place build can
26 # By default, fall back to the pure modules so the in-place build can
27 # run without recompiling the C extensions. This will be overridden by
27 # run without recompiling the C extensions. This will be overridden by
28 # __modulepolicy__ generated by setup.py.
28 # __modulepolicy__ generated by setup.py.
29 policy = b'allow'
29 policy = b'allow'
30 _packageprefs = {
30 _packageprefs = {
31 # policy: (versioned package, pure package)
31 # policy: (versioned package, pure package)
32 b'c': ('cext', None),
32 b'c': ('cext', None),
33 b'allow': ('cext', 'pure'),
33 b'allow': ('cext', 'pure'),
34 b'cffi': ('cffi', None),
34 b'cffi': ('cffi', None),
35 b'cffi-allow': ('cffi', 'pure'),
35 b'cffi-allow': ('cffi', 'pure'),
36 b'py': (None, 'pure'),
36 b'py': (None, 'pure'),
37 # For now, rust policies impact importrust only
37 # For now, rust policies impact importrust only
38 b'rust+c': ('cext', None),
38 b'rust+c': ('cext', None),
39 b'rust+c-allow': ('cext', 'pure'),
39 b'rust+c-allow': ('cext', 'pure'),
40 }
40 }
41
41
42 try:
42 try:
43 from . import __modulepolicy__
43 from . import __modulepolicy__
44
44
45 policy = __modulepolicy__.modulepolicy
45 policy = __modulepolicy__.modulepolicy
46 except ImportError:
46 except ImportError:
47 pass
47 pass
48
48
49 # PyPy doesn't load C extensions.
49 # PyPy doesn't load C extensions.
50 #
50 #
51 # The canonical way to do this is to test platform.python_implementation().
51 # The canonical way to do this is to test platform.python_implementation().
52 # But we don't import platform and don't bloat for it here.
52 # But we don't import platform and don't bloat for it here.
53 if '__pypy__' in sys.builtin_module_names:
53 if '__pypy__' in sys.builtin_module_names:
54 policy = b'cffi'
54 policy = b'cffi'
55
55
56 # Environment variable can always force settings.
56 # Environment variable can always force settings.
57 if sys.version_info[0] >= 3:
57 if sys.version_info[0] >= 3:
58 if 'HGMODULEPOLICY' in os.environ:
58 if 'HGMODULEPOLICY' in os.environ:
59 policy = os.environ['HGMODULEPOLICY'].encode('utf-8')
59 policy = os.environ['HGMODULEPOLICY'].encode('utf-8')
60 else:
60 else:
61 policy = os.environ.get('HGMODULEPOLICY', policy)
61 policy = os.environ.get('HGMODULEPOLICY', policy)
62
62
63
63
64 def _importfrom(pkgname, modname):
64 def _importfrom(pkgname, modname):
65 # from .<pkgname> import <modname> (where . is looked through this module)
65 # from .<pkgname> import <modname> (where . is looked through this module)
66 fakelocals = {}
66 fakelocals = {}
67 pkg = __import__(pkgname, globals(), fakelocals, [modname], level=1)
67 pkg = __import__(pkgname, globals(), fakelocals, [modname], level=1)
68 try:
68 try:
69 fakelocals[modname] = mod = getattr(pkg, modname)
69 fakelocals[modname] = mod = getattr(pkg, modname)
70 except AttributeError:
70 except AttributeError:
71 raise ImportError('cannot import name %s' % modname)
71 raise ImportError('cannot import name %s' % modname)
72 # force import; fakelocals[modname] may be replaced with the real module
72 # force import; fakelocals[modname] may be replaced with the real module
73 getattr(mod, '__doc__', None)
73 getattr(mod, '__doc__', None)
74 return fakelocals[modname]
74 return fakelocals[modname]
75
75
76
76
77 # keep in sync with "version" in C modules
77 # keep in sync with "version" in C modules
78 _cextversions = {
78 _cextversions = {
79 ('cext', 'base85'): 1,
79 ('cext', 'base85'): 1,
80 ('cext', 'bdiff'): 3,
80 ('cext', 'bdiff'): 3,
81 ('cext', 'mpatch'): 1,
81 ('cext', 'mpatch'): 1,
82 ('cext', 'osutil'): 4,
82 ('cext', 'osutil'): 4,
83 ('cext', 'parsers'): 14,
83 ('cext', 'parsers'): 15,
84 }
84 }
85
85
86 # map import request to other package or module
86 # map import request to other package or module
87 _modredirects = {
87 _modredirects = {
88 ('cext', 'charencode'): ('cext', 'parsers'),
88 ('cext', 'charencode'): ('cext', 'parsers'),
89 ('cffi', 'base85'): ('pure', 'base85'),
89 ('cffi', 'base85'): ('pure', 'base85'),
90 ('cffi', 'charencode'): ('pure', 'charencode'),
90 ('cffi', 'charencode'): ('pure', 'charencode'),
91 ('cffi', 'parsers'): ('pure', 'parsers'),
91 ('cffi', 'parsers'): ('pure', 'parsers'),
92 }
92 }
93
93
94
94
95 def _checkmod(pkgname, modname, mod):
95 def _checkmod(pkgname, modname, mod):
96 expected = _cextversions.get((pkgname, modname))
96 expected = _cextversions.get((pkgname, modname))
97 actual = getattr(mod, 'version', None)
97 actual = getattr(mod, 'version', None)
98 if actual != expected:
98 if actual != expected:
99 raise ImportError(
99 raise ImportError(
100 'cannot import module %s.%s '
100 'cannot import module %s.%s '
101 '(expected version: %d, actual: %r)'
101 '(expected version: %d, actual: %r)'
102 % (pkgname, modname, expected, actual)
102 % (pkgname, modname, expected, actual)
103 )
103 )
104
104
105
105
106 def importmod(modname):
106 def importmod(modname):
107 """Import module according to policy and check API version"""
107 """Import module according to policy and check API version"""
108 try:
108 try:
109 verpkg, purepkg = _packageprefs[policy]
109 verpkg, purepkg = _packageprefs[policy]
110 except KeyError:
110 except KeyError:
111 raise ImportError('invalid HGMODULEPOLICY %r' % policy)
111 raise ImportError('invalid HGMODULEPOLICY %r' % policy)
112 assert verpkg or purepkg
112 assert verpkg or purepkg
113 if verpkg:
113 if verpkg:
114 pn, mn = _modredirects.get((verpkg, modname), (verpkg, modname))
114 pn, mn = _modredirects.get((verpkg, modname), (verpkg, modname))
115 try:
115 try:
116 mod = _importfrom(pn, mn)
116 mod = _importfrom(pn, mn)
117 if pn == verpkg:
117 if pn == verpkg:
118 _checkmod(pn, mn, mod)
118 _checkmod(pn, mn, mod)
119 return mod
119 return mod
120 except ImportError:
120 except ImportError:
121 if not purepkg:
121 if not purepkg:
122 raise
122 raise
123 pn, mn = _modredirects.get((purepkg, modname), (purepkg, modname))
123 pn, mn = _modredirects.get((purepkg, modname), (purepkg, modname))
124 return _importfrom(pn, mn)
124 return _importfrom(pn, mn)
125
125
126
126
127 def _isrustpermissive():
127 def _isrustpermissive():
128 """Assuming the policy is a Rust one, tell if it's permissive."""
128 """Assuming the policy is a Rust one, tell if it's permissive."""
129 return policy.endswith(b'-allow')
129 return policy.endswith(b'-allow')
130
130
131
131
132 def importrust(modname, member=None, default=None):
132 def importrust(modname, member=None, default=None):
133 """Import Rust module according to policy and availability.
133 """Import Rust module according to policy and availability.
134
134
135 If policy isn't a Rust one, this returns `default`.
135 If policy isn't a Rust one, this returns `default`.
136
136
137 If either the module or its member is not available, this returns `default`
137 If either the module or its member is not available, this returns `default`
138 if policy is permissive and raises `ImportError` if not.
138 if policy is permissive and raises `ImportError` if not.
139 """
139 """
140 if not policy.startswith(b'rust'):
140 if not policy.startswith(b'rust'):
141 return default
141 return default
142
142
143 try:
143 try:
144 mod = _importfrom('rustext', modname)
144 mod = _importfrom('rustext', modname)
145 except ImportError:
145 except ImportError:
146 if _isrustpermissive():
146 if _isrustpermissive():
147 return default
147 return default
148 raise
148 raise
149 if member is None:
149 if member is None:
150 return mod
150 return mod
151
151
152 try:
152 try:
153 return getattr(mod, member)
153 return getattr(mod, member)
154 except AttributeError:
154 except AttributeError:
155 if _isrustpermissive():
155 if _isrustpermissive():
156 return default
156 return default
157 raise ImportError("Cannot import name %s" % member)
157 raise ImportError("Cannot import name %s" % member)
@@ -1,217 +1,223 b''
1 # parsers.py - Python implementation of parsers.c
1 # parsers.py - Python implementation of parsers.c
2 #
2 #
3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import struct
10 import struct
11 import zlib
11 import zlib
12
12
13 from ..node import nullid, nullrev
13 from ..node import nullid, nullrev
14 from .. import (
14 from .. import (
15 pycompat,
15 pycompat,
16 revlogutils,
16 revlogutils,
17 util,
17 util,
18 )
18 )
19
19
20 stringio = pycompat.bytesio
20 stringio = pycompat.bytesio
21
21
22
22
23 _pack = struct.pack
23 _pack = struct.pack
24 _unpack = struct.unpack
24 _unpack = struct.unpack
25 _compress = zlib.compress
25 _compress = zlib.compress
26 _decompress = zlib.decompress
26 _decompress = zlib.decompress
27
27
28 # Some code below makes tuples directly because it's more convenient. However,
28 # Some code below makes tuples directly because it's more convenient. However,
29 # code outside this module should always use dirstatetuple.
29 # code outside this module should always use dirstatetuple.
30 def dirstatetuple(*x):
30 def dirstatetuple(*x):
31 # x is a tuple
31 # x is a tuple
32 return x
32 return x
33
33
34
34
35 indexformatng = b">Qiiiiii20s12x"
35 indexformatng = b">Qiiiiii20s12x"
36 indexfirst = struct.calcsize(b'Q')
36 indexfirst = struct.calcsize(b'Q')
37 sizeint = struct.calcsize(b'i')
37 sizeint = struct.calcsize(b'i')
38 indexsize = struct.calcsize(indexformatng)
38 indexsize = struct.calcsize(indexformatng)
39
39
40
40
41 def gettype(q):
41 def gettype(q):
42 return int(q & 0xFFFF)
42 return int(q & 0xFFFF)
43
43
44
44
45 def offset_type(offset, type):
45 def offset_type(offset, type):
46 return int(int(offset) << 16 | type)
46 return int(int(offset) << 16 | type)
47
47
48
48
49 class BaseIndexObject(object):
49 class BaseIndexObject(object):
50 @util.propertycache
50 @util.propertycache
51 def nodemap(self):
51 def nodemap(self):
52 nodemap = revlogutils.NodeMap({nullid: nullrev})
52 nodemap = revlogutils.NodeMap({nullid: nullrev})
53 for r in range(0, len(self)):
53 for r in range(0, len(self)):
54 n = self[r][7]
54 n = self[r][7]
55 nodemap[n] = r
55 nodemap[n] = r
56 return nodemap
56 return nodemap
57
57
58 def has_node(self, node):
58 def has_node(self, node):
59 """return True if the node exist in the index"""
59 """return True if the node exist in the index"""
60 return node in self.nodemap
60 return node in self.nodemap
61
61
62 def rev(self, node):
63 """return a revision for a node
64
65 If the node is unknown, raise a RevlogError"""
66 return self.nodemap[node]
67
62 def _stripnodes(self, start):
68 def _stripnodes(self, start):
63 if 'nodemap' in vars(self):
69 if 'nodemap' in vars(self):
64 for r in range(start, len(self)):
70 for r in range(start, len(self)):
65 n = self[r][7]
71 n = self[r][7]
66 del self.nodemap[n]
72 del self.nodemap[n]
67
73
68 def clearcaches(self):
74 def clearcaches(self):
69 self.__dict__.pop('nodemap', None)
75 self.__dict__.pop('nodemap', None)
70
76
71 def __len__(self):
77 def __len__(self):
72 return self._lgt + len(self._extra)
78 return self._lgt + len(self._extra)
73
79
74 def append(self, tup):
80 def append(self, tup):
75 if 'nodemap' in vars(self):
81 if 'nodemap' in vars(self):
76 self.nodemap[tup[7]] = len(self)
82 self.nodemap[tup[7]] = len(self)
77 self._extra.append(tup)
83 self._extra.append(tup)
78
84
79 def _check_index(self, i):
85 def _check_index(self, i):
80 if not isinstance(i, int):
86 if not isinstance(i, int):
81 raise TypeError(b"expecting int indexes")
87 raise TypeError(b"expecting int indexes")
82 if i < 0 or i >= len(self):
88 if i < 0 or i >= len(self):
83 raise IndexError
89 raise IndexError
84
90
85 def __getitem__(self, i):
91 def __getitem__(self, i):
86 if i == -1:
92 if i == -1:
87 return (0, 0, 0, -1, -1, -1, -1, nullid)
93 return (0, 0, 0, -1, -1, -1, -1, nullid)
88 self._check_index(i)
94 self._check_index(i)
89 if i >= self._lgt:
95 if i >= self._lgt:
90 return self._extra[i - self._lgt]
96 return self._extra[i - self._lgt]
91 index = self._calculate_index(i)
97 index = self._calculate_index(i)
92 r = struct.unpack(indexformatng, self._data[index : index + indexsize])
98 r = struct.unpack(indexformatng, self._data[index : index + indexsize])
93 if i == 0:
99 if i == 0:
94 e = list(r)
100 e = list(r)
95 type = gettype(e[0])
101 type = gettype(e[0])
96 e[0] = offset_type(0, type)
102 e[0] = offset_type(0, type)
97 return tuple(e)
103 return tuple(e)
98 return r
104 return r
99
105
100
106
101 class IndexObject(BaseIndexObject):
107 class IndexObject(BaseIndexObject):
102 def __init__(self, data):
108 def __init__(self, data):
103 assert len(data) % indexsize == 0
109 assert len(data) % indexsize == 0
104 self._data = data
110 self._data = data
105 self._lgt = len(data) // indexsize
111 self._lgt = len(data) // indexsize
106 self._extra = []
112 self._extra = []
107
113
108 def _calculate_index(self, i):
114 def _calculate_index(self, i):
109 return i * indexsize
115 return i * indexsize
110
116
111 def __delitem__(self, i):
117 def __delitem__(self, i):
112 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
118 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
113 raise ValueError(b"deleting slices only supports a:-1 with step 1")
119 raise ValueError(b"deleting slices only supports a:-1 with step 1")
114 i = i.start
120 i = i.start
115 self._check_index(i)
121 self._check_index(i)
116 self._stripnodes(i)
122 self._stripnodes(i)
117 if i < self._lgt:
123 if i < self._lgt:
118 self._data = self._data[: i * indexsize]
124 self._data = self._data[: i * indexsize]
119 self._lgt = i
125 self._lgt = i
120 self._extra = []
126 self._extra = []
121 else:
127 else:
122 self._extra = self._extra[: i - self._lgt]
128 self._extra = self._extra[: i - self._lgt]
123
129
124
130
125 class InlinedIndexObject(BaseIndexObject):
131 class InlinedIndexObject(BaseIndexObject):
126 def __init__(self, data, inline=0):
132 def __init__(self, data, inline=0):
127 self._data = data
133 self._data = data
128 self._lgt = self._inline_scan(None)
134 self._lgt = self._inline_scan(None)
129 self._inline_scan(self._lgt)
135 self._inline_scan(self._lgt)
130 self._extra = []
136 self._extra = []
131
137
132 def _inline_scan(self, lgt):
138 def _inline_scan(self, lgt):
133 off = 0
139 off = 0
134 if lgt is not None:
140 if lgt is not None:
135 self._offsets = [0] * lgt
141 self._offsets = [0] * lgt
136 count = 0
142 count = 0
137 while off <= len(self._data) - indexsize:
143 while off <= len(self._data) - indexsize:
138 (s,) = struct.unpack(
144 (s,) = struct.unpack(
139 b'>i', self._data[off + indexfirst : off + sizeint + indexfirst]
145 b'>i', self._data[off + indexfirst : off + sizeint + indexfirst]
140 )
146 )
141 if lgt is not None:
147 if lgt is not None:
142 self._offsets[count] = off
148 self._offsets[count] = off
143 count += 1
149 count += 1
144 off += indexsize + s
150 off += indexsize + s
145 if off != len(self._data):
151 if off != len(self._data):
146 raise ValueError(b"corrupted data")
152 raise ValueError(b"corrupted data")
147 return count
153 return count
148
154
149 def __delitem__(self, i):
155 def __delitem__(self, i):
150 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
156 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
151 raise ValueError(b"deleting slices only supports a:-1 with step 1")
157 raise ValueError(b"deleting slices only supports a:-1 with step 1")
152 i = i.start
158 i = i.start
153 self._check_index(i)
159 self._check_index(i)
154 self._stripnodes(i)
160 self._stripnodes(i)
155 if i < self._lgt:
161 if i < self._lgt:
156 self._offsets = self._offsets[:i]
162 self._offsets = self._offsets[:i]
157 self._lgt = i
163 self._lgt = i
158 self._extra = []
164 self._extra = []
159 else:
165 else:
160 self._extra = self._extra[: i - self._lgt]
166 self._extra = self._extra[: i - self._lgt]
161
167
162 def _calculate_index(self, i):
168 def _calculate_index(self, i):
163 return self._offsets[i]
169 return self._offsets[i]
164
170
165
171
166 def parse_index2(data, inline):
172 def parse_index2(data, inline):
167 if not inline:
173 if not inline:
168 return IndexObject(data), None
174 return IndexObject(data), None
169 return InlinedIndexObject(data, inline), (0, data)
175 return InlinedIndexObject(data, inline), (0, data)
170
176
171
177
172 def parse_dirstate(dmap, copymap, st):
178 def parse_dirstate(dmap, copymap, st):
173 parents = [st[:20], st[20:40]]
179 parents = [st[:20], st[20:40]]
174 # dereference fields so they will be local in loop
180 # dereference fields so they will be local in loop
175 format = b">cllll"
181 format = b">cllll"
176 e_size = struct.calcsize(format)
182 e_size = struct.calcsize(format)
177 pos1 = 40
183 pos1 = 40
178 l = len(st)
184 l = len(st)
179
185
180 # the inner loop
186 # the inner loop
181 while pos1 < l:
187 while pos1 < l:
182 pos2 = pos1 + e_size
188 pos2 = pos1 + e_size
183 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
189 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
184 pos1 = pos2 + e[4]
190 pos1 = pos2 + e[4]
185 f = st[pos2:pos1]
191 f = st[pos2:pos1]
186 if b'\0' in f:
192 if b'\0' in f:
187 f, c = f.split(b'\0')
193 f, c = f.split(b'\0')
188 copymap[f] = c
194 copymap[f] = c
189 dmap[f] = e[:4]
195 dmap[f] = e[:4]
190 return parents
196 return parents
191
197
192
198
193 def pack_dirstate(dmap, copymap, pl, now):
199 def pack_dirstate(dmap, copymap, pl, now):
194 now = int(now)
200 now = int(now)
195 cs = stringio()
201 cs = stringio()
196 write = cs.write
202 write = cs.write
197 write(b"".join(pl))
203 write(b"".join(pl))
198 for f, e in pycompat.iteritems(dmap):
204 for f, e in pycompat.iteritems(dmap):
199 if e[0] == b'n' and e[3] == now:
205 if e[0] == b'n' and e[3] == now:
200 # The file was last modified "simultaneously" with the current
206 # The file was last modified "simultaneously" with the current
201 # write to dirstate (i.e. within the same second for file-
207 # write to dirstate (i.e. within the same second for file-
202 # systems with a granularity of 1 sec). This commonly happens
208 # systems with a granularity of 1 sec). This commonly happens
203 # for at least a couple of files on 'update'.
209 # for at least a couple of files on 'update'.
204 # The user could change the file without changing its size
210 # The user could change the file without changing its size
205 # within the same second. Invalidate the file's mtime in
211 # within the same second. Invalidate the file's mtime in
206 # dirstate, forcing future 'status' calls to compare the
212 # dirstate, forcing future 'status' calls to compare the
207 # contents of the file if the size is the same. This prevents
213 # contents of the file if the size is the same. This prevents
208 # mistakenly treating such files as clean.
214 # mistakenly treating such files as clean.
209 e = dirstatetuple(e[0], e[1], e[2], -1)
215 e = dirstatetuple(e[0], e[1], e[2], -1)
210 dmap[f] = e
216 dmap[f] = e
211
217
212 if f in copymap:
218 if f in copymap:
213 f = b"%s\0%s" % (f, copymap[f])
219 f = b"%s\0%s" % (f, copymap[f])
214 e = _pack(b">cllll", e[0], e[1], e[2], e[3], len(f))
220 e = _pack(b">cllll", e[0], e[1], e[2], e[3], len(f))
215 write(e)
221 write(e)
216 write(f)
222 write(f)
217 return cs.getvalue()
223 return cs.getvalue()
@@ -1,2964 +1,2970 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import collections
16 import collections
17 import contextlib
17 import contextlib
18 import errno
18 import errno
19 import io
19 import io
20 import os
20 import os
21 import struct
21 import struct
22 import zlib
22 import zlib
23
23
24 # import stuff from node for others to import from revlog
24 # import stuff from node for others to import from revlog
25 from .node import (
25 from .node import (
26 bin,
26 bin,
27 hex,
27 hex,
28 nullhex,
28 nullhex,
29 nullid,
29 nullid,
30 nullrev,
30 nullrev,
31 short,
31 short,
32 wdirfilenodeids,
32 wdirfilenodeids,
33 wdirhex,
33 wdirhex,
34 wdirid,
34 wdirid,
35 wdirrev,
35 wdirrev,
36 )
36 )
37 from .i18n import _
37 from .i18n import _
38 from .pycompat import getattr
38 from .pycompat import getattr
39 from .revlogutils.constants import (
39 from .revlogutils.constants import (
40 FLAG_GENERALDELTA,
40 FLAG_GENERALDELTA,
41 FLAG_INLINE_DATA,
41 FLAG_INLINE_DATA,
42 REVLOGV0,
42 REVLOGV0,
43 REVLOGV1,
43 REVLOGV1,
44 REVLOGV1_FLAGS,
44 REVLOGV1_FLAGS,
45 REVLOGV2,
45 REVLOGV2,
46 REVLOGV2_FLAGS,
46 REVLOGV2_FLAGS,
47 REVLOG_DEFAULT_FLAGS,
47 REVLOG_DEFAULT_FLAGS,
48 REVLOG_DEFAULT_FORMAT,
48 REVLOG_DEFAULT_FORMAT,
49 REVLOG_DEFAULT_VERSION,
49 REVLOG_DEFAULT_VERSION,
50 )
50 )
51 from .revlogutils.flagutil import (
51 from .revlogutils.flagutil import (
52 REVIDX_DEFAULT_FLAGS,
52 REVIDX_DEFAULT_FLAGS,
53 REVIDX_ELLIPSIS,
53 REVIDX_ELLIPSIS,
54 REVIDX_EXTSTORED,
54 REVIDX_EXTSTORED,
55 REVIDX_FLAGS_ORDER,
55 REVIDX_FLAGS_ORDER,
56 REVIDX_ISCENSORED,
56 REVIDX_ISCENSORED,
57 REVIDX_RAWTEXT_CHANGING_FLAGS,
57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 REVIDX_SIDEDATA,
58 REVIDX_SIDEDATA,
59 )
59 )
60 from .thirdparty import attr
60 from .thirdparty import attr
61 from . import (
61 from . import (
62 ancestor,
62 ancestor,
63 dagop,
63 dagop,
64 error,
64 error,
65 mdiff,
65 mdiff,
66 policy,
66 policy,
67 pycompat,
67 pycompat,
68 revlogutils,
68 revlogutils,
69 templatefilters,
69 templatefilters,
70 util,
70 util,
71 )
71 )
72 from .interfaces import (
72 from .interfaces import (
73 repository,
73 repository,
74 util as interfaceutil,
74 util as interfaceutil,
75 )
75 )
76 from .revlogutils import (
76 from .revlogutils import (
77 deltas as deltautil,
77 deltas as deltautil,
78 flagutil,
78 flagutil,
79 sidedata as sidedatautil,
79 sidedata as sidedatautil,
80 )
80 )
81 from .utils import (
81 from .utils import (
82 storageutil,
82 storageutil,
83 stringutil,
83 stringutil,
84 )
84 )
85
85
86 # blanked usage of all the name to prevent pyflakes constraints
86 # blanked usage of all the name to prevent pyflakes constraints
87 # We need these name available in the module for extensions.
87 # We need these name available in the module for extensions.
88 REVLOGV0
88 REVLOGV0
89 REVLOGV1
89 REVLOGV1
90 REVLOGV2
90 REVLOGV2
91 FLAG_INLINE_DATA
91 FLAG_INLINE_DATA
92 FLAG_GENERALDELTA
92 FLAG_GENERALDELTA
93 REVLOG_DEFAULT_FLAGS
93 REVLOG_DEFAULT_FLAGS
94 REVLOG_DEFAULT_FORMAT
94 REVLOG_DEFAULT_FORMAT
95 REVLOG_DEFAULT_VERSION
95 REVLOG_DEFAULT_VERSION
96 REVLOGV1_FLAGS
96 REVLOGV1_FLAGS
97 REVLOGV2_FLAGS
97 REVLOGV2_FLAGS
98 REVIDX_ISCENSORED
98 REVIDX_ISCENSORED
99 REVIDX_ELLIPSIS
99 REVIDX_ELLIPSIS
100 REVIDX_SIDEDATA
100 REVIDX_SIDEDATA
101 REVIDX_EXTSTORED
101 REVIDX_EXTSTORED
102 REVIDX_DEFAULT_FLAGS
102 REVIDX_DEFAULT_FLAGS
103 REVIDX_FLAGS_ORDER
103 REVIDX_FLAGS_ORDER
104 REVIDX_RAWTEXT_CHANGING_FLAGS
104 REVIDX_RAWTEXT_CHANGING_FLAGS
105
105
106 parsers = policy.importmod('parsers')
106 parsers = policy.importmod('parsers')
107 rustancestor = policy.importrust('ancestor')
107 rustancestor = policy.importrust('ancestor')
108 rustdagop = policy.importrust('dagop')
108 rustdagop = policy.importrust('dagop')
109
109
110 # Aliased for performance.
110 # Aliased for performance.
111 _zlibdecompress = zlib.decompress
111 _zlibdecompress = zlib.decompress
112
112
113 # max size of revlog with inline data
113 # max size of revlog with inline data
114 _maxinline = 131072
114 _maxinline = 131072
115 _chunksize = 1048576
115 _chunksize = 1048576
116
116
117 # Flag processors for REVIDX_ELLIPSIS.
117 # Flag processors for REVIDX_ELLIPSIS.
118 def ellipsisreadprocessor(rl, text):
118 def ellipsisreadprocessor(rl, text):
119 return text, False, {}
119 return text, False, {}
120
120
121
121
122 def ellipsiswriteprocessor(rl, text, sidedata):
122 def ellipsiswriteprocessor(rl, text, sidedata):
123 return text, False
123 return text, False
124
124
125
125
126 def ellipsisrawprocessor(rl, text):
126 def ellipsisrawprocessor(rl, text):
127 return False
127 return False
128
128
129
129
130 ellipsisprocessor = (
130 ellipsisprocessor = (
131 ellipsisreadprocessor,
131 ellipsisreadprocessor,
132 ellipsiswriteprocessor,
132 ellipsiswriteprocessor,
133 ellipsisrawprocessor,
133 ellipsisrawprocessor,
134 )
134 )
135
135
136
136
137 def getoffset(q):
137 def getoffset(q):
138 return int(q >> 16)
138 return int(q >> 16)
139
139
140
140
141 def gettype(q):
141 def gettype(q):
142 return int(q & 0xFFFF)
142 return int(q & 0xFFFF)
143
143
144
144
145 def offset_type(offset, type):
145 def offset_type(offset, type):
146 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
146 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
147 raise ValueError(b'unknown revlog index flags')
147 raise ValueError(b'unknown revlog index flags')
148 return int(int(offset) << 16 | type)
148 return int(int(offset) << 16 | type)
149
149
150
150
151 @attr.s(slots=True, frozen=True)
151 @attr.s(slots=True, frozen=True)
152 class _revisioninfo(object):
152 class _revisioninfo(object):
153 """Information about a revision that allows building its fulltext
153 """Information about a revision that allows building its fulltext
154 node: expected hash of the revision
154 node: expected hash of the revision
155 p1, p2: parent revs of the revision
155 p1, p2: parent revs of the revision
156 btext: built text cache consisting of a one-element list
156 btext: built text cache consisting of a one-element list
157 cachedelta: (baserev, uncompressed_delta) or None
157 cachedelta: (baserev, uncompressed_delta) or None
158 flags: flags associated to the revision storage
158 flags: flags associated to the revision storage
159
159
160 One of btext[0] or cachedelta must be set.
160 One of btext[0] or cachedelta must be set.
161 """
161 """
162
162
163 node = attr.ib()
163 node = attr.ib()
164 p1 = attr.ib()
164 p1 = attr.ib()
165 p2 = attr.ib()
165 p2 = attr.ib()
166 btext = attr.ib()
166 btext = attr.ib()
167 textlen = attr.ib()
167 textlen = attr.ib()
168 cachedelta = attr.ib()
168 cachedelta = attr.ib()
169 flags = attr.ib()
169 flags = attr.ib()
170
170
171
171
172 @interfaceutil.implementer(repository.irevisiondelta)
172 @interfaceutil.implementer(repository.irevisiondelta)
173 @attr.s(slots=True)
173 @attr.s(slots=True)
174 class revlogrevisiondelta(object):
174 class revlogrevisiondelta(object):
175 node = attr.ib()
175 node = attr.ib()
176 p1node = attr.ib()
176 p1node = attr.ib()
177 p2node = attr.ib()
177 p2node = attr.ib()
178 basenode = attr.ib()
178 basenode = attr.ib()
179 flags = attr.ib()
179 flags = attr.ib()
180 baserevisionsize = attr.ib()
180 baserevisionsize = attr.ib()
181 revision = attr.ib()
181 revision = attr.ib()
182 delta = attr.ib()
182 delta = attr.ib()
183 linknode = attr.ib(default=None)
183 linknode = attr.ib(default=None)
184
184
185
185
186 @interfaceutil.implementer(repository.iverifyproblem)
186 @interfaceutil.implementer(repository.iverifyproblem)
187 @attr.s(frozen=True)
187 @attr.s(frozen=True)
188 class revlogproblem(object):
188 class revlogproblem(object):
189 warning = attr.ib(default=None)
189 warning = attr.ib(default=None)
190 error = attr.ib(default=None)
190 error = attr.ib(default=None)
191 node = attr.ib(default=None)
191 node = attr.ib(default=None)
192
192
193
193
194 # index v0:
194 # index v0:
195 # 4 bytes: offset
195 # 4 bytes: offset
196 # 4 bytes: compressed length
196 # 4 bytes: compressed length
197 # 4 bytes: base rev
197 # 4 bytes: base rev
198 # 4 bytes: link rev
198 # 4 bytes: link rev
199 # 20 bytes: parent 1 nodeid
199 # 20 bytes: parent 1 nodeid
200 # 20 bytes: parent 2 nodeid
200 # 20 bytes: parent 2 nodeid
201 # 20 bytes: nodeid
201 # 20 bytes: nodeid
202 indexformatv0 = struct.Struct(b">4l20s20s20s")
202 indexformatv0 = struct.Struct(b">4l20s20s20s")
203 indexformatv0_pack = indexformatv0.pack
203 indexformatv0_pack = indexformatv0.pack
204 indexformatv0_unpack = indexformatv0.unpack
204 indexformatv0_unpack = indexformatv0.unpack
205
205
206
206
207 class revlogoldindex(list):
207 class revlogoldindex(list):
208 @util.propertycache
208 @util.propertycache
209 def nodemap(self):
209 def nodemap(self):
210 nodemap = revlogutils.NodeMap({nullid: nullrev})
210 nodemap = revlogutils.NodeMap({nullid: nullrev})
211 for r in range(0, len(self)):
211 for r in range(0, len(self)):
212 n = self[r][7]
212 n = self[r][7]
213 nodemap[n] = r
213 nodemap[n] = r
214 return nodemap
214 return nodemap
215
215
216 def has_node(self, node):
216 def has_node(self, node):
217 """return True if the node exist in the index"""
217 """return True if the node exist in the index"""
218 return node in self.nodemap
218 return node in self.nodemap
219
219
220 def rev(self, node):
221 """return a revision for a node
222
223 If the node is unknown, raise a RevlogError"""
224 return self.nodemap[node]
225
220 def append(self, tup):
226 def append(self, tup):
221 self.nodemap[tup[7]] = len(self)
227 self.nodemap[tup[7]] = len(self)
222 super(revlogoldindex, self).append(tup)
228 super(revlogoldindex, self).append(tup)
223
229
224 def __delitem__(self, i):
230 def __delitem__(self, i):
225 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
231 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
226 raise ValueError(b"deleting slices only supports a:-1 with step 1")
232 raise ValueError(b"deleting slices only supports a:-1 with step 1")
227 for r in pycompat.xrange(i.start, len(self)):
233 for r in pycompat.xrange(i.start, len(self)):
228 del self.nodemap[self[r][7]]
234 del self.nodemap[self[r][7]]
229 super(revlogoldindex, self).__delitem__(i)
235 super(revlogoldindex, self).__delitem__(i)
230
236
231 def clearcaches(self):
237 def clearcaches(self):
232 self.__dict__.pop('nodemap', None)
238 self.__dict__.pop('nodemap', None)
233
239
234 def __getitem__(self, i):
240 def __getitem__(self, i):
235 if i == -1:
241 if i == -1:
236 return (0, 0, 0, -1, -1, -1, -1, nullid)
242 return (0, 0, 0, -1, -1, -1, -1, nullid)
237 return list.__getitem__(self, i)
243 return list.__getitem__(self, i)
238
244
239
245
240 class revlogoldio(object):
246 class revlogoldio(object):
241 def __init__(self):
247 def __init__(self):
242 self.size = indexformatv0.size
248 self.size = indexformatv0.size
243
249
244 def parseindex(self, data, inline):
250 def parseindex(self, data, inline):
245 s = self.size
251 s = self.size
246 index = []
252 index = []
247 nodemap = revlogutils.NodeMap({nullid: nullrev})
253 nodemap = revlogutils.NodeMap({nullid: nullrev})
248 n = off = 0
254 n = off = 0
249 l = len(data)
255 l = len(data)
250 while off + s <= l:
256 while off + s <= l:
251 cur = data[off : off + s]
257 cur = data[off : off + s]
252 off += s
258 off += s
253 e = indexformatv0_unpack(cur)
259 e = indexformatv0_unpack(cur)
254 # transform to revlogv1 format
260 # transform to revlogv1 format
255 e2 = (
261 e2 = (
256 offset_type(e[0], 0),
262 offset_type(e[0], 0),
257 e[1],
263 e[1],
258 -1,
264 -1,
259 e[2],
265 e[2],
260 e[3],
266 e[3],
261 nodemap.get(e[4], nullrev),
267 nodemap.get(e[4], nullrev),
262 nodemap.get(e[5], nullrev),
268 nodemap.get(e[5], nullrev),
263 e[6],
269 e[6],
264 )
270 )
265 index.append(e2)
271 index.append(e2)
266 nodemap[e[6]] = n
272 nodemap[e[6]] = n
267 n += 1
273 n += 1
268
274
269 index = revlogoldindex(index)
275 index = revlogoldindex(index)
270 return index, None
276 return index, None
271
277
272 def packentry(self, entry, node, version, rev):
278 def packentry(self, entry, node, version, rev):
273 if gettype(entry[0]):
279 if gettype(entry[0]):
274 raise error.RevlogError(
280 raise error.RevlogError(
275 _(b'index entry flags need revlog version 1')
281 _(b'index entry flags need revlog version 1')
276 )
282 )
277 e2 = (
283 e2 = (
278 getoffset(entry[0]),
284 getoffset(entry[0]),
279 entry[1],
285 entry[1],
280 entry[3],
286 entry[3],
281 entry[4],
287 entry[4],
282 node(entry[5]),
288 node(entry[5]),
283 node(entry[6]),
289 node(entry[6]),
284 entry[7],
290 entry[7],
285 )
291 )
286 return indexformatv0_pack(*e2)
292 return indexformatv0_pack(*e2)
287
293
288
294
289 # index ng:
295 # index ng:
290 # 6 bytes: offset
296 # 6 bytes: offset
291 # 2 bytes: flags
297 # 2 bytes: flags
292 # 4 bytes: compressed length
298 # 4 bytes: compressed length
293 # 4 bytes: uncompressed length
299 # 4 bytes: uncompressed length
294 # 4 bytes: base rev
300 # 4 bytes: base rev
295 # 4 bytes: link rev
301 # 4 bytes: link rev
296 # 4 bytes: parent 1 rev
302 # 4 bytes: parent 1 rev
297 # 4 bytes: parent 2 rev
303 # 4 bytes: parent 2 rev
298 # 32 bytes: nodeid
304 # 32 bytes: nodeid
299 indexformatng = struct.Struct(b">Qiiiiii20s12x")
305 indexformatng = struct.Struct(b">Qiiiiii20s12x")
300 indexformatng_pack = indexformatng.pack
306 indexformatng_pack = indexformatng.pack
301 versionformat = struct.Struct(b">I")
307 versionformat = struct.Struct(b">I")
302 versionformat_pack = versionformat.pack
308 versionformat_pack = versionformat.pack
303 versionformat_unpack = versionformat.unpack
309 versionformat_unpack = versionformat.unpack
304
310
305 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
311 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
306 # signed integer)
312 # signed integer)
307 _maxentrysize = 0x7FFFFFFF
313 _maxentrysize = 0x7FFFFFFF
308
314
309
315
310 class revlogio(object):
316 class revlogio(object):
311 def __init__(self):
317 def __init__(self):
312 self.size = indexformatng.size
318 self.size = indexformatng.size
313
319
314 def parseindex(self, data, inline):
320 def parseindex(self, data, inline):
315 # call the C implementation to parse the index data
321 # call the C implementation to parse the index data
316 index, cache = parsers.parse_index2(data, inline)
322 index, cache = parsers.parse_index2(data, inline)
317 return index, cache
323 return index, cache
318
324
319 def packentry(self, entry, node, version, rev):
325 def packentry(self, entry, node, version, rev):
320 p = indexformatng_pack(*entry)
326 p = indexformatng_pack(*entry)
321 if rev == 0:
327 if rev == 0:
322 p = versionformat_pack(version) + p[4:]
328 p = versionformat_pack(version) + p[4:]
323 return p
329 return p
324
330
325
331
326 class revlog(object):
332 class revlog(object):
327 """
333 """
328 the underlying revision storage object
334 the underlying revision storage object
329
335
330 A revlog consists of two parts, an index and the revision data.
336 A revlog consists of two parts, an index and the revision data.
331
337
332 The index is a file with a fixed record size containing
338 The index is a file with a fixed record size containing
333 information on each revision, including its nodeid (hash), the
339 information on each revision, including its nodeid (hash), the
334 nodeids of its parents, the position and offset of its data within
340 nodeids of its parents, the position and offset of its data within
335 the data file, and the revision it's based on. Finally, each entry
341 the data file, and the revision it's based on. Finally, each entry
336 contains a linkrev entry that can serve as a pointer to external
342 contains a linkrev entry that can serve as a pointer to external
337 data.
343 data.
338
344
339 The revision data itself is a linear collection of data chunks.
345 The revision data itself is a linear collection of data chunks.
340 Each chunk represents a revision and is usually represented as a
346 Each chunk represents a revision and is usually represented as a
341 delta against the previous chunk. To bound lookup time, runs of
347 delta against the previous chunk. To bound lookup time, runs of
342 deltas are limited to about 2 times the length of the original
348 deltas are limited to about 2 times the length of the original
343 version data. This makes retrieval of a version proportional to
349 version data. This makes retrieval of a version proportional to
344 its size, or O(1) relative to the number of revisions.
350 its size, or O(1) relative to the number of revisions.
345
351
346 Both pieces of the revlog are written to in an append-only
352 Both pieces of the revlog are written to in an append-only
347 fashion, which means we never need to rewrite a file to insert or
353 fashion, which means we never need to rewrite a file to insert or
348 remove data, and can use some simple techniques to avoid the need
354 remove data, and can use some simple techniques to avoid the need
349 for locking while reading.
355 for locking while reading.
350
356
351 If checkambig, indexfile is opened with checkambig=True at
357 If checkambig, indexfile is opened with checkambig=True at
352 writing, to avoid file stat ambiguity.
358 writing, to avoid file stat ambiguity.
353
359
354 If mmaplargeindex is True, and an mmapindexthreshold is set, the
360 If mmaplargeindex is True, and an mmapindexthreshold is set, the
355 index will be mmapped rather than read if it is larger than the
361 index will be mmapped rather than read if it is larger than the
356 configured threshold.
362 configured threshold.
357
363
358 If censorable is True, the revlog can have censored revisions.
364 If censorable is True, the revlog can have censored revisions.
359
365
360 If `upperboundcomp` is not None, this is the expected maximal gain from
366 If `upperboundcomp` is not None, this is the expected maximal gain from
361 compression for the data content.
367 compression for the data content.
362 """
368 """
363
369
364 _flagserrorclass = error.RevlogError
370 _flagserrorclass = error.RevlogError
365
371
366 def __init__(
372 def __init__(
367 self,
373 self,
368 opener,
374 opener,
369 indexfile,
375 indexfile,
370 datafile=None,
376 datafile=None,
371 checkambig=False,
377 checkambig=False,
372 mmaplargeindex=False,
378 mmaplargeindex=False,
373 censorable=False,
379 censorable=False,
374 upperboundcomp=None,
380 upperboundcomp=None,
375 ):
381 ):
376 """
382 """
377 create a revlog object
383 create a revlog object
378
384
379 opener is a function that abstracts the file opening operation
385 opener is a function that abstracts the file opening operation
380 and can be used to implement COW semantics or the like.
386 and can be used to implement COW semantics or the like.
381
387
382 """
388 """
383 self.upperboundcomp = upperboundcomp
389 self.upperboundcomp = upperboundcomp
384 self.indexfile = indexfile
390 self.indexfile = indexfile
385 self.datafile = datafile or (indexfile[:-2] + b".d")
391 self.datafile = datafile or (indexfile[:-2] + b".d")
386 self.opener = opener
392 self.opener = opener
387 # When True, indexfile is opened with checkambig=True at writing, to
393 # When True, indexfile is opened with checkambig=True at writing, to
388 # avoid file stat ambiguity.
394 # avoid file stat ambiguity.
389 self._checkambig = checkambig
395 self._checkambig = checkambig
390 self._mmaplargeindex = mmaplargeindex
396 self._mmaplargeindex = mmaplargeindex
391 self._censorable = censorable
397 self._censorable = censorable
392 # 3-tuple of (node, rev, text) for a raw revision.
398 # 3-tuple of (node, rev, text) for a raw revision.
393 self._revisioncache = None
399 self._revisioncache = None
394 # Maps rev to chain base rev.
400 # Maps rev to chain base rev.
395 self._chainbasecache = util.lrucachedict(100)
401 self._chainbasecache = util.lrucachedict(100)
396 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
402 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
397 self._chunkcache = (0, b'')
403 self._chunkcache = (0, b'')
398 # How much data to read and cache into the raw revlog data cache.
404 # How much data to read and cache into the raw revlog data cache.
399 self._chunkcachesize = 65536
405 self._chunkcachesize = 65536
400 self._maxchainlen = None
406 self._maxchainlen = None
401 self._deltabothparents = True
407 self._deltabothparents = True
402 self.index = None
408 self.index = None
403 # Mapping of partial identifiers to full nodes.
409 # Mapping of partial identifiers to full nodes.
404 self._pcache = {}
410 self._pcache = {}
405 # Mapping of revision integer to full node.
411 # Mapping of revision integer to full node.
406 self._nodepos = None
412 self._nodepos = None
407 self._compengine = b'zlib'
413 self._compengine = b'zlib'
408 self._compengineopts = {}
414 self._compengineopts = {}
409 self._maxdeltachainspan = -1
415 self._maxdeltachainspan = -1
410 self._withsparseread = False
416 self._withsparseread = False
411 self._sparserevlog = False
417 self._sparserevlog = False
412 self._srdensitythreshold = 0.50
418 self._srdensitythreshold = 0.50
413 self._srmingapsize = 262144
419 self._srmingapsize = 262144
414
420
415 # Make copy of flag processors so each revlog instance can support
421 # Make copy of flag processors so each revlog instance can support
416 # custom flags.
422 # custom flags.
417 self._flagprocessors = dict(flagutil.flagprocessors)
423 self._flagprocessors = dict(flagutil.flagprocessors)
418
424
419 # 2-tuple of file handles being used for active writing.
425 # 2-tuple of file handles being used for active writing.
420 self._writinghandles = None
426 self._writinghandles = None
421
427
422 self._loadindex()
428 self._loadindex()
423
429
424 def _loadindex(self):
430 def _loadindex(self):
425 mmapindexthreshold = None
431 mmapindexthreshold = None
426 opts = self.opener.options
432 opts = self.opener.options
427
433
428 if b'revlogv2' in opts:
434 if b'revlogv2' in opts:
429 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
435 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
430 elif b'revlogv1' in opts:
436 elif b'revlogv1' in opts:
431 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
437 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
432 if b'generaldelta' in opts:
438 if b'generaldelta' in opts:
433 newversionflags |= FLAG_GENERALDELTA
439 newversionflags |= FLAG_GENERALDELTA
434 elif b'revlogv0' in self.opener.options:
440 elif b'revlogv0' in self.opener.options:
435 newversionflags = REVLOGV0
441 newversionflags = REVLOGV0
436 else:
442 else:
437 newversionflags = REVLOG_DEFAULT_VERSION
443 newversionflags = REVLOG_DEFAULT_VERSION
438
444
439 if b'chunkcachesize' in opts:
445 if b'chunkcachesize' in opts:
440 self._chunkcachesize = opts[b'chunkcachesize']
446 self._chunkcachesize = opts[b'chunkcachesize']
441 if b'maxchainlen' in opts:
447 if b'maxchainlen' in opts:
442 self._maxchainlen = opts[b'maxchainlen']
448 self._maxchainlen = opts[b'maxchainlen']
443 if b'deltabothparents' in opts:
449 if b'deltabothparents' in opts:
444 self._deltabothparents = opts[b'deltabothparents']
450 self._deltabothparents = opts[b'deltabothparents']
445 self._lazydelta = bool(opts.get(b'lazydelta', True))
451 self._lazydelta = bool(opts.get(b'lazydelta', True))
446 self._lazydeltabase = False
452 self._lazydeltabase = False
447 if self._lazydelta:
453 if self._lazydelta:
448 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
454 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
449 if b'compengine' in opts:
455 if b'compengine' in opts:
450 self._compengine = opts[b'compengine']
456 self._compengine = opts[b'compengine']
451 if b'zlib.level' in opts:
457 if b'zlib.level' in opts:
452 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
458 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
453 if b'zstd.level' in opts:
459 if b'zstd.level' in opts:
454 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
460 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
455 if b'maxdeltachainspan' in opts:
461 if b'maxdeltachainspan' in opts:
456 self._maxdeltachainspan = opts[b'maxdeltachainspan']
462 self._maxdeltachainspan = opts[b'maxdeltachainspan']
457 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
463 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
458 mmapindexthreshold = opts[b'mmapindexthreshold']
464 mmapindexthreshold = opts[b'mmapindexthreshold']
459 self.hassidedata = bool(opts.get(b'side-data', False))
465 self.hassidedata = bool(opts.get(b'side-data', False))
460 if self.hassidedata:
466 if self.hassidedata:
461 self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
467 self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
462 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
468 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
463 withsparseread = bool(opts.get(b'with-sparse-read', False))
469 withsparseread = bool(opts.get(b'with-sparse-read', False))
464 # sparse-revlog forces sparse-read
470 # sparse-revlog forces sparse-read
465 self._withsparseread = self._sparserevlog or withsparseread
471 self._withsparseread = self._sparserevlog or withsparseread
466 if b'sparse-read-density-threshold' in opts:
472 if b'sparse-read-density-threshold' in opts:
467 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
473 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
468 if b'sparse-read-min-gap-size' in opts:
474 if b'sparse-read-min-gap-size' in opts:
469 self._srmingapsize = opts[b'sparse-read-min-gap-size']
475 self._srmingapsize = opts[b'sparse-read-min-gap-size']
470 if opts.get(b'enableellipsis'):
476 if opts.get(b'enableellipsis'):
471 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
477 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
472
478
473 # revlog v0 doesn't have flag processors
479 # revlog v0 doesn't have flag processors
474 for flag, processor in pycompat.iteritems(
480 for flag, processor in pycompat.iteritems(
475 opts.get(b'flagprocessors', {})
481 opts.get(b'flagprocessors', {})
476 ):
482 ):
477 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
483 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
478
484
479 if self._chunkcachesize <= 0:
485 if self._chunkcachesize <= 0:
480 raise error.RevlogError(
486 raise error.RevlogError(
481 _(b'revlog chunk cache size %r is not greater than 0')
487 _(b'revlog chunk cache size %r is not greater than 0')
482 % self._chunkcachesize
488 % self._chunkcachesize
483 )
489 )
484 elif self._chunkcachesize & (self._chunkcachesize - 1):
490 elif self._chunkcachesize & (self._chunkcachesize - 1):
485 raise error.RevlogError(
491 raise error.RevlogError(
486 _(b'revlog chunk cache size %r is not a power of 2')
492 _(b'revlog chunk cache size %r is not a power of 2')
487 % self._chunkcachesize
493 % self._chunkcachesize
488 )
494 )
489
495
490 indexdata = b''
496 indexdata = b''
491 self._initempty = True
497 self._initempty = True
492 try:
498 try:
493 with self._indexfp() as f:
499 with self._indexfp() as f:
494 if (
500 if (
495 mmapindexthreshold is not None
501 mmapindexthreshold is not None
496 and self.opener.fstat(f).st_size >= mmapindexthreshold
502 and self.opener.fstat(f).st_size >= mmapindexthreshold
497 ):
503 ):
498 # TODO: should .close() to release resources without
504 # TODO: should .close() to release resources without
499 # relying on Python GC
505 # relying on Python GC
500 indexdata = util.buffer(util.mmapread(f))
506 indexdata = util.buffer(util.mmapread(f))
501 else:
507 else:
502 indexdata = f.read()
508 indexdata = f.read()
503 if len(indexdata) > 0:
509 if len(indexdata) > 0:
504 versionflags = versionformat_unpack(indexdata[:4])[0]
510 versionflags = versionformat_unpack(indexdata[:4])[0]
505 self._initempty = False
511 self._initempty = False
506 else:
512 else:
507 versionflags = newversionflags
513 versionflags = newversionflags
508 except IOError as inst:
514 except IOError as inst:
509 if inst.errno != errno.ENOENT:
515 if inst.errno != errno.ENOENT:
510 raise
516 raise
511
517
512 versionflags = newversionflags
518 versionflags = newversionflags
513
519
514 self.version = versionflags
520 self.version = versionflags
515
521
516 flags = versionflags & ~0xFFFF
522 flags = versionflags & ~0xFFFF
517 fmt = versionflags & 0xFFFF
523 fmt = versionflags & 0xFFFF
518
524
519 if fmt == REVLOGV0:
525 if fmt == REVLOGV0:
520 if flags:
526 if flags:
521 raise error.RevlogError(
527 raise error.RevlogError(
522 _(b'unknown flags (%#04x) in version %d revlog %s')
528 _(b'unknown flags (%#04x) in version %d revlog %s')
523 % (flags >> 16, fmt, self.indexfile)
529 % (flags >> 16, fmt, self.indexfile)
524 )
530 )
525
531
526 self._inline = False
532 self._inline = False
527 self._generaldelta = False
533 self._generaldelta = False
528
534
529 elif fmt == REVLOGV1:
535 elif fmt == REVLOGV1:
530 if flags & ~REVLOGV1_FLAGS:
536 if flags & ~REVLOGV1_FLAGS:
531 raise error.RevlogError(
537 raise error.RevlogError(
532 _(b'unknown flags (%#04x) in version %d revlog %s')
538 _(b'unknown flags (%#04x) in version %d revlog %s')
533 % (flags >> 16, fmt, self.indexfile)
539 % (flags >> 16, fmt, self.indexfile)
534 )
540 )
535
541
536 self._inline = versionflags & FLAG_INLINE_DATA
542 self._inline = versionflags & FLAG_INLINE_DATA
537 self._generaldelta = versionflags & FLAG_GENERALDELTA
543 self._generaldelta = versionflags & FLAG_GENERALDELTA
538
544
539 elif fmt == REVLOGV2:
545 elif fmt == REVLOGV2:
540 if flags & ~REVLOGV2_FLAGS:
546 if flags & ~REVLOGV2_FLAGS:
541 raise error.RevlogError(
547 raise error.RevlogError(
542 _(b'unknown flags (%#04x) in version %d revlog %s')
548 _(b'unknown flags (%#04x) in version %d revlog %s')
543 % (flags >> 16, fmt, self.indexfile)
549 % (flags >> 16, fmt, self.indexfile)
544 )
550 )
545
551
546 self._inline = versionflags & FLAG_INLINE_DATA
552 self._inline = versionflags & FLAG_INLINE_DATA
547 # generaldelta implied by version 2 revlogs.
553 # generaldelta implied by version 2 revlogs.
548 self._generaldelta = True
554 self._generaldelta = True
549
555
550 else:
556 else:
551 raise error.RevlogError(
557 raise error.RevlogError(
552 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
558 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
553 )
559 )
554 # sparse-revlog can't be on without general-delta (issue6056)
560 # sparse-revlog can't be on without general-delta (issue6056)
555 if not self._generaldelta:
561 if not self._generaldelta:
556 self._sparserevlog = False
562 self._sparserevlog = False
557
563
558 self._storedeltachains = True
564 self._storedeltachains = True
559
565
560 self._io = revlogio()
566 self._io = revlogio()
561 if self.version == REVLOGV0:
567 if self.version == REVLOGV0:
562 self._io = revlogoldio()
568 self._io = revlogoldio()
563 try:
569 try:
564 d = self._io.parseindex(indexdata, self._inline)
570 d = self._io.parseindex(indexdata, self._inline)
565 except (ValueError, IndexError):
571 except (ValueError, IndexError):
566 raise error.RevlogError(
572 raise error.RevlogError(
567 _(b"index %s is corrupted") % self.indexfile
573 _(b"index %s is corrupted") % self.indexfile
568 )
574 )
569 self.index, self._chunkcache = d
575 self.index, self._chunkcache = d
570 self.nodemap = self.index.nodemap
576 self.nodemap = self.index.nodemap
571 if not self._chunkcache:
577 if not self._chunkcache:
572 self._chunkclear()
578 self._chunkclear()
573 # revnum -> (chain-length, sum-delta-length)
579 # revnum -> (chain-length, sum-delta-length)
574 self._chaininfocache = {}
580 self._chaininfocache = {}
575 # revlog header -> revlog compressor
581 # revlog header -> revlog compressor
576 self._decompressors = {}
582 self._decompressors = {}
577
583
578 @util.propertycache
584 @util.propertycache
579 def _compressor(self):
585 def _compressor(self):
580 engine = util.compengines[self._compengine]
586 engine = util.compengines[self._compengine]
581 return engine.revlogcompressor(self._compengineopts)
587 return engine.revlogcompressor(self._compengineopts)
582
588
583 def _indexfp(self, mode=b'r'):
589 def _indexfp(self, mode=b'r'):
584 """file object for the revlog's index file"""
590 """file object for the revlog's index file"""
585 args = {'mode': mode}
591 args = {'mode': mode}
586 if mode != b'r':
592 if mode != b'r':
587 args['checkambig'] = self._checkambig
593 args['checkambig'] = self._checkambig
588 if mode == b'w':
594 if mode == b'w':
589 args['atomictemp'] = True
595 args['atomictemp'] = True
590 return self.opener(self.indexfile, **args)
596 return self.opener(self.indexfile, **args)
591
597
592 def _datafp(self, mode=b'r'):
598 def _datafp(self, mode=b'r'):
593 """file object for the revlog's data file"""
599 """file object for the revlog's data file"""
594 return self.opener(self.datafile, mode=mode)
600 return self.opener(self.datafile, mode=mode)
595
601
596 @contextlib.contextmanager
602 @contextlib.contextmanager
597 def _datareadfp(self, existingfp=None):
603 def _datareadfp(self, existingfp=None):
598 """file object suitable to read data"""
604 """file object suitable to read data"""
599 # Use explicit file handle, if given.
605 # Use explicit file handle, if given.
600 if existingfp is not None:
606 if existingfp is not None:
601 yield existingfp
607 yield existingfp
602
608
603 # Use a file handle being actively used for writes, if available.
609 # Use a file handle being actively used for writes, if available.
604 # There is some danger to doing this because reads will seek the
610 # There is some danger to doing this because reads will seek the
605 # file. However, _writeentry() performs a SEEK_END before all writes,
611 # file. However, _writeentry() performs a SEEK_END before all writes,
606 # so we should be safe.
612 # so we should be safe.
607 elif self._writinghandles:
613 elif self._writinghandles:
608 if self._inline:
614 if self._inline:
609 yield self._writinghandles[0]
615 yield self._writinghandles[0]
610 else:
616 else:
611 yield self._writinghandles[1]
617 yield self._writinghandles[1]
612
618
613 # Otherwise open a new file handle.
619 # Otherwise open a new file handle.
614 else:
620 else:
615 if self._inline:
621 if self._inline:
616 func = self._indexfp
622 func = self._indexfp
617 else:
623 else:
618 func = self._datafp
624 func = self._datafp
619 with func() as fp:
625 with func() as fp:
620 yield fp
626 yield fp
621
627
622 def tiprev(self):
628 def tiprev(self):
623 return len(self.index) - 1
629 return len(self.index) - 1
624
630
625 def tip(self):
631 def tip(self):
626 return self.node(self.tiprev())
632 return self.node(self.tiprev())
627
633
628 def __contains__(self, rev):
634 def __contains__(self, rev):
629 return 0 <= rev < len(self)
635 return 0 <= rev < len(self)
630
636
631 def __len__(self):
637 def __len__(self):
632 return len(self.index)
638 return len(self.index)
633
639
634 def __iter__(self):
640 def __iter__(self):
635 return iter(pycompat.xrange(len(self)))
641 return iter(pycompat.xrange(len(self)))
636
642
637 def revs(self, start=0, stop=None):
643 def revs(self, start=0, stop=None):
638 """iterate over all rev in this revlog (from start to stop)"""
644 """iterate over all rev in this revlog (from start to stop)"""
639 return storageutil.iterrevs(len(self), start=start, stop=stop)
645 return storageutil.iterrevs(len(self), start=start, stop=stop)
640
646
641 @util.propertycache
647 @util.propertycache
642 def nodemap(self):
648 def nodemap(self):
643 if self.index:
649 if self.index:
644 # populate mapping down to the initial node
650 # populate mapping down to the initial node
645 node0 = self.index[0][7] # get around changelog filtering
651 node0 = self.index[0][7] # get around changelog filtering
646 self.rev(node0)
652 self.rev(node0)
647 return self.index.nodemap
653 return self.index.nodemap
648
654
649 @property
655 @property
650 def _nodecache(self):
656 def _nodecache(self):
651 msg = "revlog._nodecache is deprecated, use revlog.index.nodemap"
657 msg = "revlog._nodecache is deprecated, use revlog.index.nodemap"
652 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
658 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
653 return self.index.nodemap
659 return self.index.nodemap
654
660
655 def hasnode(self, node):
661 def hasnode(self, node):
656 try:
662 try:
657 self.rev(node)
663 self.rev(node)
658 return True
664 return True
659 except KeyError:
665 except KeyError:
660 return False
666 return False
661
667
662 def candelta(self, baserev, rev):
668 def candelta(self, baserev, rev):
663 """whether two revisions (baserev, rev) can be delta-ed or not"""
669 """whether two revisions (baserev, rev) can be delta-ed or not"""
664 # Disable delta if either rev requires a content-changing flag
670 # Disable delta if either rev requires a content-changing flag
665 # processor (ex. LFS). This is because such flag processor can alter
671 # processor (ex. LFS). This is because such flag processor can alter
666 # the rawtext content that the delta will be based on, and two clients
672 # the rawtext content that the delta will be based on, and two clients
667 # could have a same revlog node with different flags (i.e. different
673 # could have a same revlog node with different flags (i.e. different
668 # rawtext contents) and the delta could be incompatible.
674 # rawtext contents) and the delta could be incompatible.
669 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
675 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
670 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
676 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
671 ):
677 ):
672 return False
678 return False
673 return True
679 return True
674
680
675 def clearcaches(self):
681 def clearcaches(self):
676 self._revisioncache = None
682 self._revisioncache = None
677 self._chainbasecache.clear()
683 self._chainbasecache.clear()
678 self._chunkcache = (0, b'')
684 self._chunkcache = (0, b'')
679 self._pcache = {}
685 self._pcache = {}
680 self.index.clearcaches()
686 self.index.clearcaches()
681
687
682 def rev(self, node):
688 def rev(self, node):
683 try:
689 try:
684 return self.index.nodemap[node]
690 return self.index.nodemap[node]
685 except TypeError:
691 except TypeError:
686 raise
692 raise
687 except error.RevlogError:
693 except error.RevlogError:
688 # parsers.c radix tree lookup failed
694 # parsers.c radix tree lookup failed
689 if node == wdirid or node in wdirfilenodeids:
695 if node == wdirid or node in wdirfilenodeids:
690 raise error.WdirUnsupported
696 raise error.WdirUnsupported
691 raise error.LookupError(node, self.indexfile, _(b'no node'))
697 raise error.LookupError(node, self.indexfile, _(b'no node'))
692
698
693 # Accessors for index entries.
699 # Accessors for index entries.
694
700
695 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
701 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
696 # are flags.
702 # are flags.
697 def start(self, rev):
703 def start(self, rev):
698 return int(self.index[rev][0] >> 16)
704 return int(self.index[rev][0] >> 16)
699
705
700 def flags(self, rev):
706 def flags(self, rev):
701 return self.index[rev][0] & 0xFFFF
707 return self.index[rev][0] & 0xFFFF
702
708
703 def length(self, rev):
709 def length(self, rev):
704 return self.index[rev][1]
710 return self.index[rev][1]
705
711
706 def rawsize(self, rev):
712 def rawsize(self, rev):
707 """return the length of the uncompressed text for a given revision"""
713 """return the length of the uncompressed text for a given revision"""
708 l = self.index[rev][2]
714 l = self.index[rev][2]
709 if l >= 0:
715 if l >= 0:
710 return l
716 return l
711
717
712 t = self.rawdata(rev)
718 t = self.rawdata(rev)
713 return len(t)
719 return len(t)
714
720
715 def size(self, rev):
721 def size(self, rev):
716 """length of non-raw text (processed by a "read" flag processor)"""
722 """length of non-raw text (processed by a "read" flag processor)"""
717 # fast path: if no "read" flag processor could change the content,
723 # fast path: if no "read" flag processor could change the content,
718 # size is rawsize. note: ELLIPSIS is known to not change the content.
724 # size is rawsize. note: ELLIPSIS is known to not change the content.
719 flags = self.flags(rev)
725 flags = self.flags(rev)
720 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
726 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
721 return self.rawsize(rev)
727 return self.rawsize(rev)
722
728
723 return len(self.revision(rev, raw=False))
729 return len(self.revision(rev, raw=False))
724
730
725 def chainbase(self, rev):
731 def chainbase(self, rev):
726 base = self._chainbasecache.get(rev)
732 base = self._chainbasecache.get(rev)
727 if base is not None:
733 if base is not None:
728 return base
734 return base
729
735
730 index = self.index
736 index = self.index
731 iterrev = rev
737 iterrev = rev
732 base = index[iterrev][3]
738 base = index[iterrev][3]
733 while base != iterrev:
739 while base != iterrev:
734 iterrev = base
740 iterrev = base
735 base = index[iterrev][3]
741 base = index[iterrev][3]
736
742
737 self._chainbasecache[rev] = base
743 self._chainbasecache[rev] = base
738 return base
744 return base
739
745
740 def linkrev(self, rev):
746 def linkrev(self, rev):
741 return self.index[rev][4]
747 return self.index[rev][4]
742
748
743 def parentrevs(self, rev):
749 def parentrevs(self, rev):
744 try:
750 try:
745 entry = self.index[rev]
751 entry = self.index[rev]
746 except IndexError:
752 except IndexError:
747 if rev == wdirrev:
753 if rev == wdirrev:
748 raise error.WdirUnsupported
754 raise error.WdirUnsupported
749 raise
755 raise
750
756
751 return entry[5], entry[6]
757 return entry[5], entry[6]
752
758
753 # fast parentrevs(rev) where rev isn't filtered
759 # fast parentrevs(rev) where rev isn't filtered
754 _uncheckedparentrevs = parentrevs
760 _uncheckedparentrevs = parentrevs
755
761
756 def node(self, rev):
762 def node(self, rev):
757 try:
763 try:
758 return self.index[rev][7]
764 return self.index[rev][7]
759 except IndexError:
765 except IndexError:
760 if rev == wdirrev:
766 if rev == wdirrev:
761 raise error.WdirUnsupported
767 raise error.WdirUnsupported
762 raise
768 raise
763
769
764 # Derived from index values.
770 # Derived from index values.
765
771
766 def end(self, rev):
772 def end(self, rev):
767 return self.start(rev) + self.length(rev)
773 return self.start(rev) + self.length(rev)
768
774
769 def parents(self, node):
775 def parents(self, node):
770 i = self.index
776 i = self.index
771 d = i[self.rev(node)]
777 d = i[self.rev(node)]
772 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
778 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
773
779
774 def chainlen(self, rev):
780 def chainlen(self, rev):
775 return self._chaininfo(rev)[0]
781 return self._chaininfo(rev)[0]
776
782
777 def _chaininfo(self, rev):
783 def _chaininfo(self, rev):
778 chaininfocache = self._chaininfocache
784 chaininfocache = self._chaininfocache
779 if rev in chaininfocache:
785 if rev in chaininfocache:
780 return chaininfocache[rev]
786 return chaininfocache[rev]
781 index = self.index
787 index = self.index
782 generaldelta = self._generaldelta
788 generaldelta = self._generaldelta
783 iterrev = rev
789 iterrev = rev
784 e = index[iterrev]
790 e = index[iterrev]
785 clen = 0
791 clen = 0
786 compresseddeltalen = 0
792 compresseddeltalen = 0
787 while iterrev != e[3]:
793 while iterrev != e[3]:
788 clen += 1
794 clen += 1
789 compresseddeltalen += e[1]
795 compresseddeltalen += e[1]
790 if generaldelta:
796 if generaldelta:
791 iterrev = e[3]
797 iterrev = e[3]
792 else:
798 else:
793 iterrev -= 1
799 iterrev -= 1
794 if iterrev in chaininfocache:
800 if iterrev in chaininfocache:
795 t = chaininfocache[iterrev]
801 t = chaininfocache[iterrev]
796 clen += t[0]
802 clen += t[0]
797 compresseddeltalen += t[1]
803 compresseddeltalen += t[1]
798 break
804 break
799 e = index[iterrev]
805 e = index[iterrev]
800 else:
806 else:
801 # Add text length of base since decompressing that also takes
807 # Add text length of base since decompressing that also takes
802 # work. For cache hits the length is already included.
808 # work. For cache hits the length is already included.
803 compresseddeltalen += e[1]
809 compresseddeltalen += e[1]
804 r = (clen, compresseddeltalen)
810 r = (clen, compresseddeltalen)
805 chaininfocache[rev] = r
811 chaininfocache[rev] = r
806 return r
812 return r
807
813
808 def _deltachain(self, rev, stoprev=None):
814 def _deltachain(self, rev, stoprev=None):
809 """Obtain the delta chain for a revision.
815 """Obtain the delta chain for a revision.
810
816
811 ``stoprev`` specifies a revision to stop at. If not specified, we
817 ``stoprev`` specifies a revision to stop at. If not specified, we
812 stop at the base of the chain.
818 stop at the base of the chain.
813
819
814 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
820 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
815 revs in ascending order and ``stopped`` is a bool indicating whether
821 revs in ascending order and ``stopped`` is a bool indicating whether
816 ``stoprev`` was hit.
822 ``stoprev`` was hit.
817 """
823 """
818 # Try C implementation.
824 # Try C implementation.
819 try:
825 try:
820 return self.index.deltachain(rev, stoprev, self._generaldelta)
826 return self.index.deltachain(rev, stoprev, self._generaldelta)
821 except AttributeError:
827 except AttributeError:
822 pass
828 pass
823
829
824 chain = []
830 chain = []
825
831
826 # Alias to prevent attribute lookup in tight loop.
832 # Alias to prevent attribute lookup in tight loop.
827 index = self.index
833 index = self.index
828 generaldelta = self._generaldelta
834 generaldelta = self._generaldelta
829
835
830 iterrev = rev
836 iterrev = rev
831 e = index[iterrev]
837 e = index[iterrev]
832 while iterrev != e[3] and iterrev != stoprev:
838 while iterrev != e[3] and iterrev != stoprev:
833 chain.append(iterrev)
839 chain.append(iterrev)
834 if generaldelta:
840 if generaldelta:
835 iterrev = e[3]
841 iterrev = e[3]
836 else:
842 else:
837 iterrev -= 1
843 iterrev -= 1
838 e = index[iterrev]
844 e = index[iterrev]
839
845
840 if iterrev == stoprev:
846 if iterrev == stoprev:
841 stopped = True
847 stopped = True
842 else:
848 else:
843 chain.append(iterrev)
849 chain.append(iterrev)
844 stopped = False
850 stopped = False
845
851
846 chain.reverse()
852 chain.reverse()
847 return chain, stopped
853 return chain, stopped
848
854
849 def ancestors(self, revs, stoprev=0, inclusive=False):
855 def ancestors(self, revs, stoprev=0, inclusive=False):
850 """Generate the ancestors of 'revs' in reverse revision order.
856 """Generate the ancestors of 'revs' in reverse revision order.
851 Does not generate revs lower than stoprev.
857 Does not generate revs lower than stoprev.
852
858
853 See the documentation for ancestor.lazyancestors for more details."""
859 See the documentation for ancestor.lazyancestors for more details."""
854
860
855 # first, make sure start revisions aren't filtered
861 # first, make sure start revisions aren't filtered
856 revs = list(revs)
862 revs = list(revs)
857 checkrev = self.node
863 checkrev = self.node
858 for r in revs:
864 for r in revs:
859 checkrev(r)
865 checkrev(r)
860 # and we're sure ancestors aren't filtered as well
866 # and we're sure ancestors aren't filtered as well
861
867
862 if rustancestor is not None:
868 if rustancestor is not None:
863 lazyancestors = rustancestor.LazyAncestors
869 lazyancestors = rustancestor.LazyAncestors
864 arg = self.index
870 arg = self.index
865 elif util.safehasattr(parsers, b'rustlazyancestors'):
871 elif util.safehasattr(parsers, b'rustlazyancestors'):
866 lazyancestors = ancestor.rustlazyancestors
872 lazyancestors = ancestor.rustlazyancestors
867 arg = self.index
873 arg = self.index
868 else:
874 else:
869 lazyancestors = ancestor.lazyancestors
875 lazyancestors = ancestor.lazyancestors
870 arg = self._uncheckedparentrevs
876 arg = self._uncheckedparentrevs
871 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
877 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
872
878
873 def descendants(self, revs):
879 def descendants(self, revs):
874 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
880 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
875
881
876 def findcommonmissing(self, common=None, heads=None):
882 def findcommonmissing(self, common=None, heads=None):
877 """Return a tuple of the ancestors of common and the ancestors of heads
883 """Return a tuple of the ancestors of common and the ancestors of heads
878 that are not ancestors of common. In revset terminology, we return the
884 that are not ancestors of common. In revset terminology, we return the
879 tuple:
885 tuple:
880
886
881 ::common, (::heads) - (::common)
887 ::common, (::heads) - (::common)
882
888
883 The list is sorted by revision number, meaning it is
889 The list is sorted by revision number, meaning it is
884 topologically sorted.
890 topologically sorted.
885
891
886 'heads' and 'common' are both lists of node IDs. If heads is
892 'heads' and 'common' are both lists of node IDs. If heads is
887 not supplied, uses all of the revlog's heads. If common is not
893 not supplied, uses all of the revlog's heads. If common is not
888 supplied, uses nullid."""
894 supplied, uses nullid."""
889 if common is None:
895 if common is None:
890 common = [nullid]
896 common = [nullid]
891 if heads is None:
897 if heads is None:
892 heads = self.heads()
898 heads = self.heads()
893
899
894 common = [self.rev(n) for n in common]
900 common = [self.rev(n) for n in common]
895 heads = [self.rev(n) for n in heads]
901 heads = [self.rev(n) for n in heads]
896
902
897 # we want the ancestors, but inclusive
903 # we want the ancestors, but inclusive
898 class lazyset(object):
904 class lazyset(object):
899 def __init__(self, lazyvalues):
905 def __init__(self, lazyvalues):
900 self.addedvalues = set()
906 self.addedvalues = set()
901 self.lazyvalues = lazyvalues
907 self.lazyvalues = lazyvalues
902
908
903 def __contains__(self, value):
909 def __contains__(self, value):
904 return value in self.addedvalues or value in self.lazyvalues
910 return value in self.addedvalues or value in self.lazyvalues
905
911
906 def __iter__(self):
912 def __iter__(self):
907 added = self.addedvalues
913 added = self.addedvalues
908 for r in added:
914 for r in added:
909 yield r
915 yield r
910 for r in self.lazyvalues:
916 for r in self.lazyvalues:
911 if not r in added:
917 if not r in added:
912 yield r
918 yield r
913
919
914 def add(self, value):
920 def add(self, value):
915 self.addedvalues.add(value)
921 self.addedvalues.add(value)
916
922
917 def update(self, values):
923 def update(self, values):
918 self.addedvalues.update(values)
924 self.addedvalues.update(values)
919
925
920 has = lazyset(self.ancestors(common))
926 has = lazyset(self.ancestors(common))
921 has.add(nullrev)
927 has.add(nullrev)
922 has.update(common)
928 has.update(common)
923
929
924 # take all ancestors from heads that aren't in has
930 # take all ancestors from heads that aren't in has
925 missing = set()
931 missing = set()
926 visit = collections.deque(r for r in heads if r not in has)
932 visit = collections.deque(r for r in heads if r not in has)
927 while visit:
933 while visit:
928 r = visit.popleft()
934 r = visit.popleft()
929 if r in missing:
935 if r in missing:
930 continue
936 continue
931 else:
937 else:
932 missing.add(r)
938 missing.add(r)
933 for p in self.parentrevs(r):
939 for p in self.parentrevs(r):
934 if p not in has:
940 if p not in has:
935 visit.append(p)
941 visit.append(p)
936 missing = list(missing)
942 missing = list(missing)
937 missing.sort()
943 missing.sort()
938 return has, [self.node(miss) for miss in missing]
944 return has, [self.node(miss) for miss in missing]
939
945
940 def incrementalmissingrevs(self, common=None):
946 def incrementalmissingrevs(self, common=None):
941 """Return an object that can be used to incrementally compute the
947 """Return an object that can be used to incrementally compute the
942 revision numbers of the ancestors of arbitrary sets that are not
948 revision numbers of the ancestors of arbitrary sets that are not
943 ancestors of common. This is an ancestor.incrementalmissingancestors
949 ancestors of common. This is an ancestor.incrementalmissingancestors
944 object.
950 object.
945
951
946 'common' is a list of revision numbers. If common is not supplied, uses
952 'common' is a list of revision numbers. If common is not supplied, uses
947 nullrev.
953 nullrev.
948 """
954 """
949 if common is None:
955 if common is None:
950 common = [nullrev]
956 common = [nullrev]
951
957
952 if rustancestor is not None:
958 if rustancestor is not None:
953 return rustancestor.MissingAncestors(self.index, common)
959 return rustancestor.MissingAncestors(self.index, common)
954 return ancestor.incrementalmissingancestors(self.parentrevs, common)
960 return ancestor.incrementalmissingancestors(self.parentrevs, common)
955
961
956 def findmissingrevs(self, common=None, heads=None):
962 def findmissingrevs(self, common=None, heads=None):
957 """Return the revision numbers of the ancestors of heads that
963 """Return the revision numbers of the ancestors of heads that
958 are not ancestors of common.
964 are not ancestors of common.
959
965
960 More specifically, return a list of revision numbers corresponding to
966 More specifically, return a list of revision numbers corresponding to
961 nodes N such that every N satisfies the following constraints:
967 nodes N such that every N satisfies the following constraints:
962
968
963 1. N is an ancestor of some node in 'heads'
969 1. N is an ancestor of some node in 'heads'
964 2. N is not an ancestor of any node in 'common'
970 2. N is not an ancestor of any node in 'common'
965
971
966 The list is sorted by revision number, meaning it is
972 The list is sorted by revision number, meaning it is
967 topologically sorted.
973 topologically sorted.
968
974
969 'heads' and 'common' are both lists of revision numbers. If heads is
975 'heads' and 'common' are both lists of revision numbers. If heads is
970 not supplied, uses all of the revlog's heads. If common is not
976 not supplied, uses all of the revlog's heads. If common is not
971 supplied, uses nullid."""
977 supplied, uses nullid."""
972 if common is None:
978 if common is None:
973 common = [nullrev]
979 common = [nullrev]
974 if heads is None:
980 if heads is None:
975 heads = self.headrevs()
981 heads = self.headrevs()
976
982
977 inc = self.incrementalmissingrevs(common=common)
983 inc = self.incrementalmissingrevs(common=common)
978 return inc.missingancestors(heads)
984 return inc.missingancestors(heads)
979
985
980 def findmissing(self, common=None, heads=None):
986 def findmissing(self, common=None, heads=None):
981 """Return the ancestors of heads that are not ancestors of common.
987 """Return the ancestors of heads that are not ancestors of common.
982
988
983 More specifically, return a list of nodes N such that every N
989 More specifically, return a list of nodes N such that every N
984 satisfies the following constraints:
990 satisfies the following constraints:
985
991
986 1. N is an ancestor of some node in 'heads'
992 1. N is an ancestor of some node in 'heads'
987 2. N is not an ancestor of any node in 'common'
993 2. N is not an ancestor of any node in 'common'
988
994
989 The list is sorted by revision number, meaning it is
995 The list is sorted by revision number, meaning it is
990 topologically sorted.
996 topologically sorted.
991
997
992 'heads' and 'common' are both lists of node IDs. If heads is
998 'heads' and 'common' are both lists of node IDs. If heads is
993 not supplied, uses all of the revlog's heads. If common is not
999 not supplied, uses all of the revlog's heads. If common is not
994 supplied, uses nullid."""
1000 supplied, uses nullid."""
995 if common is None:
1001 if common is None:
996 common = [nullid]
1002 common = [nullid]
997 if heads is None:
1003 if heads is None:
998 heads = self.heads()
1004 heads = self.heads()
999
1005
1000 common = [self.rev(n) for n in common]
1006 common = [self.rev(n) for n in common]
1001 heads = [self.rev(n) for n in heads]
1007 heads = [self.rev(n) for n in heads]
1002
1008
1003 inc = self.incrementalmissingrevs(common=common)
1009 inc = self.incrementalmissingrevs(common=common)
1004 return [self.node(r) for r in inc.missingancestors(heads)]
1010 return [self.node(r) for r in inc.missingancestors(heads)]
1005
1011
1006 def nodesbetween(self, roots=None, heads=None):
1012 def nodesbetween(self, roots=None, heads=None):
1007 """Return a topological path from 'roots' to 'heads'.
1013 """Return a topological path from 'roots' to 'heads'.
1008
1014
1009 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1015 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1010 topologically sorted list of all nodes N that satisfy both of
1016 topologically sorted list of all nodes N that satisfy both of
1011 these constraints:
1017 these constraints:
1012
1018
1013 1. N is a descendant of some node in 'roots'
1019 1. N is a descendant of some node in 'roots'
1014 2. N is an ancestor of some node in 'heads'
1020 2. N is an ancestor of some node in 'heads'
1015
1021
1016 Every node is considered to be both a descendant and an ancestor
1022 Every node is considered to be both a descendant and an ancestor
1017 of itself, so every reachable node in 'roots' and 'heads' will be
1023 of itself, so every reachable node in 'roots' and 'heads' will be
1018 included in 'nodes'.
1024 included in 'nodes'.
1019
1025
1020 'outroots' is the list of reachable nodes in 'roots', i.e., the
1026 'outroots' is the list of reachable nodes in 'roots', i.e., the
1021 subset of 'roots' that is returned in 'nodes'. Likewise,
1027 subset of 'roots' that is returned in 'nodes'. Likewise,
1022 'outheads' is the subset of 'heads' that is also in 'nodes'.
1028 'outheads' is the subset of 'heads' that is also in 'nodes'.
1023
1029
1024 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1030 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1025 unspecified, uses nullid as the only root. If 'heads' is
1031 unspecified, uses nullid as the only root. If 'heads' is
1026 unspecified, uses list of all of the revlog's heads."""
1032 unspecified, uses list of all of the revlog's heads."""
1027 nonodes = ([], [], [])
1033 nonodes = ([], [], [])
1028 if roots is not None:
1034 if roots is not None:
1029 roots = list(roots)
1035 roots = list(roots)
1030 if not roots:
1036 if not roots:
1031 return nonodes
1037 return nonodes
1032 lowestrev = min([self.rev(n) for n in roots])
1038 lowestrev = min([self.rev(n) for n in roots])
1033 else:
1039 else:
1034 roots = [nullid] # Everybody's a descendant of nullid
1040 roots = [nullid] # Everybody's a descendant of nullid
1035 lowestrev = nullrev
1041 lowestrev = nullrev
1036 if (lowestrev == nullrev) and (heads is None):
1042 if (lowestrev == nullrev) and (heads is None):
1037 # We want _all_ the nodes!
1043 # We want _all_ the nodes!
1038 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1044 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1039 if heads is None:
1045 if heads is None:
1040 # All nodes are ancestors, so the latest ancestor is the last
1046 # All nodes are ancestors, so the latest ancestor is the last
1041 # node.
1047 # node.
1042 highestrev = len(self) - 1
1048 highestrev = len(self) - 1
1043 # Set ancestors to None to signal that every node is an ancestor.
1049 # Set ancestors to None to signal that every node is an ancestor.
1044 ancestors = None
1050 ancestors = None
1045 # Set heads to an empty dictionary for later discovery of heads
1051 # Set heads to an empty dictionary for later discovery of heads
1046 heads = {}
1052 heads = {}
1047 else:
1053 else:
1048 heads = list(heads)
1054 heads = list(heads)
1049 if not heads:
1055 if not heads:
1050 return nonodes
1056 return nonodes
1051 ancestors = set()
1057 ancestors = set()
1052 # Turn heads into a dictionary so we can remove 'fake' heads.
1058 # Turn heads into a dictionary so we can remove 'fake' heads.
1053 # Also, later we will be using it to filter out the heads we can't
1059 # Also, later we will be using it to filter out the heads we can't
1054 # find from roots.
1060 # find from roots.
1055 heads = dict.fromkeys(heads, False)
1061 heads = dict.fromkeys(heads, False)
1056 # Start at the top and keep marking parents until we're done.
1062 # Start at the top and keep marking parents until we're done.
1057 nodestotag = set(heads)
1063 nodestotag = set(heads)
1058 # Remember where the top was so we can use it as a limit later.
1064 # Remember where the top was so we can use it as a limit later.
1059 highestrev = max([self.rev(n) for n in nodestotag])
1065 highestrev = max([self.rev(n) for n in nodestotag])
1060 while nodestotag:
1066 while nodestotag:
1061 # grab a node to tag
1067 # grab a node to tag
1062 n = nodestotag.pop()
1068 n = nodestotag.pop()
1063 # Never tag nullid
1069 # Never tag nullid
1064 if n == nullid:
1070 if n == nullid:
1065 continue
1071 continue
1066 # A node's revision number represents its place in a
1072 # A node's revision number represents its place in a
1067 # topologically sorted list of nodes.
1073 # topologically sorted list of nodes.
1068 r = self.rev(n)
1074 r = self.rev(n)
1069 if r >= lowestrev:
1075 if r >= lowestrev:
1070 if n not in ancestors:
1076 if n not in ancestors:
1071 # If we are possibly a descendant of one of the roots
1077 # If we are possibly a descendant of one of the roots
1072 # and we haven't already been marked as an ancestor
1078 # and we haven't already been marked as an ancestor
1073 ancestors.add(n) # Mark as ancestor
1079 ancestors.add(n) # Mark as ancestor
1074 # Add non-nullid parents to list of nodes to tag.
1080 # Add non-nullid parents to list of nodes to tag.
1075 nodestotag.update(
1081 nodestotag.update(
1076 [p for p in self.parents(n) if p != nullid]
1082 [p for p in self.parents(n) if p != nullid]
1077 )
1083 )
1078 elif n in heads: # We've seen it before, is it a fake head?
1084 elif n in heads: # We've seen it before, is it a fake head?
1079 # So it is, real heads should not be the ancestors of
1085 # So it is, real heads should not be the ancestors of
1080 # any other heads.
1086 # any other heads.
1081 heads.pop(n)
1087 heads.pop(n)
1082 if not ancestors:
1088 if not ancestors:
1083 return nonodes
1089 return nonodes
1084 # Now that we have our set of ancestors, we want to remove any
1090 # Now that we have our set of ancestors, we want to remove any
1085 # roots that are not ancestors.
1091 # roots that are not ancestors.
1086
1092
1087 # If one of the roots was nullid, everything is included anyway.
1093 # If one of the roots was nullid, everything is included anyway.
1088 if lowestrev > nullrev:
1094 if lowestrev > nullrev:
1089 # But, since we weren't, let's recompute the lowest rev to not
1095 # But, since we weren't, let's recompute the lowest rev to not
1090 # include roots that aren't ancestors.
1096 # include roots that aren't ancestors.
1091
1097
1092 # Filter out roots that aren't ancestors of heads
1098 # Filter out roots that aren't ancestors of heads
1093 roots = [root for root in roots if root in ancestors]
1099 roots = [root for root in roots if root in ancestors]
1094 # Recompute the lowest revision
1100 # Recompute the lowest revision
1095 if roots:
1101 if roots:
1096 lowestrev = min([self.rev(root) for root in roots])
1102 lowestrev = min([self.rev(root) for root in roots])
1097 else:
1103 else:
1098 # No more roots? Return empty list
1104 # No more roots? Return empty list
1099 return nonodes
1105 return nonodes
1100 else:
1106 else:
1101 # We are descending from nullid, and don't need to care about
1107 # We are descending from nullid, and don't need to care about
1102 # any other roots.
1108 # any other roots.
1103 lowestrev = nullrev
1109 lowestrev = nullrev
1104 roots = [nullid]
1110 roots = [nullid]
1105 # Transform our roots list into a set.
1111 # Transform our roots list into a set.
1106 descendants = set(roots)
1112 descendants = set(roots)
1107 # Also, keep the original roots so we can filter out roots that aren't
1113 # Also, keep the original roots so we can filter out roots that aren't
1108 # 'real' roots (i.e. are descended from other roots).
1114 # 'real' roots (i.e. are descended from other roots).
1109 roots = descendants.copy()
1115 roots = descendants.copy()
1110 # Our topologically sorted list of output nodes.
1116 # Our topologically sorted list of output nodes.
1111 orderedout = []
1117 orderedout = []
1112 # Don't start at nullid since we don't want nullid in our output list,
1118 # Don't start at nullid since we don't want nullid in our output list,
1113 # and if nullid shows up in descendants, empty parents will look like
1119 # and if nullid shows up in descendants, empty parents will look like
1114 # they're descendants.
1120 # they're descendants.
1115 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1121 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1116 n = self.node(r)
1122 n = self.node(r)
1117 isdescendant = False
1123 isdescendant = False
1118 if lowestrev == nullrev: # Everybody is a descendant of nullid
1124 if lowestrev == nullrev: # Everybody is a descendant of nullid
1119 isdescendant = True
1125 isdescendant = True
1120 elif n in descendants:
1126 elif n in descendants:
1121 # n is already a descendant
1127 # n is already a descendant
1122 isdescendant = True
1128 isdescendant = True
1123 # This check only needs to be done here because all the roots
1129 # This check only needs to be done here because all the roots
1124 # will start being marked is descendants before the loop.
1130 # will start being marked is descendants before the loop.
1125 if n in roots:
1131 if n in roots:
1126 # If n was a root, check if it's a 'real' root.
1132 # If n was a root, check if it's a 'real' root.
1127 p = tuple(self.parents(n))
1133 p = tuple(self.parents(n))
1128 # If any of its parents are descendants, it's not a root.
1134 # If any of its parents are descendants, it's not a root.
1129 if (p[0] in descendants) or (p[1] in descendants):
1135 if (p[0] in descendants) or (p[1] in descendants):
1130 roots.remove(n)
1136 roots.remove(n)
1131 else:
1137 else:
1132 p = tuple(self.parents(n))
1138 p = tuple(self.parents(n))
1133 # A node is a descendant if either of its parents are
1139 # A node is a descendant if either of its parents are
1134 # descendants. (We seeded the dependents list with the roots
1140 # descendants. (We seeded the dependents list with the roots
1135 # up there, remember?)
1141 # up there, remember?)
1136 if (p[0] in descendants) or (p[1] in descendants):
1142 if (p[0] in descendants) or (p[1] in descendants):
1137 descendants.add(n)
1143 descendants.add(n)
1138 isdescendant = True
1144 isdescendant = True
1139 if isdescendant and ((ancestors is None) or (n in ancestors)):
1145 if isdescendant and ((ancestors is None) or (n in ancestors)):
1140 # Only include nodes that are both descendants and ancestors.
1146 # Only include nodes that are both descendants and ancestors.
1141 orderedout.append(n)
1147 orderedout.append(n)
1142 if (ancestors is not None) and (n in heads):
1148 if (ancestors is not None) and (n in heads):
1143 # We're trying to figure out which heads are reachable
1149 # We're trying to figure out which heads are reachable
1144 # from roots.
1150 # from roots.
1145 # Mark this head as having been reached
1151 # Mark this head as having been reached
1146 heads[n] = True
1152 heads[n] = True
1147 elif ancestors is None:
1153 elif ancestors is None:
1148 # Otherwise, we're trying to discover the heads.
1154 # Otherwise, we're trying to discover the heads.
1149 # Assume this is a head because if it isn't, the next step
1155 # Assume this is a head because if it isn't, the next step
1150 # will eventually remove it.
1156 # will eventually remove it.
1151 heads[n] = True
1157 heads[n] = True
1152 # But, obviously its parents aren't.
1158 # But, obviously its parents aren't.
1153 for p in self.parents(n):
1159 for p in self.parents(n):
1154 heads.pop(p, None)
1160 heads.pop(p, None)
1155 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1161 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1156 roots = list(roots)
1162 roots = list(roots)
1157 assert orderedout
1163 assert orderedout
1158 assert roots
1164 assert roots
1159 assert heads
1165 assert heads
1160 return (orderedout, roots, heads)
1166 return (orderedout, roots, heads)
1161
1167
1162 def headrevs(self, revs=None):
1168 def headrevs(self, revs=None):
1163 if revs is None:
1169 if revs is None:
1164 try:
1170 try:
1165 return self.index.headrevs()
1171 return self.index.headrevs()
1166 except AttributeError:
1172 except AttributeError:
1167 return self._headrevs()
1173 return self._headrevs()
1168 if rustdagop is not None:
1174 if rustdagop is not None:
1169 return rustdagop.headrevs(self.index, revs)
1175 return rustdagop.headrevs(self.index, revs)
1170 return dagop.headrevs(revs, self._uncheckedparentrevs)
1176 return dagop.headrevs(revs, self._uncheckedparentrevs)
1171
1177
1172 def computephases(self, roots):
1178 def computephases(self, roots):
1173 return self.index.computephasesmapsets(roots)
1179 return self.index.computephasesmapsets(roots)
1174
1180
1175 def _headrevs(self):
1181 def _headrevs(self):
1176 count = len(self)
1182 count = len(self)
1177 if not count:
1183 if not count:
1178 return [nullrev]
1184 return [nullrev]
1179 # we won't iter over filtered rev so nobody is a head at start
1185 # we won't iter over filtered rev so nobody is a head at start
1180 ishead = [0] * (count + 1)
1186 ishead = [0] * (count + 1)
1181 index = self.index
1187 index = self.index
1182 for r in self:
1188 for r in self:
1183 ishead[r] = 1 # I may be an head
1189 ishead[r] = 1 # I may be an head
1184 e = index[r]
1190 e = index[r]
1185 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1191 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1186 return [r for r, val in enumerate(ishead) if val]
1192 return [r for r, val in enumerate(ishead) if val]
1187
1193
1188 def heads(self, start=None, stop=None):
1194 def heads(self, start=None, stop=None):
1189 """return the list of all nodes that have no children
1195 """return the list of all nodes that have no children
1190
1196
1191 if start is specified, only heads that are descendants of
1197 if start is specified, only heads that are descendants of
1192 start will be returned
1198 start will be returned
1193 if stop is specified, it will consider all the revs from stop
1199 if stop is specified, it will consider all the revs from stop
1194 as if they had no children
1200 as if they had no children
1195 """
1201 """
1196 if start is None and stop is None:
1202 if start is None and stop is None:
1197 if not len(self):
1203 if not len(self):
1198 return [nullid]
1204 return [nullid]
1199 return [self.node(r) for r in self.headrevs()]
1205 return [self.node(r) for r in self.headrevs()]
1200
1206
1201 if start is None:
1207 if start is None:
1202 start = nullrev
1208 start = nullrev
1203 else:
1209 else:
1204 start = self.rev(start)
1210 start = self.rev(start)
1205
1211
1206 stoprevs = set(self.rev(n) for n in stop or [])
1212 stoprevs = set(self.rev(n) for n in stop or [])
1207
1213
1208 revs = dagop.headrevssubset(
1214 revs = dagop.headrevssubset(
1209 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1215 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1210 )
1216 )
1211
1217
1212 return [self.node(rev) for rev in revs]
1218 return [self.node(rev) for rev in revs]
1213
1219
1214 def children(self, node):
1220 def children(self, node):
1215 """find the children of a given node"""
1221 """find the children of a given node"""
1216 c = []
1222 c = []
1217 p = self.rev(node)
1223 p = self.rev(node)
1218 for r in self.revs(start=p + 1):
1224 for r in self.revs(start=p + 1):
1219 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1225 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1220 if prevs:
1226 if prevs:
1221 for pr in prevs:
1227 for pr in prevs:
1222 if pr == p:
1228 if pr == p:
1223 c.append(self.node(r))
1229 c.append(self.node(r))
1224 elif p == nullrev:
1230 elif p == nullrev:
1225 c.append(self.node(r))
1231 c.append(self.node(r))
1226 return c
1232 return c
1227
1233
1228 def commonancestorsheads(self, a, b):
1234 def commonancestorsheads(self, a, b):
1229 """calculate all the heads of the common ancestors of nodes a and b"""
1235 """calculate all the heads of the common ancestors of nodes a and b"""
1230 a, b = self.rev(a), self.rev(b)
1236 a, b = self.rev(a), self.rev(b)
1231 ancs = self._commonancestorsheads(a, b)
1237 ancs = self._commonancestorsheads(a, b)
1232 return pycompat.maplist(self.node, ancs)
1238 return pycompat.maplist(self.node, ancs)
1233
1239
1234 def _commonancestorsheads(self, *revs):
1240 def _commonancestorsheads(self, *revs):
1235 """calculate all the heads of the common ancestors of revs"""
1241 """calculate all the heads of the common ancestors of revs"""
1236 try:
1242 try:
1237 ancs = self.index.commonancestorsheads(*revs)
1243 ancs = self.index.commonancestorsheads(*revs)
1238 except (AttributeError, OverflowError): # C implementation failed
1244 except (AttributeError, OverflowError): # C implementation failed
1239 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1245 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1240 return ancs
1246 return ancs
1241
1247
1242 def isancestor(self, a, b):
1248 def isancestor(self, a, b):
1243 """return True if node a is an ancestor of node b
1249 """return True if node a is an ancestor of node b
1244
1250
1245 A revision is considered an ancestor of itself."""
1251 A revision is considered an ancestor of itself."""
1246 a, b = self.rev(a), self.rev(b)
1252 a, b = self.rev(a), self.rev(b)
1247 return self.isancestorrev(a, b)
1253 return self.isancestorrev(a, b)
1248
1254
1249 def isancestorrev(self, a, b):
1255 def isancestorrev(self, a, b):
1250 """return True if revision a is an ancestor of revision b
1256 """return True if revision a is an ancestor of revision b
1251
1257
1252 A revision is considered an ancestor of itself.
1258 A revision is considered an ancestor of itself.
1253
1259
1254 The implementation of this is trivial but the use of
1260 The implementation of this is trivial but the use of
1255 reachableroots is not."""
1261 reachableroots is not."""
1256 if a == nullrev:
1262 if a == nullrev:
1257 return True
1263 return True
1258 elif a == b:
1264 elif a == b:
1259 return True
1265 return True
1260 elif a > b:
1266 elif a > b:
1261 return False
1267 return False
1262 return bool(self.reachableroots(a, [b], [a], includepath=False))
1268 return bool(self.reachableroots(a, [b], [a], includepath=False))
1263
1269
1264 def reachableroots(self, minroot, heads, roots, includepath=False):
1270 def reachableroots(self, minroot, heads, roots, includepath=False):
1265 """return (heads(::<roots> and <roots>::<heads>))
1271 """return (heads(::<roots> and <roots>::<heads>))
1266
1272
1267 If includepath is True, return (<roots>::<heads>)."""
1273 If includepath is True, return (<roots>::<heads>)."""
1268 try:
1274 try:
1269 return self.index.reachableroots2(
1275 return self.index.reachableroots2(
1270 minroot, heads, roots, includepath
1276 minroot, heads, roots, includepath
1271 )
1277 )
1272 except AttributeError:
1278 except AttributeError:
1273 return dagop._reachablerootspure(
1279 return dagop._reachablerootspure(
1274 self.parentrevs, minroot, roots, heads, includepath
1280 self.parentrevs, minroot, roots, heads, includepath
1275 )
1281 )
1276
1282
1277 def ancestor(self, a, b):
1283 def ancestor(self, a, b):
1278 """calculate the "best" common ancestor of nodes a and b"""
1284 """calculate the "best" common ancestor of nodes a and b"""
1279
1285
1280 a, b = self.rev(a), self.rev(b)
1286 a, b = self.rev(a), self.rev(b)
1281 try:
1287 try:
1282 ancs = self.index.ancestors(a, b)
1288 ancs = self.index.ancestors(a, b)
1283 except (AttributeError, OverflowError):
1289 except (AttributeError, OverflowError):
1284 ancs = ancestor.ancestors(self.parentrevs, a, b)
1290 ancs = ancestor.ancestors(self.parentrevs, a, b)
1285 if ancs:
1291 if ancs:
1286 # choose a consistent winner when there's a tie
1292 # choose a consistent winner when there's a tie
1287 return min(map(self.node, ancs))
1293 return min(map(self.node, ancs))
1288 return nullid
1294 return nullid
1289
1295
1290 def _match(self, id):
1296 def _match(self, id):
1291 if isinstance(id, int):
1297 if isinstance(id, int):
1292 # rev
1298 # rev
1293 return self.node(id)
1299 return self.node(id)
1294 if len(id) == 20:
1300 if len(id) == 20:
1295 # possibly a binary node
1301 # possibly a binary node
1296 # odds of a binary node being all hex in ASCII are 1 in 10**25
1302 # odds of a binary node being all hex in ASCII are 1 in 10**25
1297 try:
1303 try:
1298 node = id
1304 node = id
1299 self.rev(node) # quick search the index
1305 self.rev(node) # quick search the index
1300 return node
1306 return node
1301 except error.LookupError:
1307 except error.LookupError:
1302 pass # may be partial hex id
1308 pass # may be partial hex id
1303 try:
1309 try:
1304 # str(rev)
1310 # str(rev)
1305 rev = int(id)
1311 rev = int(id)
1306 if b"%d" % rev != id:
1312 if b"%d" % rev != id:
1307 raise ValueError
1313 raise ValueError
1308 if rev < 0:
1314 if rev < 0:
1309 rev = len(self) + rev
1315 rev = len(self) + rev
1310 if rev < 0 or rev >= len(self):
1316 if rev < 0 or rev >= len(self):
1311 raise ValueError
1317 raise ValueError
1312 return self.node(rev)
1318 return self.node(rev)
1313 except (ValueError, OverflowError):
1319 except (ValueError, OverflowError):
1314 pass
1320 pass
1315 if len(id) == 40:
1321 if len(id) == 40:
1316 try:
1322 try:
1317 # a full hex nodeid?
1323 # a full hex nodeid?
1318 node = bin(id)
1324 node = bin(id)
1319 self.rev(node)
1325 self.rev(node)
1320 return node
1326 return node
1321 except (TypeError, error.LookupError):
1327 except (TypeError, error.LookupError):
1322 pass
1328 pass
1323
1329
1324 def _partialmatch(self, id):
1330 def _partialmatch(self, id):
1325 # we don't care wdirfilenodeids as they should be always full hash
1331 # we don't care wdirfilenodeids as they should be always full hash
1326 maybewdir = wdirhex.startswith(id)
1332 maybewdir = wdirhex.startswith(id)
1327 try:
1333 try:
1328 partial = self.index.partialmatch(id)
1334 partial = self.index.partialmatch(id)
1329 if partial and self.hasnode(partial):
1335 if partial and self.hasnode(partial):
1330 if maybewdir:
1336 if maybewdir:
1331 # single 'ff...' match in radix tree, ambiguous with wdir
1337 # single 'ff...' match in radix tree, ambiguous with wdir
1332 raise error.RevlogError
1338 raise error.RevlogError
1333 return partial
1339 return partial
1334 if maybewdir:
1340 if maybewdir:
1335 # no 'ff...' match in radix tree, wdir identified
1341 # no 'ff...' match in radix tree, wdir identified
1336 raise error.WdirUnsupported
1342 raise error.WdirUnsupported
1337 return None
1343 return None
1338 except error.RevlogError:
1344 except error.RevlogError:
1339 # parsers.c radix tree lookup gave multiple matches
1345 # parsers.c radix tree lookup gave multiple matches
1340 # fast path: for unfiltered changelog, radix tree is accurate
1346 # fast path: for unfiltered changelog, radix tree is accurate
1341 if not getattr(self, 'filteredrevs', None):
1347 if not getattr(self, 'filteredrevs', None):
1342 raise error.AmbiguousPrefixLookupError(
1348 raise error.AmbiguousPrefixLookupError(
1343 id, self.indexfile, _(b'ambiguous identifier')
1349 id, self.indexfile, _(b'ambiguous identifier')
1344 )
1350 )
1345 # fall through to slow path that filters hidden revisions
1351 # fall through to slow path that filters hidden revisions
1346 except (AttributeError, ValueError):
1352 except (AttributeError, ValueError):
1347 # we are pure python, or key was too short to search radix tree
1353 # we are pure python, or key was too short to search radix tree
1348 pass
1354 pass
1349
1355
1350 if id in self._pcache:
1356 if id in self._pcache:
1351 return self._pcache[id]
1357 return self._pcache[id]
1352
1358
1353 if len(id) <= 40:
1359 if len(id) <= 40:
1354 try:
1360 try:
1355 # hex(node)[:...]
1361 # hex(node)[:...]
1356 l = len(id) // 2 # grab an even number of digits
1362 l = len(id) // 2 # grab an even number of digits
1357 prefix = bin(id[: l * 2])
1363 prefix = bin(id[: l * 2])
1358 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1364 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1359 nl = [
1365 nl = [
1360 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1366 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1361 ]
1367 ]
1362 if nullhex.startswith(id):
1368 if nullhex.startswith(id):
1363 nl.append(nullid)
1369 nl.append(nullid)
1364 if len(nl) > 0:
1370 if len(nl) > 0:
1365 if len(nl) == 1 and not maybewdir:
1371 if len(nl) == 1 and not maybewdir:
1366 self._pcache[id] = nl[0]
1372 self._pcache[id] = nl[0]
1367 return nl[0]
1373 return nl[0]
1368 raise error.AmbiguousPrefixLookupError(
1374 raise error.AmbiguousPrefixLookupError(
1369 id, self.indexfile, _(b'ambiguous identifier')
1375 id, self.indexfile, _(b'ambiguous identifier')
1370 )
1376 )
1371 if maybewdir:
1377 if maybewdir:
1372 raise error.WdirUnsupported
1378 raise error.WdirUnsupported
1373 return None
1379 return None
1374 except TypeError:
1380 except TypeError:
1375 pass
1381 pass
1376
1382
1377 def lookup(self, id):
1383 def lookup(self, id):
1378 """locate a node based on:
1384 """locate a node based on:
1379 - revision number or str(revision number)
1385 - revision number or str(revision number)
1380 - nodeid or subset of hex nodeid
1386 - nodeid or subset of hex nodeid
1381 """
1387 """
1382 n = self._match(id)
1388 n = self._match(id)
1383 if n is not None:
1389 if n is not None:
1384 return n
1390 return n
1385 n = self._partialmatch(id)
1391 n = self._partialmatch(id)
1386 if n:
1392 if n:
1387 return n
1393 return n
1388
1394
1389 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1395 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1390
1396
1391 def shortest(self, node, minlength=1):
1397 def shortest(self, node, minlength=1):
1392 """Find the shortest unambiguous prefix that matches node."""
1398 """Find the shortest unambiguous prefix that matches node."""
1393
1399
1394 def isvalid(prefix):
1400 def isvalid(prefix):
1395 try:
1401 try:
1396 matchednode = self._partialmatch(prefix)
1402 matchednode = self._partialmatch(prefix)
1397 except error.AmbiguousPrefixLookupError:
1403 except error.AmbiguousPrefixLookupError:
1398 return False
1404 return False
1399 except error.WdirUnsupported:
1405 except error.WdirUnsupported:
1400 # single 'ff...' match
1406 # single 'ff...' match
1401 return True
1407 return True
1402 if matchednode is None:
1408 if matchednode is None:
1403 raise error.LookupError(node, self.indexfile, _(b'no node'))
1409 raise error.LookupError(node, self.indexfile, _(b'no node'))
1404 return True
1410 return True
1405
1411
1406 def maybewdir(prefix):
1412 def maybewdir(prefix):
1407 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1413 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1408
1414
1409 hexnode = hex(node)
1415 hexnode = hex(node)
1410
1416
1411 def disambiguate(hexnode, minlength):
1417 def disambiguate(hexnode, minlength):
1412 """Disambiguate against wdirid."""
1418 """Disambiguate against wdirid."""
1413 for length in range(minlength, 41):
1419 for length in range(minlength, 41):
1414 prefix = hexnode[:length]
1420 prefix = hexnode[:length]
1415 if not maybewdir(prefix):
1421 if not maybewdir(prefix):
1416 return prefix
1422 return prefix
1417
1423
1418 if not getattr(self, 'filteredrevs', None):
1424 if not getattr(self, 'filteredrevs', None):
1419 try:
1425 try:
1420 length = max(self.index.shortest(node), minlength)
1426 length = max(self.index.shortest(node), minlength)
1421 return disambiguate(hexnode, length)
1427 return disambiguate(hexnode, length)
1422 except error.RevlogError:
1428 except error.RevlogError:
1423 if node != wdirid:
1429 if node != wdirid:
1424 raise error.LookupError(node, self.indexfile, _(b'no node'))
1430 raise error.LookupError(node, self.indexfile, _(b'no node'))
1425 except AttributeError:
1431 except AttributeError:
1426 # Fall through to pure code
1432 # Fall through to pure code
1427 pass
1433 pass
1428
1434
1429 if node == wdirid:
1435 if node == wdirid:
1430 for length in range(minlength, 41):
1436 for length in range(minlength, 41):
1431 prefix = hexnode[:length]
1437 prefix = hexnode[:length]
1432 if isvalid(prefix):
1438 if isvalid(prefix):
1433 return prefix
1439 return prefix
1434
1440
1435 for length in range(minlength, 41):
1441 for length in range(minlength, 41):
1436 prefix = hexnode[:length]
1442 prefix = hexnode[:length]
1437 if isvalid(prefix):
1443 if isvalid(prefix):
1438 return disambiguate(hexnode, length)
1444 return disambiguate(hexnode, length)
1439
1445
1440 def cmp(self, node, text):
1446 def cmp(self, node, text):
1441 """compare text with a given file revision
1447 """compare text with a given file revision
1442
1448
1443 returns True if text is different than what is stored.
1449 returns True if text is different than what is stored.
1444 """
1450 """
1445 p1, p2 = self.parents(node)
1451 p1, p2 = self.parents(node)
1446 return storageutil.hashrevisionsha1(text, p1, p2) != node
1452 return storageutil.hashrevisionsha1(text, p1, p2) != node
1447
1453
1448 def _cachesegment(self, offset, data):
1454 def _cachesegment(self, offset, data):
1449 """Add a segment to the revlog cache.
1455 """Add a segment to the revlog cache.
1450
1456
1451 Accepts an absolute offset and the data that is at that location.
1457 Accepts an absolute offset and the data that is at that location.
1452 """
1458 """
1453 o, d = self._chunkcache
1459 o, d = self._chunkcache
1454 # try to add to existing cache
1460 # try to add to existing cache
1455 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1461 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1456 self._chunkcache = o, d + data
1462 self._chunkcache = o, d + data
1457 else:
1463 else:
1458 self._chunkcache = offset, data
1464 self._chunkcache = offset, data
1459
1465
1460 def _readsegment(self, offset, length, df=None):
1466 def _readsegment(self, offset, length, df=None):
1461 """Load a segment of raw data from the revlog.
1467 """Load a segment of raw data from the revlog.
1462
1468
1463 Accepts an absolute offset, length to read, and an optional existing
1469 Accepts an absolute offset, length to read, and an optional existing
1464 file handle to read from.
1470 file handle to read from.
1465
1471
1466 If an existing file handle is passed, it will be seeked and the
1472 If an existing file handle is passed, it will be seeked and the
1467 original seek position will NOT be restored.
1473 original seek position will NOT be restored.
1468
1474
1469 Returns a str or buffer of raw byte data.
1475 Returns a str or buffer of raw byte data.
1470
1476
1471 Raises if the requested number of bytes could not be read.
1477 Raises if the requested number of bytes could not be read.
1472 """
1478 """
1473 # Cache data both forward and backward around the requested
1479 # Cache data both forward and backward around the requested
1474 # data, in a fixed size window. This helps speed up operations
1480 # data, in a fixed size window. This helps speed up operations
1475 # involving reading the revlog backwards.
1481 # involving reading the revlog backwards.
1476 cachesize = self._chunkcachesize
1482 cachesize = self._chunkcachesize
1477 realoffset = offset & ~(cachesize - 1)
1483 realoffset = offset & ~(cachesize - 1)
1478 reallength = (
1484 reallength = (
1479 (offset + length + cachesize) & ~(cachesize - 1)
1485 (offset + length + cachesize) & ~(cachesize - 1)
1480 ) - realoffset
1486 ) - realoffset
1481 with self._datareadfp(df) as df:
1487 with self._datareadfp(df) as df:
1482 df.seek(realoffset)
1488 df.seek(realoffset)
1483 d = df.read(reallength)
1489 d = df.read(reallength)
1484
1490
1485 self._cachesegment(realoffset, d)
1491 self._cachesegment(realoffset, d)
1486 if offset != realoffset or reallength != length:
1492 if offset != realoffset or reallength != length:
1487 startoffset = offset - realoffset
1493 startoffset = offset - realoffset
1488 if len(d) - startoffset < length:
1494 if len(d) - startoffset < length:
1489 raise error.RevlogError(
1495 raise error.RevlogError(
1490 _(
1496 _(
1491 b'partial read of revlog %s; expected %d bytes from '
1497 b'partial read of revlog %s; expected %d bytes from '
1492 b'offset %d, got %d'
1498 b'offset %d, got %d'
1493 )
1499 )
1494 % (
1500 % (
1495 self.indexfile if self._inline else self.datafile,
1501 self.indexfile if self._inline else self.datafile,
1496 length,
1502 length,
1497 realoffset,
1503 realoffset,
1498 len(d) - startoffset,
1504 len(d) - startoffset,
1499 )
1505 )
1500 )
1506 )
1501
1507
1502 return util.buffer(d, startoffset, length)
1508 return util.buffer(d, startoffset, length)
1503
1509
1504 if len(d) < length:
1510 if len(d) < length:
1505 raise error.RevlogError(
1511 raise error.RevlogError(
1506 _(
1512 _(
1507 b'partial read of revlog %s; expected %d bytes from offset '
1513 b'partial read of revlog %s; expected %d bytes from offset '
1508 b'%d, got %d'
1514 b'%d, got %d'
1509 )
1515 )
1510 % (
1516 % (
1511 self.indexfile if self._inline else self.datafile,
1517 self.indexfile if self._inline else self.datafile,
1512 length,
1518 length,
1513 offset,
1519 offset,
1514 len(d),
1520 len(d),
1515 )
1521 )
1516 )
1522 )
1517
1523
1518 return d
1524 return d
1519
1525
1520 def _getsegment(self, offset, length, df=None):
1526 def _getsegment(self, offset, length, df=None):
1521 """Obtain a segment of raw data from the revlog.
1527 """Obtain a segment of raw data from the revlog.
1522
1528
1523 Accepts an absolute offset, length of bytes to obtain, and an
1529 Accepts an absolute offset, length of bytes to obtain, and an
1524 optional file handle to the already-opened revlog. If the file
1530 optional file handle to the already-opened revlog. If the file
1525 handle is used, it's original seek position will not be preserved.
1531 handle is used, it's original seek position will not be preserved.
1526
1532
1527 Requests for data may be returned from a cache.
1533 Requests for data may be returned from a cache.
1528
1534
1529 Returns a str or a buffer instance of raw byte data.
1535 Returns a str or a buffer instance of raw byte data.
1530 """
1536 """
1531 o, d = self._chunkcache
1537 o, d = self._chunkcache
1532 l = len(d)
1538 l = len(d)
1533
1539
1534 # is it in the cache?
1540 # is it in the cache?
1535 cachestart = offset - o
1541 cachestart = offset - o
1536 cacheend = cachestart + length
1542 cacheend = cachestart + length
1537 if cachestart >= 0 and cacheend <= l:
1543 if cachestart >= 0 and cacheend <= l:
1538 if cachestart == 0 and cacheend == l:
1544 if cachestart == 0 and cacheend == l:
1539 return d # avoid a copy
1545 return d # avoid a copy
1540 return util.buffer(d, cachestart, cacheend - cachestart)
1546 return util.buffer(d, cachestart, cacheend - cachestart)
1541
1547
1542 return self._readsegment(offset, length, df=df)
1548 return self._readsegment(offset, length, df=df)
1543
1549
1544 def _getsegmentforrevs(self, startrev, endrev, df=None):
1550 def _getsegmentforrevs(self, startrev, endrev, df=None):
1545 """Obtain a segment of raw data corresponding to a range of revisions.
1551 """Obtain a segment of raw data corresponding to a range of revisions.
1546
1552
1547 Accepts the start and end revisions and an optional already-open
1553 Accepts the start and end revisions and an optional already-open
1548 file handle to be used for reading. If the file handle is read, its
1554 file handle to be used for reading. If the file handle is read, its
1549 seek position will not be preserved.
1555 seek position will not be preserved.
1550
1556
1551 Requests for data may be satisfied by a cache.
1557 Requests for data may be satisfied by a cache.
1552
1558
1553 Returns a 2-tuple of (offset, data) for the requested range of
1559 Returns a 2-tuple of (offset, data) for the requested range of
1554 revisions. Offset is the integer offset from the beginning of the
1560 revisions. Offset is the integer offset from the beginning of the
1555 revlog and data is a str or buffer of the raw byte data.
1561 revlog and data is a str or buffer of the raw byte data.
1556
1562
1557 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1563 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1558 to determine where each revision's data begins and ends.
1564 to determine where each revision's data begins and ends.
1559 """
1565 """
1560 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1566 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1561 # (functions are expensive).
1567 # (functions are expensive).
1562 index = self.index
1568 index = self.index
1563 istart = index[startrev]
1569 istart = index[startrev]
1564 start = int(istart[0] >> 16)
1570 start = int(istart[0] >> 16)
1565 if startrev == endrev:
1571 if startrev == endrev:
1566 end = start + istart[1]
1572 end = start + istart[1]
1567 else:
1573 else:
1568 iend = index[endrev]
1574 iend = index[endrev]
1569 end = int(iend[0] >> 16) + iend[1]
1575 end = int(iend[0] >> 16) + iend[1]
1570
1576
1571 if self._inline:
1577 if self._inline:
1572 start += (startrev + 1) * self._io.size
1578 start += (startrev + 1) * self._io.size
1573 end += (endrev + 1) * self._io.size
1579 end += (endrev + 1) * self._io.size
1574 length = end - start
1580 length = end - start
1575
1581
1576 return start, self._getsegment(start, length, df=df)
1582 return start, self._getsegment(start, length, df=df)
1577
1583
1578 def _chunk(self, rev, df=None):
1584 def _chunk(self, rev, df=None):
1579 """Obtain a single decompressed chunk for a revision.
1585 """Obtain a single decompressed chunk for a revision.
1580
1586
1581 Accepts an integer revision and an optional already-open file handle
1587 Accepts an integer revision and an optional already-open file handle
1582 to be used for reading. If used, the seek position of the file will not
1588 to be used for reading. If used, the seek position of the file will not
1583 be preserved.
1589 be preserved.
1584
1590
1585 Returns a str holding uncompressed data for the requested revision.
1591 Returns a str holding uncompressed data for the requested revision.
1586 """
1592 """
1587 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1593 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1588
1594
def _chunks(self, revs, df=None, targetsize=None):
    """Obtain decompressed chunks for the specified revisions.

    Accepts an iterable of numeric revisions that are assumed to be in
    ascending order. Also accepts an optional already-open file handle
    to be used for reading. If used, the seek position of the file will
    not be preserved.

    This function is similar to calling ``self._chunk()`` multiple times,
    but is faster.

    Returns a list with decompressed data for each requested revision.
    """
    if not revs:
        return []
    # hoist attribute lookups out of the hot loops below
    start = self.start
    length = self.length
    inline = self._inline
    iosize = self._io.size
    buffer = util.buffer

    l = []
    ladd = l.append

    # with sparse-read disabled we fetch all requested revisions as one
    # contiguous segment; otherwise let deltautil split the revisions
    # into densely-packed slices bounded by targetsize
    if not self._withsparseread:
        slicedchunks = (revs,)
    else:
        slicedchunks = deltautil.slicechunk(
            self, revs, targetsize=targetsize
        )

    for revschunk in slicedchunks:
        firstrev = revschunk[0]
        # Skip trailing revisions with empty diff
        for lastrev in revschunk[::-1]:
            if length(lastrev) != 0:
                break

        try:
            offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
        except OverflowError:
            # issue4215 - we can't cache a run of chunks greater than
            # 2G on Windows
            return [self._chunk(rev, df=df) for rev in revschunk]

        decomp = self.decompress
        for rev in revschunk:
            chunkstart = start(rev)
            # inline revlogs interleave index entries with data, so each
            # revision's data is shifted by the index entries before it
            if inline:
                chunkstart += (rev + 1) * iosize
            chunklength = length(rev)
            # slice the shared segment with a zero-copy buffer view
            ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

    return l
1643
1649
def _chunkclear(self):
    """Clear the raw chunk cache."""
    # reset to an empty cached segment starting at offset 0
    self._chunkcache = (0, b'')
1647
1653
def deltaparent(self, rev):
    """return deltaparent of the given revision"""
    base = self.index[rev][3]
    if base == rev:
        # a self-referencing base marks a full snapshot: no delta parent
        return nullrev
    if self._generaldelta:
        # with general delta, the stored base is the actual delta parent
        return base
    # legacy layout: deltas always chain against the previous revision
    return rev - 1
1657
1663
def issnapshot(self, rev):
    """tells whether rev is a snapshot
    """
    # without sparse-revlog, only full texts (delta against nullrev)
    # are snapshots
    if not self._sparserevlog:
        return self.deltaparent(rev) == nullrev
    elif util.safehasattr(self.index, b'issnapshot'):
        # directly assign the method to cache the testing and access
        self.issnapshot = self.index.issnapshot
        return self.issnapshot(rev)
    # pure-Python fallback when the index has no native issnapshot
    if rev == nullrev:
        return True
    entry = self.index[rev]
    base = entry[3]  # delta base revision
    if base == rev:
        # self-referencing base: full snapshot
        return True
    if base == nullrev:
        # delta against the null revision is a full text too
        return True
    p1 = entry[5]
    p2 = entry[6]
    # a delta against one of the revision's own parents is a plain
    # delta, not an intermediate snapshot
    if base == p1 or base == p2:
        return False
    # otherwise rev is a snapshot iff its base is one (recursive)
    return self.issnapshot(base)
1680
1686
def snapshotdepth(self, rev):
    """number of snapshot in the chain before this one"""
    if not self.issnapshot(rev):
        # BUG FIX: the original raised the message with an unformatted
        # b'%d' placeholder; interpolate the offending revision number
        raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
    # the delta chain of a snapshot only traverses snapshots; its length
    # minus the revision itself is the snapshot depth
    return len(self._deltachain(rev)[0]) - 1
1686
1692
def revdiff(self, rev1, rev2):
    """return or calculate a delta between two revisions

    The delta calculated is in binary form and is intended to be written to
    revlog data directly. So this function needs raw revision data.
    """
    # fast path: rev2's stored delta is already expressed against rev1,
    # so the raw chunk can be reused verbatim
    storedagainstrev1 = rev1 != nullrev and self.deltaparent(rev2) == rev1
    if storedagainstrev1:
        return bytes(self._chunk(rev2))

    # otherwise compute a fresh binary diff between the two raw texts
    oldtext = self.rawdata(rev1)
    newtext = self.rawdata(rev2)
    return mdiff.textdiff(oldtext, newtext)
1697
1703
def _processflags(self, text, flags, operation, raw=False):
    """deprecated entry point to access flag processors"""
    msg = b'_processflag(...) use the specialized variant'
    util.nouideprecwarn(msg, b'5.2', stacklevel=2)
    # dispatch to the specialized flagutil helpers via early returns
    if raw:
        return text, flagutil.processflagsraw(self, text, flags)
    if operation == b'read':
        return flagutil.processflagsread(self, text, flags)
    # anything else is a write operation
    return flagutil.processflagswrite(self, text, flags)
1708
1714
def revision(self, nodeorrev, _df=None, raw=False):
    """return an uncompressed revision of a given node or revision
    number.

    _df - an existing file handle to read from. (internal-only)
    raw - an optional argument specifying if the revision data is to be
    treated as raw data when applying flag transforms. 'raw' should be set
    to True when generating changegroups or in debug commands.
    """
    if raw:
        # raw access through this entry point is deprecated
        util.nouideprecwarn(
            b'revlog.revision(..., raw=True) is deprecated, '
            b'use revlog.rawdata(...)',
            b'5.2',
            stacklevel=2,
        )
    text, _sidedata = self._revisiondata(nodeorrev, _df, raw=raw)
    return text
1725
1731
def sidedata(self, nodeorrev, _df=None):
    """a map of extra data related to the changeset but not part of the hash

    This function currently return a dictionary. However, more advanced
    mapping object will likely be used in the future for a more
    efficient/lazy code.
    """
    # discard the text half of the (text, sidedata) pair
    _text, sidedatamap = self._revisiondata(nodeorrev, _df)
    return sidedatamap
1734
1740
def _revisiondata(self, nodeorrev, _df=None, raw=False):
    """return a ``(text, sidedata)`` pair for a node or revision number

    ``_df`` is an optional already-open file handle (internal-only).
    When ``raw`` is True, flag processors are only consulted to validate
    the hash and the stored raw text is returned unmodified.
    """
    # deal with <nodeorrev> argument type
    if isinstance(nodeorrev, int):
        rev = nodeorrev
        node = self.node(rev)
    else:
        node = nodeorrev
        rev = None

    # fast path the special `nullid` rev
    if node == nullid:
        return b"", {}

    # The text as stored inside the revlog. Might be the revision or might
    # need to be processed to retrieve the revision.
    rawtext = None

    # `validated` tells whether the rawtext already passed hash checking
    rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

    if raw and validated:
        # if we don't want to process the raw text and that raw
        # text is cached, we can exit early.
        return rawtext, {}
    if rev is None:
        rev = self.rev(node)
    # the revlog's flag for this revision
    # (usually alter its state or content)
    flags = self.flags(rev)

    if validated and flags == REVIDX_DEFAULT_FLAGS:
        # no extra flags set, no flag processor runs, text = rawtext
        return rawtext, {}

    sidedata = {}
    if raw:
        # raw mode: only check whether a hash validation is required
        validatehash = flagutil.processflagsraw(self, rawtext, flags)
        text = rawtext
    else:
        try:
            r = flagutil.processflagsread(self, rawtext, flags)
        except error.SidedataHashError as exc:
            # surface sidedata corruption as a regular revlog error
            msg = _(b"integrity check failed on %s:%s sidedata key %d")
            msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
            raise error.RevlogError(msg)
        text, validatehash, sidedata = r
    if validatehash:
        self.checkhash(text, node, rev=rev)
    if not validated:
        # cache the raw text now that it has been validated
        self._revisioncache = (node, rev, rawtext)

    return text, sidedata
1786
1792
def _rawtext(self, node, rev, _df=None):
    """return the possibly unvalidated rawtext for a revision

    returns (rev, rawtext, validated)
    """

    # revision in the cache (could be useful to apply delta)
    cachedrev = None
    # An intermediate text to apply deltas to
    basetext = None

    # Check if we have the entry in cache
    # The cache entry looks like (node, rev, rawtext)
    if self._revisioncache:
        if self._revisioncache[0] == node:
            # cache hit: the cached text was already validated
            return (rev, self._revisioncache[2], True)
        cachedrev = self._revisioncache[1]

    if rev is None:
        rev = self.rev(node)

    # walk the delta chain; stop early if we reach the cached revision,
    # which can then serve as the base text
    chain, stopped = self._deltachain(rev, stoprev=cachedrev)
    if stopped:
        basetext = self._revisioncache[2]

    # drop cache to save memory, the caller is expected to
    # update self._revisioncache after validating the text
    self._revisioncache = None

    targetsize = None
    rawsize = self.index[rev][2]
    if 0 <= rawsize:
        # read-ahead budget handed to the sparse-read slicing in _chunks
        targetsize = 4 * rawsize

    bins = self._chunks(chain, df=_df, targetsize=targetsize)
    if basetext is None:
        # no usable cached base: the chain's first chunk is the full text
        basetext = bytes(bins[0])
        bins = bins[1:]

    rawtext = mdiff.patches(basetext, bins)
    del basetext # let us have a chance to free memory early
    # not validated: the caller is responsible for hash checking
    return (rev, rawtext, False)
1829
1835
def rawdata(self, nodeorrev, _df=None):
    """return an uncompressed raw data of a given node or revision number.

    _df - an existing file handle to read from. (internal-only)
    """
    # keep only the text half; sidedata is irrelevant for raw access
    rawtext, _sidedata = self._revisiondata(nodeorrev, _df, raw=True)
    return rawtext
1836
1842
def hash(self, text, p1, p2):
    """Compute a node hash.

    Available as a function so that subclasses can replace the hash
    as needed.
    """
    # delegate to the standard revision hashing helper
    return storageutil.hashrevisionsha1(text, p1, p2)
1844
1850
def checkhash(self, text, node, p1=None, p2=None, rev=None):
    """Check node hash integrity.

    Available as a function so that subclasses can extend hash mismatch
    behaviors as needed.

    Raises RevlogError on mismatch, or CensoredNodeError when the text
    turns out to be a censored placeholder on a censorable revlog.
    """
    try:
        if p1 is None and p2 is None:
            # look the parents up when the caller did not provide them
            p1, p2 = self.parents(node)
        if node != self.hash(text, p1, p2):
            # Clear the revision cache on hash failure. The revision cache
            # only stores the raw revision and clearing the cache does have
            # the side-effect that we won't have a cache hit when the raw
            # revision data is accessed. But this case should be rare and
            # it is extra work to teach the cache about the hash
            # verification state.
            if self._revisioncache and self._revisioncache[0] == node:
                self._revisioncache = None

            # prefer the revision number in the error; fall back to a
            # short node hash when none was supplied
            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise error.RevlogError(
                _(b"integrity check failed on %s:%s")
                % (self.indexfile, pycompat.bytestr(revornode))
            )
    except error.RevlogError:
        # censored content hashes differently on purpose; report it as
        # such instead of a generic integrity failure
        if self._censorable and storageutil.iscensoredtext(text):
            raise error.CensoredNodeError(self.indexfile, node, text)
        raise
1875
1881
def _enforceinlinesize(self, tr, fp=None):
    """Check if the revlog is too big for inline and convert if so.

    This should be called after revisions are added to the revlog. If the
    revlog has grown too large to be an inline revlog, it will convert it
    to use multiple index and data files.
    """
    tiprev = len(self) - 1
    # nothing to do if already split out or still under the size cap
    if (
        not self._inline
        or (self.start(tiprev) + self.length(tiprev)) < _maxinline
    ):
        return

    trinfo = tr.find(self.indexfile)
    if trinfo is None:
        raise error.RevlogError(
            _(b"%s not found in the transaction") % self.indexfile
        )

    trindex = trinfo[2]
    if trindex is not None:
        dataoff = self.start(trindex)
    else:
        # revlog was stripped at start of transaction, use all leftover data
        trindex = len(self) - 1
        dataoff = self.end(tiprev)

    tr.add(self.datafile, dataoff)

    if fp:
        fp.flush()
        fp.close()
        # We can't use the cached file handle after close(). So prevent
        # its usage.
        self._writinghandles = None

    # copy every revision's data out of the inline file into the new
    # standalone data file
    with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
        for r in self:
            dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

    # rewrite the index without the inline flag
    with self._indexfp(b'w') as fp:
        self.version &= ~FLAG_INLINE_DATA
        self._inline = False
        io = self._io
        for i in self:
            e = io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # the temp file replace the real index when we exit the context
        # manager

    tr.replace(self.indexfile, trindex * self._io.size)
    # cached chunk offsets are stale once the data moved files
    self._chunkclear()
1930
1936
def _nodeduplicatecallback(self, transaction, node):
    """called when trying to add a node already stored.
    """
    # intentionally empty: a hook point for code that needs to react
    # to duplicate insertions
1934
1940
def addrevision(
    self,
    text,
    transaction,
    link,
    p1,
    p2,
    cachedelta=None,
    node=None,
    flags=REVIDX_DEFAULT_FLAGS,
    deltacomputer=None,
    sidedata=None,
):
    """add a revision to the log

    text - the revision data to add
    transaction - the transaction object used for rollback
    link - the linkrev data to add
    p1, p2 - the parent nodeids of the revision
    cachedelta - an optional precomputed delta
    node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use different hashing method (and override checkhash() in such case)
    flags - the known flags to set on the revision
    deltacomputer - an optional deltacomputer instance shared between
        multiple calls
    sidedata - optional mapping of extra data stored alongside the
        revision; only valid on a revlog with sidedata support
    """
    if link == nullrev:
        raise error.RevlogError(
            _(b"attempted to add linkrev -1 to %s") % self.indexfile
        )

    # normalize the sidedata argument and keep the REVIDX_SIDEDATA flag
    # in sync with whether any sidedata is actually attached
    if sidedata is None:
        sidedata = {}
        flags = flags & ~REVIDX_SIDEDATA
    elif not self.hassidedata:
        raise error.ProgrammingError(
            _(b"trying to add sidedata to a revlog who don't support them")
        )
    else:
        flags |= REVIDX_SIDEDATA

    if flags:
        # flag processors need the node to run
        node = node or self.hash(text, p1, p2)

    rawtext, validatehash = flagutil.processflagswrite(
        self, text, flags, sidedata=sidedata
    )

    # If the flag processor modifies the revision data, ignore any provided
    # cachedelta.
    if rawtext != text:
        cachedelta = None

    if len(rawtext) > _maxentrysize:
        raise error.RevlogError(
            _(
                b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
            )
            % (self.indexfile, len(rawtext))
        )

    node = node or self.hash(rawtext, p1, p2)
    if self.index.has_node(node):
        # duplicate insertion: nothing to store
        return node

    if validatehash:
        self.checkhash(rawtext, node, p1=p1, p2=p2)

    return self.addrawrevision(
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=cachedelta,
        deltacomputer=deltacomputer,
    )
2015
2021
def addrawrevision(
    self,
    rawtext,
    transaction,
    link,
    p1,
    p2,
    node,
    flags,
    cachedelta=None,
    deltacomputer=None,
):
    """add a raw revision with known flags, node and parents
    useful when reusing a revision not stored in this revlog (ex: received
    over wire, or read from an external bundle).
    """
    dfh = None
    if not self._inline:
        # non-inline revlogs keep revision data in a separate file
        dfh = self._datafp(b"a+")
    ifh = self._indexfp(b"a+")
    try:
        return self._addrevision(
            node,
            rawtext,
            transaction,
            link,
            p1,
            p2,
            flags,
            cachedelta,
            ifh,
            dfh,
            deltacomputer=deltacomputer,
        )
    finally:
        # always release the file handles, even when _addrevision raises
        if dfh:
            dfh.close()
        ifh.close()
2054
2060
def compress(self, data):
    """Generate a possibly-compressed representation of data.

    Returns a ``(header, payload)`` pair; an empty header means the
    payload already carries whatever marker it needs.
    """
    # empty input is stored as-is
    if not data:
        return b'', data

    compressed = self._compressor.compress(data)
    if compressed:
        # The revlog compressor added the header in the returned data.
        return b'', compressed

    # the compressor declined; store uncompressed. A leading NUL byte
    # cannot be mistaken for a compression header, so it needs no
    # 'u' (uncompressed) marker.
    if data[0:1] == b'\0':
        return b'', data
    return b'u', data
2069
2075
def decompress(self, data):
    """Decompress a revlog chunk.

    The chunk is expected to begin with a header identifying the
    format type so it can be routed to an appropriate decompressor.
    """
    if not data:
        return data

    # Revlogs are read much more frequently than they are written and many
    # chunks only take microseconds to decompress, so performance is
    # important here.
    #
    # We can make a few assumptions about revlogs:
    #
    # 1) the majority of chunks will be compressed (as opposed to inline
    #    raw data).
    # 2) decompressing *any* data will likely by at least 10x slower than
    #    returning raw inline data.
    # 3) we want to prioritize common and officially supported compression
    #    engines
    #
    # It follows that we want to optimize for "decompress compressed data
    # when encoded with common and officially supported compression engines"
    # case over "raw data" and "data encoded by less common or non-official
    # compression engines." That is why we have the inline lookup first
    # followed by the compengines lookup.
    #
    # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
    # compressed chunks. And this matters for changelog and manifest reads.
    t = data[0:1]

    # b'x' is the zlib header byte
    if t == b'x':
        try:
            return _zlibdecompress(data)
        except zlib.error as e:
            raise error.RevlogError(
                _(b'revlog decompress error: %s')
                % stringutil.forcebytestr(e)
            )
    # '\0' is more common than 'u' so it goes first.
    elif t == b'\0':
        return data
    elif t == b'u':
        # 'u' marks uncompressed data; strip the marker byte without
        # copying the payload
        return util.buffer(data, 1)

    # fall back to a pluggable engine keyed by the header byte; the
    # resolved decompressor is cached for subsequent chunks
    try:
        compressor = self._decompressors[t]
    except KeyError:
        try:
            engine = util.compengines.forrevlogheader(t)
            compressor = engine.revlogcompressor(self._compengineopts)
            self._decompressors[t] = compressor
        except KeyError:
            raise error.RevlogError(_(b'unknown compression type %r') % t)

    return compressor.decompress(data)
2127
2133
2128 def _addrevision(
2134 def _addrevision(
2129 self,
2135 self,
2130 node,
2136 node,
2131 rawtext,
2137 rawtext,
2132 transaction,
2138 transaction,
2133 link,
2139 link,
2134 p1,
2140 p1,
2135 p2,
2141 p2,
2136 flags,
2142 flags,
2137 cachedelta,
2143 cachedelta,
2138 ifh,
2144 ifh,
2139 dfh,
2145 dfh,
2140 alwayscache=False,
2146 alwayscache=False,
2141 deltacomputer=None,
2147 deltacomputer=None,
2142 ):
2148 ):
2143 """internal function to add revisions to the log
2149 """internal function to add revisions to the log
2144
2150
2145 see addrevision for argument descriptions.
2151 see addrevision for argument descriptions.
2146
2152
2147 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2153 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2148
2154
2149 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2155 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2150 be used.
2156 be used.
2151
2157
2152 invariants:
2158 invariants:
2153 - rawtext is optional (can be None); if not set, cachedelta must be set.
2159 - rawtext is optional (can be None); if not set, cachedelta must be set.
2154 if both are set, they must correspond to each other.
2160 if both are set, they must correspond to each other.
2155 """
2161 """
2156 if node == nullid:
2162 if node == nullid:
2157 raise error.RevlogError(
2163 raise error.RevlogError(
2158 _(b"%s: attempt to add null revision") % self.indexfile
2164 _(b"%s: attempt to add null revision") % self.indexfile
2159 )
2165 )
2160 if node == wdirid or node in wdirfilenodeids:
2166 if node == wdirid or node in wdirfilenodeids:
2161 raise error.RevlogError(
2167 raise error.RevlogError(
2162 _(b"%s: attempt to add wdir revision") % self.indexfile
2168 _(b"%s: attempt to add wdir revision") % self.indexfile
2163 )
2169 )
2164
2170
2165 if self._inline:
2171 if self._inline:
2166 fh = ifh
2172 fh = ifh
2167 else:
2173 else:
2168 fh = dfh
2174 fh = dfh
2169
2175
2170 btext = [rawtext]
2176 btext = [rawtext]
2171
2177
2172 curr = len(self)
2178 curr = len(self)
2173 prev = curr - 1
2179 prev = curr - 1
2174 offset = self.end(prev)
2180 offset = self.end(prev)
2175 p1r, p2r = self.rev(p1), self.rev(p2)
2181 p1r, p2r = self.rev(p1), self.rev(p2)
2176
2182
2177 # full versions are inserted when the needed deltas
2183 # full versions are inserted when the needed deltas
2178 # become comparable to the uncompressed text
2184 # become comparable to the uncompressed text
2179 if rawtext is None:
2185 if rawtext is None:
2180 # need rawtext size, before changed by flag processors, which is
2186 # need rawtext size, before changed by flag processors, which is
2181 # the non-raw size. use revlog explicitly to avoid filelog's extra
2187 # the non-raw size. use revlog explicitly to avoid filelog's extra
2182 # logic that might remove metadata size.
2188 # logic that might remove metadata size.
2183 textlen = mdiff.patchedsize(
2189 textlen = mdiff.patchedsize(
2184 revlog.size(self, cachedelta[0]), cachedelta[1]
2190 revlog.size(self, cachedelta[0]), cachedelta[1]
2185 )
2191 )
2186 else:
2192 else:
2187 textlen = len(rawtext)
2193 textlen = len(rawtext)
2188
2194
2189 if deltacomputer is None:
2195 if deltacomputer is None:
2190 deltacomputer = deltautil.deltacomputer(self)
2196 deltacomputer = deltautil.deltacomputer(self)
2191
2197
2192 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2198 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2193
2199
2194 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2200 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2195
2201
2196 e = (
2202 e = (
2197 offset_type(offset, flags),
2203 offset_type(offset, flags),
2198 deltainfo.deltalen,
2204 deltainfo.deltalen,
2199 textlen,
2205 textlen,
2200 deltainfo.base,
2206 deltainfo.base,
2201 link,
2207 link,
2202 p1r,
2208 p1r,
2203 p2r,
2209 p2r,
2204 node,
2210 node,
2205 )
2211 )
2206 self.index.append(e)
2212 self.index.append(e)
2207
2213
2208 # Reset the pure node cache start lookup offset to account for new
2214 # Reset the pure node cache start lookup offset to account for new
2209 # revision.
2215 # revision.
2210 if self._nodepos is not None:
2216 if self._nodepos is not None:
2211 self._nodepos = curr
2217 self._nodepos = curr
2212
2218
2213 entry = self._io.packentry(e, self.node, self.version, curr)
2219 entry = self._io.packentry(e, self.node, self.version, curr)
2214 self._writeentry(
2220 self._writeentry(
2215 transaction, ifh, dfh, entry, deltainfo.data, link, offset
2221 transaction, ifh, dfh, entry, deltainfo.data, link, offset
2216 )
2222 )
2217
2223
2218 rawtext = btext[0]
2224 rawtext = btext[0]
2219
2225
2220 if alwayscache and rawtext is None:
2226 if alwayscache and rawtext is None:
2221 rawtext = deltacomputer.buildtext(revinfo, fh)
2227 rawtext = deltacomputer.buildtext(revinfo, fh)
2222
2228
2223 if type(rawtext) == bytes: # only accept immutable objects
2229 if type(rawtext) == bytes: # only accept immutable objects
2224 self._revisioncache = (node, curr, rawtext)
2230 self._revisioncache = (node, curr, rawtext)
2225 self._chainbasecache[curr] = deltainfo.chainbase
2231 self._chainbasecache[curr] = deltainfo.chainbase
2226 return node
2232 return node
2227
2233
2228 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2234 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2229 # Files opened in a+ mode have inconsistent behavior on various
2235 # Files opened in a+ mode have inconsistent behavior on various
2230 # platforms. Windows requires that a file positioning call be made
2236 # platforms. Windows requires that a file positioning call be made
2231 # when the file handle transitions between reads and writes. See
2237 # when the file handle transitions between reads and writes. See
2232 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2238 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2233 # platforms, Python or the platform itself can be buggy. Some versions
2239 # platforms, Python or the platform itself can be buggy. Some versions
2234 # of Solaris have been observed to not append at the end of the file
2240 # of Solaris have been observed to not append at the end of the file
2235 # if the file was seeked to before the end. See issue4943 for more.
2241 # if the file was seeked to before the end. See issue4943 for more.
2236 #
2242 #
2237 # We work around this issue by inserting a seek() before writing.
2243 # We work around this issue by inserting a seek() before writing.
2238 # Note: This is likely not necessary on Python 3. However, because
2244 # Note: This is likely not necessary on Python 3. However, because
2239 # the file handle is reused for reads and may be seeked there, we need
2245 # the file handle is reused for reads and may be seeked there, we need
2240 # to be careful before changing this.
2246 # to be careful before changing this.
2241 ifh.seek(0, os.SEEK_END)
2247 ifh.seek(0, os.SEEK_END)
2242 if dfh:
2248 if dfh:
2243 dfh.seek(0, os.SEEK_END)
2249 dfh.seek(0, os.SEEK_END)
2244
2250
2245 curr = len(self) - 1
2251 curr = len(self) - 1
2246 if not self._inline:
2252 if not self._inline:
2247 transaction.add(self.datafile, offset)
2253 transaction.add(self.datafile, offset)
2248 transaction.add(self.indexfile, curr * len(entry))
2254 transaction.add(self.indexfile, curr * len(entry))
2249 if data[0]:
2255 if data[0]:
2250 dfh.write(data[0])
2256 dfh.write(data[0])
2251 dfh.write(data[1])
2257 dfh.write(data[1])
2252 ifh.write(entry)
2258 ifh.write(entry)
2253 else:
2259 else:
2254 offset += curr * self._io.size
2260 offset += curr * self._io.size
2255 transaction.add(self.indexfile, offset, curr)
2261 transaction.add(self.indexfile, offset, curr)
2256 ifh.write(entry)
2262 ifh.write(entry)
2257 ifh.write(data[0])
2263 ifh.write(data[0])
2258 ifh.write(data[1])
2264 ifh.write(data[1])
2259 self._enforceinlinesize(transaction, ifh)
2265 self._enforceinlinesize(transaction, ifh)
2260
2266
2261 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2267 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2262 """
2268 """
2263 add a delta group
2269 add a delta group
2264
2270
2265 given a set of deltas, add them to the revision log. the
2271 given a set of deltas, add them to the revision log. the
2266 first delta is against its parent, which should be in our
2272 first delta is against its parent, which should be in our
2267 log, the rest are against the previous delta.
2273 log, the rest are against the previous delta.
2268
2274
2269 If ``addrevisioncb`` is defined, it will be called with arguments of
2275 If ``addrevisioncb`` is defined, it will be called with arguments of
2270 this revlog and the node that was added.
2276 this revlog and the node that was added.
2271 """
2277 """
2272
2278
2273 if self._writinghandles:
2279 if self._writinghandles:
2274 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2280 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2275
2281
2276 nodes = []
2282 nodes = []
2277
2283
2278 r = len(self)
2284 r = len(self)
2279 end = 0
2285 end = 0
2280 if r:
2286 if r:
2281 end = self.end(r - 1)
2287 end = self.end(r - 1)
2282 ifh = self._indexfp(b"a+")
2288 ifh = self._indexfp(b"a+")
2283 isize = r * self._io.size
2289 isize = r * self._io.size
2284 if self._inline:
2290 if self._inline:
2285 transaction.add(self.indexfile, end + isize, r)
2291 transaction.add(self.indexfile, end + isize, r)
2286 dfh = None
2292 dfh = None
2287 else:
2293 else:
2288 transaction.add(self.indexfile, isize, r)
2294 transaction.add(self.indexfile, isize, r)
2289 transaction.add(self.datafile, end)
2295 transaction.add(self.datafile, end)
2290 dfh = self._datafp(b"a+")
2296 dfh = self._datafp(b"a+")
2291
2297
2292 def flush():
2298 def flush():
2293 if dfh:
2299 if dfh:
2294 dfh.flush()
2300 dfh.flush()
2295 ifh.flush()
2301 ifh.flush()
2296
2302
2297 self._writinghandles = (ifh, dfh)
2303 self._writinghandles = (ifh, dfh)
2298
2304
2299 try:
2305 try:
2300 deltacomputer = deltautil.deltacomputer(self)
2306 deltacomputer = deltautil.deltacomputer(self)
2301 # loop through our set of deltas
2307 # loop through our set of deltas
2302 for data in deltas:
2308 for data in deltas:
2303 node, p1, p2, linknode, deltabase, delta, flags = data
2309 node, p1, p2, linknode, deltabase, delta, flags = data
2304 link = linkmapper(linknode)
2310 link = linkmapper(linknode)
2305 flags = flags or REVIDX_DEFAULT_FLAGS
2311 flags = flags or REVIDX_DEFAULT_FLAGS
2306
2312
2307 nodes.append(node)
2313 nodes.append(node)
2308
2314
2309 if self.index.has_node(node):
2315 if self.index.has_node(node):
2310 self._nodeduplicatecallback(transaction, node)
2316 self._nodeduplicatecallback(transaction, node)
2311 # this can happen if two branches make the same change
2317 # this can happen if two branches make the same change
2312 continue
2318 continue
2313
2319
2314 for p in (p1, p2):
2320 for p in (p1, p2):
2315 if not self.index.has_node(p):
2321 if not self.index.has_node(p):
2316 raise error.LookupError(
2322 raise error.LookupError(
2317 p, self.indexfile, _(b'unknown parent')
2323 p, self.indexfile, _(b'unknown parent')
2318 )
2324 )
2319
2325
2320 if not self.index.has_node(deltabase):
2326 if not self.index.has_node(deltabase):
2321 raise error.LookupError(
2327 raise error.LookupError(
2322 deltabase, self.indexfile, _(b'unknown delta base')
2328 deltabase, self.indexfile, _(b'unknown delta base')
2323 )
2329 )
2324
2330
2325 baserev = self.rev(deltabase)
2331 baserev = self.rev(deltabase)
2326
2332
2327 if baserev != nullrev and self.iscensored(baserev):
2333 if baserev != nullrev and self.iscensored(baserev):
2328 # if base is censored, delta must be full replacement in a
2334 # if base is censored, delta must be full replacement in a
2329 # single patch operation
2335 # single patch operation
2330 hlen = struct.calcsize(b">lll")
2336 hlen = struct.calcsize(b">lll")
2331 oldlen = self.rawsize(baserev)
2337 oldlen = self.rawsize(baserev)
2332 newlen = len(delta) - hlen
2338 newlen = len(delta) - hlen
2333 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2339 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2334 raise error.CensoredBaseError(
2340 raise error.CensoredBaseError(
2335 self.indexfile, self.node(baserev)
2341 self.indexfile, self.node(baserev)
2336 )
2342 )
2337
2343
2338 if not flags and self._peek_iscensored(baserev, delta, flush):
2344 if not flags and self._peek_iscensored(baserev, delta, flush):
2339 flags |= REVIDX_ISCENSORED
2345 flags |= REVIDX_ISCENSORED
2340
2346
2341 # We assume consumers of addrevisioncb will want to retrieve
2347 # We assume consumers of addrevisioncb will want to retrieve
2342 # the added revision, which will require a call to
2348 # the added revision, which will require a call to
2343 # revision(). revision() will fast path if there is a cache
2349 # revision(). revision() will fast path if there is a cache
2344 # hit. So, we tell _addrevision() to always cache in this case.
2350 # hit. So, we tell _addrevision() to always cache in this case.
2345 # We're only using addgroup() in the context of changegroup
2351 # We're only using addgroup() in the context of changegroup
2346 # generation so the revision data can always be handled as raw
2352 # generation so the revision data can always be handled as raw
2347 # by the flagprocessor.
2353 # by the flagprocessor.
2348 self._addrevision(
2354 self._addrevision(
2349 node,
2355 node,
2350 None,
2356 None,
2351 transaction,
2357 transaction,
2352 link,
2358 link,
2353 p1,
2359 p1,
2354 p2,
2360 p2,
2355 flags,
2361 flags,
2356 (baserev, delta),
2362 (baserev, delta),
2357 ifh,
2363 ifh,
2358 dfh,
2364 dfh,
2359 alwayscache=bool(addrevisioncb),
2365 alwayscache=bool(addrevisioncb),
2360 deltacomputer=deltacomputer,
2366 deltacomputer=deltacomputer,
2361 )
2367 )
2362
2368
2363 if addrevisioncb:
2369 if addrevisioncb:
2364 addrevisioncb(self, node)
2370 addrevisioncb(self, node)
2365
2371
2366 if not dfh and not self._inline:
2372 if not dfh and not self._inline:
2367 # addrevision switched from inline to conventional
2373 # addrevision switched from inline to conventional
2368 # reopen the index
2374 # reopen the index
2369 ifh.close()
2375 ifh.close()
2370 dfh = self._datafp(b"a+")
2376 dfh = self._datafp(b"a+")
2371 ifh = self._indexfp(b"a+")
2377 ifh = self._indexfp(b"a+")
2372 self._writinghandles = (ifh, dfh)
2378 self._writinghandles = (ifh, dfh)
2373 finally:
2379 finally:
2374 self._writinghandles = None
2380 self._writinghandles = None
2375
2381
2376 if dfh:
2382 if dfh:
2377 dfh.close()
2383 dfh.close()
2378 ifh.close()
2384 ifh.close()
2379
2385
2380 return nodes
2386 return nodes
2381
2387
2382 def iscensored(self, rev):
2388 def iscensored(self, rev):
2383 """Check if a file revision is censored."""
2389 """Check if a file revision is censored."""
2384 if not self._censorable:
2390 if not self._censorable:
2385 return False
2391 return False
2386
2392
2387 return self.flags(rev) & REVIDX_ISCENSORED
2393 return self.flags(rev) & REVIDX_ISCENSORED
2388
2394
2389 def _peek_iscensored(self, baserev, delta, flush):
2395 def _peek_iscensored(self, baserev, delta, flush):
2390 """Quickly check if a delta produces a censored revision."""
2396 """Quickly check if a delta produces a censored revision."""
2391 if not self._censorable:
2397 if not self._censorable:
2392 return False
2398 return False
2393
2399
2394 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2400 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2395
2401
2396 def getstrippoint(self, minlink):
2402 def getstrippoint(self, minlink):
2397 """find the minimum rev that must be stripped to strip the linkrev
2403 """find the minimum rev that must be stripped to strip the linkrev
2398
2404
2399 Returns a tuple containing the minimum rev and a set of all revs that
2405 Returns a tuple containing the minimum rev and a set of all revs that
2400 have linkrevs that will be broken by this strip.
2406 have linkrevs that will be broken by this strip.
2401 """
2407 """
2402 return storageutil.resolvestripinfo(
2408 return storageutil.resolvestripinfo(
2403 minlink,
2409 minlink,
2404 len(self) - 1,
2410 len(self) - 1,
2405 self.headrevs(),
2411 self.headrevs(),
2406 self.linkrev,
2412 self.linkrev,
2407 self.parentrevs,
2413 self.parentrevs,
2408 )
2414 )
2409
2415
2410 def strip(self, minlink, transaction):
2416 def strip(self, minlink, transaction):
2411 """truncate the revlog on the first revision with a linkrev >= minlink
2417 """truncate the revlog on the first revision with a linkrev >= minlink
2412
2418
2413 This function is called when we're stripping revision minlink and
2419 This function is called when we're stripping revision minlink and
2414 its descendants from the repository.
2420 its descendants from the repository.
2415
2421
2416 We have to remove all revisions with linkrev >= minlink, because
2422 We have to remove all revisions with linkrev >= minlink, because
2417 the equivalent changelog revisions will be renumbered after the
2423 the equivalent changelog revisions will be renumbered after the
2418 strip.
2424 strip.
2419
2425
2420 So we truncate the revlog on the first of these revisions, and
2426 So we truncate the revlog on the first of these revisions, and
2421 trust that the caller has saved the revisions that shouldn't be
2427 trust that the caller has saved the revisions that shouldn't be
2422 removed and that it'll re-add them after this truncation.
2428 removed and that it'll re-add them after this truncation.
2423 """
2429 """
2424 if len(self) == 0:
2430 if len(self) == 0:
2425 return
2431 return
2426
2432
2427 rev, _ = self.getstrippoint(minlink)
2433 rev, _ = self.getstrippoint(minlink)
2428 if rev == len(self):
2434 if rev == len(self):
2429 return
2435 return
2430
2436
2431 # first truncate the files on disk
2437 # first truncate the files on disk
2432 end = self.start(rev)
2438 end = self.start(rev)
2433 if not self._inline:
2439 if not self._inline:
2434 transaction.add(self.datafile, end)
2440 transaction.add(self.datafile, end)
2435 end = rev * self._io.size
2441 end = rev * self._io.size
2436 else:
2442 else:
2437 end += rev * self._io.size
2443 end += rev * self._io.size
2438
2444
2439 transaction.add(self.indexfile, end)
2445 transaction.add(self.indexfile, end)
2440
2446
2441 # then reset internal state in memory to forget those revisions
2447 # then reset internal state in memory to forget those revisions
2442 self._revisioncache = None
2448 self._revisioncache = None
2443 self._chaininfocache = {}
2449 self._chaininfocache = {}
2444 self._chunkclear()
2450 self._chunkclear()
2445
2451
2446 del self.index[rev:-1]
2452 del self.index[rev:-1]
2447 self._nodepos = None
2453 self._nodepos = None
2448
2454
2449 def checksize(self):
2455 def checksize(self):
2450 """Check size of index and data files
2456 """Check size of index and data files
2451
2457
2452 return a (dd, di) tuple.
2458 return a (dd, di) tuple.
2453 - dd: extra bytes for the "data" file
2459 - dd: extra bytes for the "data" file
2454 - di: extra bytes for the "index" file
2460 - di: extra bytes for the "index" file
2455
2461
2456 A healthy revlog will return (0, 0).
2462 A healthy revlog will return (0, 0).
2457 """
2463 """
2458 expected = 0
2464 expected = 0
2459 if len(self):
2465 if len(self):
2460 expected = max(0, self.end(len(self) - 1))
2466 expected = max(0, self.end(len(self) - 1))
2461
2467
2462 try:
2468 try:
2463 with self._datafp() as f:
2469 with self._datafp() as f:
2464 f.seek(0, io.SEEK_END)
2470 f.seek(0, io.SEEK_END)
2465 actual = f.tell()
2471 actual = f.tell()
2466 dd = actual - expected
2472 dd = actual - expected
2467 except IOError as inst:
2473 except IOError as inst:
2468 if inst.errno != errno.ENOENT:
2474 if inst.errno != errno.ENOENT:
2469 raise
2475 raise
2470 dd = 0
2476 dd = 0
2471
2477
2472 try:
2478 try:
2473 f = self.opener(self.indexfile)
2479 f = self.opener(self.indexfile)
2474 f.seek(0, io.SEEK_END)
2480 f.seek(0, io.SEEK_END)
2475 actual = f.tell()
2481 actual = f.tell()
2476 f.close()
2482 f.close()
2477 s = self._io.size
2483 s = self._io.size
2478 i = max(0, actual // s)
2484 i = max(0, actual // s)
2479 di = actual - (i * s)
2485 di = actual - (i * s)
2480 if self._inline:
2486 if self._inline:
2481 databytes = 0
2487 databytes = 0
2482 for r in self:
2488 for r in self:
2483 databytes += max(0, self.length(r))
2489 databytes += max(0, self.length(r))
2484 dd = 0
2490 dd = 0
2485 di = actual - len(self) * s - databytes
2491 di = actual - len(self) * s - databytes
2486 except IOError as inst:
2492 except IOError as inst:
2487 if inst.errno != errno.ENOENT:
2493 if inst.errno != errno.ENOENT:
2488 raise
2494 raise
2489 di = 0
2495 di = 0
2490
2496
2491 return (dd, di)
2497 return (dd, di)
2492
2498
2493 def files(self):
2499 def files(self):
2494 res = [self.indexfile]
2500 res = [self.indexfile]
2495 if not self._inline:
2501 if not self._inline:
2496 res.append(self.datafile)
2502 res.append(self.datafile)
2497 return res
2503 return res
2498
2504
2499 def emitrevisions(
2505 def emitrevisions(
2500 self,
2506 self,
2501 nodes,
2507 nodes,
2502 nodesorder=None,
2508 nodesorder=None,
2503 revisiondata=False,
2509 revisiondata=False,
2504 assumehaveparentrevisions=False,
2510 assumehaveparentrevisions=False,
2505 deltamode=repository.CG_DELTAMODE_STD,
2511 deltamode=repository.CG_DELTAMODE_STD,
2506 ):
2512 ):
2507 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2513 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2508 raise error.ProgrammingError(
2514 raise error.ProgrammingError(
2509 b'unhandled value for nodesorder: %s' % nodesorder
2515 b'unhandled value for nodesorder: %s' % nodesorder
2510 )
2516 )
2511
2517
2512 if nodesorder is None and not self._generaldelta:
2518 if nodesorder is None and not self._generaldelta:
2513 nodesorder = b'storage'
2519 nodesorder = b'storage'
2514
2520
2515 if (
2521 if (
2516 not self._storedeltachains
2522 not self._storedeltachains
2517 and deltamode != repository.CG_DELTAMODE_PREV
2523 and deltamode != repository.CG_DELTAMODE_PREV
2518 ):
2524 ):
2519 deltamode = repository.CG_DELTAMODE_FULL
2525 deltamode = repository.CG_DELTAMODE_FULL
2520
2526
2521 return storageutil.emitrevisions(
2527 return storageutil.emitrevisions(
2522 self,
2528 self,
2523 nodes,
2529 nodes,
2524 nodesorder,
2530 nodesorder,
2525 revlogrevisiondelta,
2531 revlogrevisiondelta,
2526 deltaparentfn=self.deltaparent,
2532 deltaparentfn=self.deltaparent,
2527 candeltafn=self.candelta,
2533 candeltafn=self.candelta,
2528 rawsizefn=self.rawsize,
2534 rawsizefn=self.rawsize,
2529 revdifffn=self.revdiff,
2535 revdifffn=self.revdiff,
2530 flagsfn=self.flags,
2536 flagsfn=self.flags,
2531 deltamode=deltamode,
2537 deltamode=deltamode,
2532 revisiondata=revisiondata,
2538 revisiondata=revisiondata,
2533 assumehaveparentrevisions=assumehaveparentrevisions,
2539 assumehaveparentrevisions=assumehaveparentrevisions,
2534 )
2540 )
2535
2541
2536 DELTAREUSEALWAYS = b'always'
2542 DELTAREUSEALWAYS = b'always'
2537 DELTAREUSESAMEREVS = b'samerevs'
2543 DELTAREUSESAMEREVS = b'samerevs'
2538 DELTAREUSENEVER = b'never'
2544 DELTAREUSENEVER = b'never'
2539
2545
2540 DELTAREUSEFULLADD = b'fulladd'
2546 DELTAREUSEFULLADD = b'fulladd'
2541
2547
2542 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2548 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2543
2549
2544 def clone(
2550 def clone(
2545 self,
2551 self,
2546 tr,
2552 tr,
2547 destrevlog,
2553 destrevlog,
2548 addrevisioncb=None,
2554 addrevisioncb=None,
2549 deltareuse=DELTAREUSESAMEREVS,
2555 deltareuse=DELTAREUSESAMEREVS,
2550 forcedeltabothparents=None,
2556 forcedeltabothparents=None,
2551 sidedatacompanion=None,
2557 sidedatacompanion=None,
2552 ):
2558 ):
2553 """Copy this revlog to another, possibly with format changes.
2559 """Copy this revlog to another, possibly with format changes.
2554
2560
2555 The destination revlog will contain the same revisions and nodes.
2561 The destination revlog will contain the same revisions and nodes.
2556 However, it may not be bit-for-bit identical due to e.g. delta encoding
2562 However, it may not be bit-for-bit identical due to e.g. delta encoding
2557 differences.
2563 differences.
2558
2564
2559 The ``deltareuse`` argument control how deltas from the existing revlog
2565 The ``deltareuse`` argument control how deltas from the existing revlog
2560 are preserved in the destination revlog. The argument can have the
2566 are preserved in the destination revlog. The argument can have the
2561 following values:
2567 following values:
2562
2568
2563 DELTAREUSEALWAYS
2569 DELTAREUSEALWAYS
2564 Deltas will always be reused (if possible), even if the destination
2570 Deltas will always be reused (if possible), even if the destination
2565 revlog would not select the same revisions for the delta. This is the
2571 revlog would not select the same revisions for the delta. This is the
2566 fastest mode of operation.
2572 fastest mode of operation.
2567 DELTAREUSESAMEREVS
2573 DELTAREUSESAMEREVS
2568 Deltas will be reused if the destination revlog would pick the same
2574 Deltas will be reused if the destination revlog would pick the same
2569 revisions for the delta. This mode strikes a balance between speed
2575 revisions for the delta. This mode strikes a balance between speed
2570 and optimization.
2576 and optimization.
2571 DELTAREUSENEVER
2577 DELTAREUSENEVER
2572 Deltas will never be reused. This is the slowest mode of execution.
2578 Deltas will never be reused. This is the slowest mode of execution.
2573 This mode can be used to recompute deltas (e.g. if the diff/delta
2579 This mode can be used to recompute deltas (e.g. if the diff/delta
2574 algorithm changes).
2580 algorithm changes).
2575 DELTAREUSEFULLADD
2581 DELTAREUSEFULLADD
2576 Revision will be re-added as if their were new content. This is
2582 Revision will be re-added as if their were new content. This is
2577 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
2583 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
2578 eg: large file detection and handling.
2584 eg: large file detection and handling.
2579
2585
2580 Delta computation can be slow, so the choice of delta reuse policy can
2586 Delta computation can be slow, so the choice of delta reuse policy can
2581 significantly affect run time.
2587 significantly affect run time.
2582
2588
2583 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2589 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2584 two extremes. Deltas will be reused if they are appropriate. But if the
2590 two extremes. Deltas will be reused if they are appropriate. But if the
2585 delta could choose a better revision, it will do so. This means if you
2591 delta could choose a better revision, it will do so. This means if you
2586 are converting a non-generaldelta revlog to a generaldelta revlog,
2592 are converting a non-generaldelta revlog to a generaldelta revlog,
2587 deltas will be recomputed if the delta's parent isn't a parent of the
2593 deltas will be recomputed if the delta's parent isn't a parent of the
2588 revision.
2594 revision.
2589
2595
2590 In addition to the delta policy, the ``forcedeltabothparents``
2596 In addition to the delta policy, the ``forcedeltabothparents``
2591 argument controls whether to force compute deltas against both parents
2597 argument controls whether to force compute deltas against both parents
2592 for merges. By default, the current default is used.
2598 for merges. By default, the current default is used.
2593
2599
2594 If not None, the `sidedatacompanion` is callable that accept two
2600 If not None, the `sidedatacompanion` is callable that accept two
2595 arguments:
2601 arguments:
2596
2602
2597 (srcrevlog, rev)
2603 (srcrevlog, rev)
2598
2604
2599 and return a triplet that control changes to sidedata content from the
2605 and return a triplet that control changes to sidedata content from the
2600 old revision to the new clone result:
2606 old revision to the new clone result:
2601
2607
2602 (dropall, filterout, update)
2608 (dropall, filterout, update)
2603
2609
2604 * if `dropall` is True, all sidedata should be dropped
2610 * if `dropall` is True, all sidedata should be dropped
2605 * `filterout` is a set of sidedata keys that should be dropped
2611 * `filterout` is a set of sidedata keys that should be dropped
2606 * `update` is a mapping of additionnal/new key -> value
2612 * `update` is a mapping of additionnal/new key -> value
2607 """
2613 """
2608 if deltareuse not in self.DELTAREUSEALL:
2614 if deltareuse not in self.DELTAREUSEALL:
2609 raise ValueError(
2615 raise ValueError(
2610 _(b'value for deltareuse invalid: %s') % deltareuse
2616 _(b'value for deltareuse invalid: %s') % deltareuse
2611 )
2617 )
2612
2618
2613 if len(destrevlog):
2619 if len(destrevlog):
2614 raise ValueError(_(b'destination revlog is not empty'))
2620 raise ValueError(_(b'destination revlog is not empty'))
2615
2621
2616 if getattr(self, 'filteredrevs', None):
2622 if getattr(self, 'filteredrevs', None):
2617 raise ValueError(_(b'source revlog has filtered revisions'))
2623 raise ValueError(_(b'source revlog has filtered revisions'))
2618 if getattr(destrevlog, 'filteredrevs', None):
2624 if getattr(destrevlog, 'filteredrevs', None):
2619 raise ValueError(_(b'destination revlog has filtered revisions'))
2625 raise ValueError(_(b'destination revlog has filtered revisions'))
2620
2626
2621 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2627 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2622 # if possible.
2628 # if possible.
2623 oldlazydelta = destrevlog._lazydelta
2629 oldlazydelta = destrevlog._lazydelta
2624 oldlazydeltabase = destrevlog._lazydeltabase
2630 oldlazydeltabase = destrevlog._lazydeltabase
2625 oldamd = destrevlog._deltabothparents
2631 oldamd = destrevlog._deltabothparents
2626
2632
2627 try:
2633 try:
2628 if deltareuse == self.DELTAREUSEALWAYS:
2634 if deltareuse == self.DELTAREUSEALWAYS:
2629 destrevlog._lazydeltabase = True
2635 destrevlog._lazydeltabase = True
2630 destrevlog._lazydelta = True
2636 destrevlog._lazydelta = True
2631 elif deltareuse == self.DELTAREUSESAMEREVS:
2637 elif deltareuse == self.DELTAREUSESAMEREVS:
2632 destrevlog._lazydeltabase = False
2638 destrevlog._lazydeltabase = False
2633 destrevlog._lazydelta = True
2639 destrevlog._lazydelta = True
2634 elif deltareuse == self.DELTAREUSENEVER:
2640 elif deltareuse == self.DELTAREUSENEVER:
2635 destrevlog._lazydeltabase = False
2641 destrevlog._lazydeltabase = False
2636 destrevlog._lazydelta = False
2642 destrevlog._lazydelta = False
2637
2643
2638 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2644 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2639
2645
2640 self._clone(
2646 self._clone(
2641 tr,
2647 tr,
2642 destrevlog,
2648 destrevlog,
2643 addrevisioncb,
2649 addrevisioncb,
2644 deltareuse,
2650 deltareuse,
2645 forcedeltabothparents,
2651 forcedeltabothparents,
2646 sidedatacompanion,
2652 sidedatacompanion,
2647 )
2653 )
2648
2654
2649 finally:
2655 finally:
2650 destrevlog._lazydelta = oldlazydelta
2656 destrevlog._lazydelta = oldlazydelta
2651 destrevlog._lazydeltabase = oldlazydeltabase
2657 destrevlog._lazydeltabase = oldlazydeltabase
2652 destrevlog._deltabothparents = oldamd
2658 destrevlog._deltabothparents = oldamd
2653
2659
2654 def _clone(
2660 def _clone(
2655 self,
2661 self,
2656 tr,
2662 tr,
2657 destrevlog,
2663 destrevlog,
2658 addrevisioncb,
2664 addrevisioncb,
2659 deltareuse,
2665 deltareuse,
2660 forcedeltabothparents,
2666 forcedeltabothparents,
2661 sidedatacompanion,
2667 sidedatacompanion,
2662 ):
2668 ):
2663 """perform the core duty of `revlog.clone` after parameter processing"""
2669 """perform the core duty of `revlog.clone` after parameter processing"""
2664 deltacomputer = deltautil.deltacomputer(destrevlog)
2670 deltacomputer = deltautil.deltacomputer(destrevlog)
2665 index = self.index
2671 index = self.index
2666 for rev in self:
2672 for rev in self:
2667 entry = index[rev]
2673 entry = index[rev]
2668
2674
2669 # Some classes override linkrev to take filtered revs into
2675 # Some classes override linkrev to take filtered revs into
2670 # account. Use raw entry from index.
2676 # account. Use raw entry from index.
2671 flags = entry[0] & 0xFFFF
2677 flags = entry[0] & 0xFFFF
2672 linkrev = entry[4]
2678 linkrev = entry[4]
2673 p1 = index[entry[5]][7]
2679 p1 = index[entry[5]][7]
2674 p2 = index[entry[6]][7]
2680 p2 = index[entry[6]][7]
2675 node = entry[7]
2681 node = entry[7]
2676
2682
2677 sidedataactions = (False, [], {})
2683 sidedataactions = (False, [], {})
2678 if sidedatacompanion is not None:
2684 if sidedatacompanion is not None:
2679 sidedataactions = sidedatacompanion(self, rev)
2685 sidedataactions = sidedatacompanion(self, rev)
2680
2686
2681 # (Possibly) reuse the delta from the revlog if allowed and
2687 # (Possibly) reuse the delta from the revlog if allowed and
2682 # the revlog chunk is a delta.
2688 # the revlog chunk is a delta.
2683 cachedelta = None
2689 cachedelta = None
2684 rawtext = None
2690 rawtext = None
2685 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2691 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2686 dropall, filterout, update = sidedataactions
2692 dropall, filterout, update = sidedataactions
2687 text, sidedata = self._revisiondata(rev)
2693 text, sidedata = self._revisiondata(rev)
2688 if dropall:
2694 if dropall:
2689 sidedata = {}
2695 sidedata = {}
2690 for key in filterout:
2696 for key in filterout:
2691 sidedata.pop(key, None)
2697 sidedata.pop(key, None)
2692 sidedata.update(update)
2698 sidedata.update(update)
2693 if not sidedata:
2699 if not sidedata:
2694 sidedata = None
2700 sidedata = None
2695 destrevlog.addrevision(
2701 destrevlog.addrevision(
2696 text,
2702 text,
2697 tr,
2703 tr,
2698 linkrev,
2704 linkrev,
2699 p1,
2705 p1,
2700 p2,
2706 p2,
2701 cachedelta=cachedelta,
2707 cachedelta=cachedelta,
2702 node=node,
2708 node=node,
2703 flags=flags,
2709 flags=flags,
2704 deltacomputer=deltacomputer,
2710 deltacomputer=deltacomputer,
2705 sidedata=sidedata,
2711 sidedata=sidedata,
2706 )
2712 )
2707 else:
2713 else:
2708 if destrevlog._lazydelta:
2714 if destrevlog._lazydelta:
2709 dp = self.deltaparent(rev)
2715 dp = self.deltaparent(rev)
2710 if dp != nullrev:
2716 if dp != nullrev:
2711 cachedelta = (dp, bytes(self._chunk(rev)))
2717 cachedelta = (dp, bytes(self._chunk(rev)))
2712
2718
2713 if not cachedelta:
2719 if not cachedelta:
2714 rawtext = self.rawdata(rev)
2720 rawtext = self.rawdata(rev)
2715
2721
2716 ifh = destrevlog.opener(
2722 ifh = destrevlog.opener(
2717 destrevlog.indexfile, b'a+', checkambig=False
2723 destrevlog.indexfile, b'a+', checkambig=False
2718 )
2724 )
2719 dfh = None
2725 dfh = None
2720 if not destrevlog._inline:
2726 if not destrevlog._inline:
2721 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2727 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2722 try:
2728 try:
2723 destrevlog._addrevision(
2729 destrevlog._addrevision(
2724 node,
2730 node,
2725 rawtext,
2731 rawtext,
2726 tr,
2732 tr,
2727 linkrev,
2733 linkrev,
2728 p1,
2734 p1,
2729 p2,
2735 p2,
2730 flags,
2736 flags,
2731 cachedelta,
2737 cachedelta,
2732 ifh,
2738 ifh,
2733 dfh,
2739 dfh,
2734 deltacomputer=deltacomputer,
2740 deltacomputer=deltacomputer,
2735 )
2741 )
2736 finally:
2742 finally:
2737 if dfh:
2743 if dfh:
2738 dfh.close()
2744 dfh.close()
2739 ifh.close()
2745 ifh.close()
2740
2746
2741 if addrevisioncb:
2747 if addrevisioncb:
2742 addrevisioncb(self, rev, node)
2748 addrevisioncb(self, rev, node)
2743
2749
2744 def censorrevision(self, tr, censornode, tombstone=b''):
2750 def censorrevision(self, tr, censornode, tombstone=b''):
2745 if (self.version & 0xFFFF) == REVLOGV0:
2751 if (self.version & 0xFFFF) == REVLOGV0:
2746 raise error.RevlogError(
2752 raise error.RevlogError(
2747 _(b'cannot censor with version %d revlogs') % self.version
2753 _(b'cannot censor with version %d revlogs') % self.version
2748 )
2754 )
2749
2755
2750 censorrev = self.rev(censornode)
2756 censorrev = self.rev(censornode)
2751 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2757 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2752
2758
2753 if len(tombstone) > self.rawsize(censorrev):
2759 if len(tombstone) > self.rawsize(censorrev):
2754 raise error.Abort(
2760 raise error.Abort(
2755 _(b'censor tombstone must be no longer than censored data')
2761 _(b'censor tombstone must be no longer than censored data')
2756 )
2762 )
2757
2763
2758 # Rewriting the revlog in place is hard. Our strategy for censoring is
2764 # Rewriting the revlog in place is hard. Our strategy for censoring is
2759 # to create a new revlog, copy all revisions to it, then replace the
2765 # to create a new revlog, copy all revisions to it, then replace the
2760 # revlogs on transaction close.
2766 # revlogs on transaction close.
2761
2767
2762 newindexfile = self.indexfile + b'.tmpcensored'
2768 newindexfile = self.indexfile + b'.tmpcensored'
2763 newdatafile = self.datafile + b'.tmpcensored'
2769 newdatafile = self.datafile + b'.tmpcensored'
2764
2770
2765 # This is a bit dangerous. We could easily have a mismatch of state.
2771 # This is a bit dangerous. We could easily have a mismatch of state.
2766 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
2772 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
2767 newrl.version = self.version
2773 newrl.version = self.version
2768 newrl._generaldelta = self._generaldelta
2774 newrl._generaldelta = self._generaldelta
2769 newrl._io = self._io
2775 newrl._io = self._io
2770
2776
2771 for rev in self.revs():
2777 for rev in self.revs():
2772 node = self.node(rev)
2778 node = self.node(rev)
2773 p1, p2 = self.parents(node)
2779 p1, p2 = self.parents(node)
2774
2780
2775 if rev == censorrev:
2781 if rev == censorrev:
2776 newrl.addrawrevision(
2782 newrl.addrawrevision(
2777 tombstone,
2783 tombstone,
2778 tr,
2784 tr,
2779 self.linkrev(censorrev),
2785 self.linkrev(censorrev),
2780 p1,
2786 p1,
2781 p2,
2787 p2,
2782 censornode,
2788 censornode,
2783 REVIDX_ISCENSORED,
2789 REVIDX_ISCENSORED,
2784 )
2790 )
2785
2791
2786 if newrl.deltaparent(rev) != nullrev:
2792 if newrl.deltaparent(rev) != nullrev:
2787 raise error.Abort(
2793 raise error.Abort(
2788 _(
2794 _(
2789 b'censored revision stored as delta; '
2795 b'censored revision stored as delta; '
2790 b'cannot censor'
2796 b'cannot censor'
2791 ),
2797 ),
2792 hint=_(
2798 hint=_(
2793 b'censoring of revlogs is not '
2799 b'censoring of revlogs is not '
2794 b'fully implemented; please report '
2800 b'fully implemented; please report '
2795 b'this bug'
2801 b'this bug'
2796 ),
2802 ),
2797 )
2803 )
2798 continue
2804 continue
2799
2805
2800 if self.iscensored(rev):
2806 if self.iscensored(rev):
2801 if self.deltaparent(rev) != nullrev:
2807 if self.deltaparent(rev) != nullrev:
2802 raise error.Abort(
2808 raise error.Abort(
2803 _(
2809 _(
2804 b'cannot censor due to censored '
2810 b'cannot censor due to censored '
2805 b'revision having delta stored'
2811 b'revision having delta stored'
2806 )
2812 )
2807 )
2813 )
2808 rawtext = self._chunk(rev)
2814 rawtext = self._chunk(rev)
2809 else:
2815 else:
2810 rawtext = self.rawdata(rev)
2816 rawtext = self.rawdata(rev)
2811
2817
2812 newrl.addrawrevision(
2818 newrl.addrawrevision(
2813 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2819 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2814 )
2820 )
2815
2821
2816 tr.addbackup(self.indexfile, location=b'store')
2822 tr.addbackup(self.indexfile, location=b'store')
2817 if not self._inline:
2823 if not self._inline:
2818 tr.addbackup(self.datafile, location=b'store')
2824 tr.addbackup(self.datafile, location=b'store')
2819
2825
2820 self.opener.rename(newrl.indexfile, self.indexfile)
2826 self.opener.rename(newrl.indexfile, self.indexfile)
2821 if not self._inline:
2827 if not self._inline:
2822 self.opener.rename(newrl.datafile, self.datafile)
2828 self.opener.rename(newrl.datafile, self.datafile)
2823
2829
2824 self.clearcaches()
2830 self.clearcaches()
2825 self._loadindex()
2831 self._loadindex()
2826
2832
2827 def verifyintegrity(self, state):
2833 def verifyintegrity(self, state):
2828 """Verifies the integrity of the revlog.
2834 """Verifies the integrity of the revlog.
2829
2835
2830 Yields ``revlogproblem`` instances describing problems that are
2836 Yields ``revlogproblem`` instances describing problems that are
2831 found.
2837 found.
2832 """
2838 """
2833 dd, di = self.checksize()
2839 dd, di = self.checksize()
2834 if dd:
2840 if dd:
2835 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2841 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2836 if di:
2842 if di:
2837 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2843 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2838
2844
2839 version = self.version & 0xFFFF
2845 version = self.version & 0xFFFF
2840
2846
2841 # The verifier tells us what version revlog we should be.
2847 # The verifier tells us what version revlog we should be.
2842 if version != state[b'expectedversion']:
2848 if version != state[b'expectedversion']:
2843 yield revlogproblem(
2849 yield revlogproblem(
2844 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2850 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2845 % (self.indexfile, version, state[b'expectedversion'])
2851 % (self.indexfile, version, state[b'expectedversion'])
2846 )
2852 )
2847
2853
2848 state[b'skipread'] = set()
2854 state[b'skipread'] = set()
2849
2855
2850 for rev in self:
2856 for rev in self:
2851 node = self.node(rev)
2857 node = self.node(rev)
2852
2858
2853 # Verify contents. 4 cases to care about:
2859 # Verify contents. 4 cases to care about:
2854 #
2860 #
2855 # common: the most common case
2861 # common: the most common case
2856 # rename: with a rename
2862 # rename: with a rename
2857 # meta: file content starts with b'\1\n', the metadata
2863 # meta: file content starts with b'\1\n', the metadata
2858 # header defined in filelog.py, but without a rename
2864 # header defined in filelog.py, but without a rename
2859 # ext: content stored externally
2865 # ext: content stored externally
2860 #
2866 #
2861 # More formally, their differences are shown below:
2867 # More formally, their differences are shown below:
2862 #
2868 #
2863 # | common | rename | meta | ext
2869 # | common | rename | meta | ext
2864 # -------------------------------------------------------
2870 # -------------------------------------------------------
2865 # flags() | 0 | 0 | 0 | not 0
2871 # flags() | 0 | 0 | 0 | not 0
2866 # renamed() | False | True | False | ?
2872 # renamed() | False | True | False | ?
2867 # rawtext[0:2]=='\1\n'| False | True | True | ?
2873 # rawtext[0:2]=='\1\n'| False | True | True | ?
2868 #
2874 #
2869 # "rawtext" means the raw text stored in revlog data, which
2875 # "rawtext" means the raw text stored in revlog data, which
2870 # could be retrieved by "rawdata(rev)". "text"
2876 # could be retrieved by "rawdata(rev)". "text"
2871 # mentioned below is "revision(rev)".
2877 # mentioned below is "revision(rev)".
2872 #
2878 #
2873 # There are 3 different lengths stored physically:
2879 # There are 3 different lengths stored physically:
2874 # 1. L1: rawsize, stored in revlog index
2880 # 1. L1: rawsize, stored in revlog index
2875 # 2. L2: len(rawtext), stored in revlog data
2881 # 2. L2: len(rawtext), stored in revlog data
2876 # 3. L3: len(text), stored in revlog data if flags==0, or
2882 # 3. L3: len(text), stored in revlog data if flags==0, or
2877 # possibly somewhere else if flags!=0
2883 # possibly somewhere else if flags!=0
2878 #
2884 #
2879 # L1 should be equal to L2. L3 could be different from them.
2885 # L1 should be equal to L2. L3 could be different from them.
2880 # "text" may or may not affect commit hash depending on flag
2886 # "text" may or may not affect commit hash depending on flag
2881 # processors (see flagutil.addflagprocessor).
2887 # processors (see flagutil.addflagprocessor).
2882 #
2888 #
2883 # | common | rename | meta | ext
2889 # | common | rename | meta | ext
2884 # -------------------------------------------------
2890 # -------------------------------------------------
2885 # rawsize() | L1 | L1 | L1 | L1
2891 # rawsize() | L1 | L1 | L1 | L1
2886 # size() | L1 | L2-LM | L1(*) | L1 (?)
2892 # size() | L1 | L2-LM | L1(*) | L1 (?)
2887 # len(rawtext) | L2 | L2 | L2 | L2
2893 # len(rawtext) | L2 | L2 | L2 | L2
2888 # len(text) | L2 | L2 | L2 | L3
2894 # len(text) | L2 | L2 | L2 | L3
2889 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2895 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2890 #
2896 #
2891 # LM: length of metadata, depending on rawtext
2897 # LM: length of metadata, depending on rawtext
2892 # (*): not ideal, see comment in filelog.size
2898 # (*): not ideal, see comment in filelog.size
2893 # (?): could be "- len(meta)" if the resolved content has
2899 # (?): could be "- len(meta)" if the resolved content has
2894 # rename metadata
2900 # rename metadata
2895 #
2901 #
2896 # Checks needed to be done:
2902 # Checks needed to be done:
2897 # 1. length check: L1 == L2, in all cases.
2903 # 1. length check: L1 == L2, in all cases.
2898 # 2. hash check: depending on flag processor, we may need to
2904 # 2. hash check: depending on flag processor, we may need to
2899 # use either "text" (external), or "rawtext" (in revlog).
2905 # use either "text" (external), or "rawtext" (in revlog).
2900
2906
2901 try:
2907 try:
2902 skipflags = state.get(b'skipflags', 0)
2908 skipflags = state.get(b'skipflags', 0)
2903 if skipflags:
2909 if skipflags:
2904 skipflags &= self.flags(rev)
2910 skipflags &= self.flags(rev)
2905
2911
2906 if skipflags:
2912 if skipflags:
2907 state[b'skipread'].add(node)
2913 state[b'skipread'].add(node)
2908 else:
2914 else:
2909 # Side-effect: read content and verify hash.
2915 # Side-effect: read content and verify hash.
2910 self.revision(node)
2916 self.revision(node)
2911
2917
2912 l1 = self.rawsize(rev)
2918 l1 = self.rawsize(rev)
2913 l2 = len(self.rawdata(node))
2919 l2 = len(self.rawdata(node))
2914
2920
2915 if l1 != l2:
2921 if l1 != l2:
2916 yield revlogproblem(
2922 yield revlogproblem(
2917 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
2923 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
2918 node=node,
2924 node=node,
2919 )
2925 )
2920
2926
2921 except error.CensoredNodeError:
2927 except error.CensoredNodeError:
2922 if state[b'erroroncensored']:
2928 if state[b'erroroncensored']:
2923 yield revlogproblem(
2929 yield revlogproblem(
2924 error=_(b'censored file data'), node=node
2930 error=_(b'censored file data'), node=node
2925 )
2931 )
2926 state[b'skipread'].add(node)
2932 state[b'skipread'].add(node)
2927 except Exception as e:
2933 except Exception as e:
2928 yield revlogproblem(
2934 yield revlogproblem(
2929 error=_(b'unpacking %s: %s')
2935 error=_(b'unpacking %s: %s')
2930 % (short(node), stringutil.forcebytestr(e)),
2936 % (short(node), stringutil.forcebytestr(e)),
2931 node=node,
2937 node=node,
2932 )
2938 )
2933 state[b'skipread'].add(node)
2939 state[b'skipread'].add(node)
2934
2940
2935 def storageinfo(
2941 def storageinfo(
2936 self,
2942 self,
2937 exclusivefiles=False,
2943 exclusivefiles=False,
2938 sharedfiles=False,
2944 sharedfiles=False,
2939 revisionscount=False,
2945 revisionscount=False,
2940 trackedsize=False,
2946 trackedsize=False,
2941 storedsize=False,
2947 storedsize=False,
2942 ):
2948 ):
2943 d = {}
2949 d = {}
2944
2950
2945 if exclusivefiles:
2951 if exclusivefiles:
2946 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
2952 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
2947 if not self._inline:
2953 if not self._inline:
2948 d[b'exclusivefiles'].append((self.opener, self.datafile))
2954 d[b'exclusivefiles'].append((self.opener, self.datafile))
2949
2955
2950 if sharedfiles:
2956 if sharedfiles:
2951 d[b'sharedfiles'] = []
2957 d[b'sharedfiles'] = []
2952
2958
2953 if revisionscount:
2959 if revisionscount:
2954 d[b'revisionscount'] = len(self)
2960 d[b'revisionscount'] = len(self)
2955
2961
2956 if trackedsize:
2962 if trackedsize:
2957 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
2963 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
2958
2964
2959 if storedsize:
2965 if storedsize:
2960 d[b'storedsize'] = sum(
2966 d[b'storedsize'] = sum(
2961 self.opener.stat(path).st_size for path in self.files()
2967 self.opener.stat(path).st_size for path in self.files()
2962 )
2968 )
2963
2969
2964 return d
2970 return d
General Comments 0
You need to be logged in to leave comments. Login now