/*
 * NOTE(review): the following provenance information was scraped from the
 * hosting page this file was extracted from; kept as a comment so the
 * file remains valid C:
 *   commit r37987:0304f224 (default branch), Martin von Zweigbergk —
 *   "revlog: use node tree (native code) for shortest() calculation"
 */
/*
 parsers.c - efficient content parsing

 Copyright 2008 Matt Mackall <mpm@selenic.com> and others

 This software may be used and distributed according to the terms of
 the GNU General Public License, incorporated herein by reference.
*/

#include <Python.h>
#include <ctype.h>
#include <stddef.h>
#include <string.h>

#include "bitmanipulation.h"
#include "charencode.h"
#include "util.h"

#ifdef IS_PY3K
/* The mapping of Python types is meant to be temporary to get Python
 * 3 to compile. We should remove this once Python 3 support is fully
 * supported and proper types are used in the extensions themselves. */
#define PyInt_Check PyLong_Check
#define PyInt_FromLong PyLong_FromLong
#define PyInt_FromSsize_t PyLong_FromSsize_t
#define PyInt_AsLong PyLong_AsLong
#endif

/* Message raised when the compiled extension was built against a
 * different Python minor version than the running interpreter. */
static const char *const versionerrortext = "Python minor version mismatch";

/*
 * Python-callable wrapper: make_presized_dict(expected_size) -> dict.
 *
 * Parses a single Py_ssize_t argument and delegates to
 * _dict_new_presized() (declared elsewhere, presumably in charencode.h
 * or util.h — TODO confirm) to create a dict pre-sized for the
 * expected number of entries.  Returns NULL with an exception set on
 * bad arguments.
 */
static PyObject *dict_new_presized(PyObject *self, PyObject *args)
{
	Py_ssize_t expected_size;

	if (!PyArg_ParseTuple(args, "n:make_presized_dict", &expected_size))
		return NULL;

	return _dict_new_presized(expected_size);
}

/*
 * This code assumes that a manifest is stitched together with newline
 * ('\n') characters.
 */
/*
 * parse_manifest(mfdict, fdict, data) -> None
 *
 * Parses manifest bytes of the form "<file>\0<hexnode>[<flags>]\n"
 * per entry.  Fills mfdict with {file: binary node} and fdict with
 * {file: flags} for entries that carry flag characters after the
 * 40-char hex node.  Returns None, or NULL with ValueError set on a
 * malformed entry.
 */
static PyObject *parse_manifest(PyObject *self, PyObject *args)
{
	PyObject *mfdict, *fdict;
	char *str, *start, *end;
	int len;

	if (!PyArg_ParseTuple(
	        args, PY23("O!O!s#:parse_manifest", "O!O!y#:parse_manifest"),
	        &PyDict_Type, &mfdict, &PyDict_Type, &fdict, &str, &len))
		goto quit;

	start = str;
	end = str + len;
	while (start < end) {
		PyObject *file = NULL, *node = NULL;
		PyObject *flags = NULL;
		char *zero = NULL, *newline = NULL;
		ptrdiff_t nlen;

		/* '\0' separates the filename from the hex node. */
		zero = memchr(start, '\0', end - start);
		if (!zero) {
			PyErr_SetString(PyExc_ValueError,
					"manifest entry has no separator");
			goto quit;
		}

		newline = memchr(zero + 1, '\n', end - (zero + 1));
		if (!newline) {
			PyErr_SetString(PyExc_ValueError,
					"manifest contains trailing garbage");
			goto quit;
		}

		file = PyBytes_FromStringAndSize(start, zero - start);

		if (!file)
			goto bail;

		nlen = newline - zero - 1;

		/* First 40 hex chars are the node; any extra bytes are flags. */
		node = unhexlify(zero + 1, nlen > 40 ? 40 : (Py_ssize_t)nlen);
		if (!node)
			goto bail;

		if (nlen > 40) {
			flags = PyBytes_FromStringAndSize(zero + 41, nlen - 40);
			if (!flags)
				goto bail;

			if (PyDict_SetItem(fdict, file, flags) == -1)
				goto bail;
		}

		if (PyDict_SetItem(mfdict, file, node) == -1)
			goto bail;

		start = newline + 1;

		/* Success path for this entry: drop our temporary refs
		 * (the dicts hold their own) and move on. */
		Py_XDECREF(flags);
		Py_XDECREF(node);
		Py_XDECREF(file);
		continue;
	bail:
		Py_XDECREF(flags);
		Py_XDECREF(node);
		Py_XDECREF(file);
		goto quit;
	}

	Py_INCREF(Py_None);
	return Py_None;
quit:
	return NULL;
}

/*
 * Construct a dirstateTupleObject directly (bypassing tp_new argument
 * parsing) from the four dirstate fields.  Returns a new reference,
 * or NULL on allocation failure.
 */
static inline dirstateTupleObject *make_dirstate_tuple(char state, int mode,
						       int size, int mtime)
{
	dirstateTupleObject *t =
	    PyObject_New(dirstateTupleObject, &dirstateTupleType);
	if (!t)
		return NULL;
	t->state = state;
	t->mode = mode;
	t->size = size;
	t->mtime = mtime;
	return t;
}

/*
 * tp_new for dirstate_tuple: dirstate_tuple(state, mode, size, mtime).
 *
 * Expects a one-character bytes 'state' and three C ints; returns the
 * new immutable object, or NULL with an exception set.
 */
static PyObject *dirstate_tuple_new(PyTypeObject *subtype, PyObject *args,
				    PyObject *kwds)
{
	/* We do all the initialization here and not a tp_init function because
	 * dirstate_tuple is immutable. */
	dirstateTupleObject *t;
	char state;
	int size, mode, mtime;
	if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime))
		return NULL;

	t = (dirstateTupleObject *)subtype->tp_alloc(subtype, 1);
	if (!t)
		return NULL;
	t->state = state;
	t->mode = mode;
	t->size = size;
	t->mtime = mtime;

	return (PyObject *)t;
}

/* tp_dealloc: no owned references to release, just free the object. */
static void dirstate_tuple_dealloc(PyObject *o)
{
	PyObject_Del(o);
}

/* sq_length: a dirstate tuple always has exactly four fields. */
static Py_ssize_t dirstate_tuple_length(PyObject *o)
{
	return 4;
}

/*
 * sq_item: index into the pseudo-tuple.
 *   [0] -> state as a 1-byte bytes object
 *   [1] -> mode, [2] -> size, [3] -> mtime as ints
 * Raises IndexError for any other index.
 */
static PyObject *dirstate_tuple_item(PyObject *o, Py_ssize_t i)
{
	dirstateTupleObject *t = (dirstateTupleObject *)o;
	switch (i) {
	case 0:
		return PyBytes_FromStringAndSize(&t->state, 1);
	case 1:
		return PyInt_FromLong(t->mode);
	case 2:
		return PyInt_FromLong(t->size);
	case 3:
		return PyInt_FromLong(t->mtime);
	default:
		PyErr_SetString(PyExc_IndexError, "index out of range");
		return NULL;
	}
}

/* Sequence protocol: only length and item access are supported, which
 * is enough for tuple-style unpacking and indexing from Python. */
static PySequenceMethods dirstate_tuple_sq = {
    dirstate_tuple_length, /* sq_length */
    0,                     /* sq_concat */
    0,                     /* sq_repeat */
    dirstate_tuple_item,   /* sq_item */
    0,                     /* sq_ass_item */
    0,                     /* sq_contains */
    0,                     /* sq_inplace_concat */
    0                      /* sq_inplace_repeat */
};

/* Type object for the immutable 4-field dirstate entry exposed to
 * Python as "dirstate_tuple". */
PyTypeObject dirstateTupleType = {
    PyVarObject_HEAD_INIT(NULL, 0) /* header */
    "dirstate_tuple",              /* tp_name */
    sizeof(dirstateTupleObject),   /* tp_basicsize */
    0,                             /* tp_itemsize */
    (destructor)dirstate_tuple_dealloc, /* tp_dealloc */
    0,                             /* tp_print */
    0,                             /* tp_getattr */
    0,                             /* tp_setattr */
    0,                             /* tp_compare */
    0,                             /* tp_repr */
    0,                             /* tp_as_number */
    &dirstate_tuple_sq,            /* tp_as_sequence */
    0,                             /* tp_as_mapping */
    0,                             /* tp_hash */
    0,                             /* tp_call */
    0,                             /* tp_str */
    0,                             /* tp_getattro */
    0,                             /* tp_setattro */
    0,                             /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,            /* tp_flags */
    "dirstate tuple",              /* tp_doc */
    0,                             /* tp_traverse */
    0,                             /* tp_clear */
    0,                             /* tp_richcompare */
    0,                             /* tp_weaklistoffset */
    0,                             /* tp_iter */
    0,                             /* tp_iternext */
    0,                             /* tp_methods */
    0,                             /* tp_members */
    0,                             /* tp_getset */
    0,                             /* tp_base */
    0,                             /* tp_dict */
    0,                             /* tp_descr_get */
    0,                             /* tp_descr_set */
    0,                             /* tp_dictoffset */
    0,                             /* tp_init */
    0,                             /* tp_alloc */
    dirstate_tuple_new,            /* tp_new */
};

/*
 * parse_dirstate(dmap, cmap, data) -> (p1, p2)
 *
 * Parses on-disk dirstate bytes: a 40-byte header holding the two
 * parent hashes, then repeated records of a 17-byte fixed header
 * (state char + three be32 fields + be32 filename length) followed by
 * the filename, optionally "\0<copy source>".  Fills dmap with
 * {filename: dirstate_tuple} and cmap with {filename: copy source}.
 * Returns the parents pair, or NULL with ValueError on truncated or
 * overflowing input.
 */
static PyObject *parse_dirstate(PyObject *self, PyObject *args)
{
	PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
	PyObject *fname = NULL, *cname = NULL, *entry = NULL;
	char state, *cur, *str, *cpos;
	int mode, size, mtime;
	unsigned int flen, len, pos = 40;
	int readlen;

	if (!PyArg_ParseTuple(
	        args, PY23("O!O!s#:parse_dirstate", "O!O!y#:parse_dirstate"),
	        &PyDict_Type, &dmap, &PyDict_Type, &cmap, &str, &readlen))
		goto quit;

	len = readlen;

	/* read parents */
	if (len < 40) {
		PyErr_SetString(PyExc_ValueError,
				"too little data for parents");
		goto quit;
	}

	parents = Py_BuildValue(PY23("s#s#", "y#y#"), str, 20, str + 20, 20);
	if (!parents)
		goto quit;

	/* read filenames */
	while (pos >= 40 && pos < len) {
		if (pos + 17 > len) {
			PyErr_SetString(PyExc_ValueError,
					"overflow in dirstate");
			goto quit;
		}
		cur = str + pos;
		/* unpack header */
		state = *cur;
		mode = getbe32(cur + 1);
		size = getbe32(cur + 5);
		mtime = getbe32(cur + 9);
		flen = getbe32(cur + 13);
		pos += 17;
		cur += 17;
		if (flen > len - pos) {
			PyErr_SetString(PyExc_ValueError,
					"overflow in dirstate");
			goto quit;
		}

		entry =
		    (PyObject *)make_dirstate_tuple(state, mode, size, mtime);
		/* A '\0' inside the name field separates the filename
		 * from a copy-source filename. */
		cpos = memchr(cur, 0, flen);
		if (cpos) {
			fname = PyBytes_FromStringAndSize(cur, cpos - cur);
			cname = PyBytes_FromStringAndSize(
			    cpos + 1, flen - (cpos - cur) - 1);
			if (!fname || !cname ||
			    PyDict_SetItem(cmap, fname, cname) == -1 ||
			    PyDict_SetItem(dmap, fname, entry) == -1)
				goto quit;
			Py_DECREF(cname);
		} else {
			fname = PyBytes_FromStringAndSize(cur, flen);
			if (!fname || PyDict_SetItem(dmap, fname, entry) == -1)
				goto quit;
		}
		Py_DECREF(fname);
		Py_DECREF(entry);
		fname = cname = entry = NULL;
		pos += flen;
	}

	ret = parents;
	Py_INCREF(ret);
quit:
	Py_XDECREF(fname);
	Py_XDECREF(cname);
	Py_XDECREF(entry);
	Py_XDECREF(parents);
	return ret;
}

/*
 * Build a set of non-normal and other parent entries from the dirstate dmap
 */
/*
 * nonnormalentries(dmap) -> (nonnormal_set, otherparent_set)
 *
 * Scans {filename: dirstate_tuple} and returns two sets:
 *   - otherparent: entries with state 'n' and size -2
 *   - nonnormal:   entries whose state is not 'n', or whose mtime is -1
 * Raises TypeError if a value is not a dirstate tuple.
 */
static PyObject *nonnormalotherparententries(PyObject *self, PyObject *args)
{
	PyObject *dmap, *fname, *v;
	PyObject *nonnset = NULL, *otherpset = NULL, *result = NULL;
	Py_ssize_t pos;

	if (!PyArg_ParseTuple(args, "O!:nonnormalentries", &PyDict_Type, &dmap))
		goto bail;

	nonnset = PySet_New(NULL);
	if (nonnset == NULL)
		goto bail;

	otherpset = PySet_New(NULL);
	if (otherpset == NULL)
		goto bail;

	pos = 0;
	while (PyDict_Next(dmap, &pos, &fname, &v)) {
		dirstateTupleObject *t;
		if (!dirstate_tuple_check(v)) {
			PyErr_SetString(PyExc_TypeError,
					"expected a dirstate tuple");
			goto bail;
		}
		t = (dirstateTupleObject *)v;

		if (t->state == 'n' && t->size == -2) {
			if (PySet_Add(otherpset, fname) == -1) {
				goto bail;
			}
		}

		/* Normal entries with a valid mtime are "clean": skip. */
		if (t->state == 'n' && t->mtime != -1)
			continue;
		if (PySet_Add(nonnset, fname) == -1)
			goto bail;
	}

	result = Py_BuildValue("(OO)", nonnset, otherpset);
	if (result == NULL)
		goto bail;
	Py_DECREF(nonnset);
	Py_DECREF(otherpset);
	return result;
bail:
	Py_XDECREF(nonnset);
	Py_XDECREF(otherpset);
	Py_XDECREF(result);
	return NULL;
}

/*
 * Efficiently pack a dirstate object into its on-disk format.
 */
/*
 * pack_dirstate(map, copymap, pl, now) -> bytes
 *
 * Serializes {filename: dirstate_tuple} plus the 2-element parents
 * sequence 'pl' into the on-disk dirstate layout: 40 bytes of parent
 * hashes, then per entry a 17-byte header, the filename, and
 * optionally "\0<copy source>".  Entries with state 'n' and
 * mtime == now get their mtime rewritten to -1 in 'map' (see
 * pure/parsers.py:pack_dirstate for the rationale).  Returns NULL
 * with an exception set on bad input or size mismatch.
 */
static PyObject *pack_dirstate(PyObject *self, PyObject *args)
{
	PyObject *packobj = NULL;
	PyObject *map, *copymap, *pl, *mtime_unset = NULL;
	Py_ssize_t nbytes, pos, l;
	PyObject *k, *v = NULL, *pn;
	char *p, *s;
	int now;

	if (!PyArg_ParseTuple(args, "O!O!Oi:pack_dirstate", &PyDict_Type, &map,
			      &PyDict_Type, &copymap, &pl, &now))
		return NULL;

	if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
		PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
		return NULL;
	}

	/* Figure out how much we need to allocate. */
	for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
		PyObject *c;
		if (!PyBytes_Check(k)) {
			PyErr_SetString(PyExc_TypeError, "expected string key");
			goto bail;
		}
		/* 17-byte fixed header plus the filename. */
		nbytes += PyBytes_GET_SIZE(k) + 17;
		c = PyDict_GetItem(copymap, k);
		if (c) {
			if (!PyBytes_Check(c)) {
				PyErr_SetString(PyExc_TypeError,
						"expected string key");
				goto bail;
			}
			/* '\0' separator plus the copy-source name. */
			nbytes += PyBytes_GET_SIZE(c) + 1;
		}
	}

	packobj = PyBytes_FromStringAndSize(NULL, nbytes);
	if (packobj == NULL)
		goto bail;

	p = PyBytes_AS_STRING(packobj);

	/* Write the two 20-byte parent hashes first. */
	pn = PySequence_ITEM(pl, 0);
	if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
		PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
		goto bail;
	}
	memcpy(p, s, l);
	p += 20;
	pn = PySequence_ITEM(pl, 1);
	if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
		PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
		goto bail;
	}
	memcpy(p, s, l);
	p += 20;

	for (pos = 0; PyDict_Next(map, &pos, &k, &v);) {
		dirstateTupleObject *tuple;
		char state;
		int mode, size, mtime;
		Py_ssize_t len, l;
		PyObject *o;
		char *t;

		if (!dirstate_tuple_check(v)) {
			PyErr_SetString(PyExc_TypeError,
					"expected a dirstate tuple");
			goto bail;
		}
		tuple = (dirstateTupleObject *)v;

		state = tuple->state;
		mode = tuple->mode;
		size = tuple->size;
		mtime = tuple->mtime;
		if (state == 'n' && mtime == now) {
			/* See pure/parsers.py:pack_dirstate for why we do
			 * this. */
			mtime = -1;
			mtime_unset = (PyObject *)make_dirstate_tuple(
			    state, mode, size, mtime);
			if (!mtime_unset)
				goto bail;
			if (PyDict_SetItem(map, k, mtime_unset) == -1)
				goto bail;
			Py_DECREF(mtime_unset);
			mtime_unset = NULL;
		}
		*p++ = state;
		putbe32((uint32_t)mode, p);
		putbe32((uint32_t)size, p + 4);
		putbe32((uint32_t)mtime, p + 8);
		/* 't' marks the length slot; it is patched after the
		 * (possibly copy-extended) length is known. */
		t = p + 12;
		p += 16;
		len = PyBytes_GET_SIZE(k);
		memcpy(p, PyBytes_AS_STRING(k), len);
		p += len;
		o = PyDict_GetItem(copymap, k);
		if (o) {
			*p++ = '\0';
			l = PyBytes_GET_SIZE(o);
			memcpy(p, PyBytes_AS_STRING(o), l);
			p += l;
			len += l + 1;
		}
		putbe32((uint32_t)len, t);
	}

	pos = p - PyBytes_AS_STRING(packobj);
	if (pos != nbytes) {
		PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
			     (long)pos, (long)nbytes);
		goto bail;
	}

	return packobj;
bail:
	Py_XDECREF(mtime_unset);
	Py_XDECREF(packobj);
	Py_XDECREF(v);
	return NULL;
}

#define BUMPED_FIX 1
#define USING_SHA_256 2
#define FM1_HEADER_SIZE (4 + 8 + 2 + 2 + 1 + 1 + 1)

/*
 * Read 'num' consecutive hashes of 'hashwidth' bytes from 'source'
 * into a new tuple of bytes objects.  The caller must ensure 'source'
 * holds at least num * hashwidth bytes.  Returns NULL on allocation
 * failure.
 */
static PyObject *readshas(const char *source, unsigned char num,
			  Py_ssize_t hashwidth)
{
	int i;
	PyObject *list = PyTuple_New(num);
	if (list == NULL) {
		return NULL;
	}
	for (i = 0; i < num; i++) {
		PyObject *hash = PyBytes_FromStringAndSize(source, hashwidth);
		if (hash == NULL) {
			Py_DECREF(list);
			return NULL;
		}
		/* PyTuple_SET_ITEM steals the reference to 'hash'. */
		PyTuple_SET_ITEM(list, i, hash);
		source += hashwidth;
	}
	return list;
}

/*
 * Decode a single version-1 obsolescence marker from
 * [databegin, dataend).  On success stores the marker's total size in
 * *msize and returns the tuple
 *   (prec, succs, flags, metadata, (mtime, tz*60), parents).
 * Hash width is 20 bytes, or 32 when the USING_SHA_256 flag is set.
 * Returns NULL with ValueError on truncated data, or NULL on
 * allocation failure.
 */
static PyObject *fm1readmarker(const char *databegin, const char *dataend,
			       uint32_t *msize)
{
	const char *data = databegin;
	const char *meta;

	double mtime;
	int16_t tz;
	uint16_t flags;
	unsigned char nsuccs, nparents, nmetadata;
	Py_ssize_t hashwidth = 20;

	PyObject *prec = NULL, *parents = NULL, *succs = NULL;
	PyObject *metadata = NULL, *ret = NULL;
	int i;

	if (data + FM1_HEADER_SIZE > dataend) {
		goto overflow;
	}

	/* Fixed header: size, mtime, timezone, flags. */
	*msize = getbe32(data);
	data += 4;
	mtime = getbefloat64(data);
	data += 8;
	tz = getbeint16(data);
	data += 2;
	flags = getbeuint16(data);
	data += 2;

	if (flags & USING_SHA_256) {
		hashwidth = 32;
	}

	nsuccs = (unsigned char)(*data++);
	nparents = (unsigned char)(*data++);
	nmetadata = (unsigned char)(*data++);

	if (databegin + *msize > dataend) {
		goto overflow;
	}
	dataend = databegin + *msize; /* narrow down to marker size */

	if (data + hashwidth > dataend) {
		goto overflow;
	}
	prec = PyBytes_FromStringAndSize(data, hashwidth);
	data += hashwidth;
	if (prec == NULL) {
		goto bail;
	}

	if (data + nsuccs * hashwidth > dataend) {
		goto overflow;
	}
	succs = readshas(data, nsuccs, hashwidth);
	if (succs == NULL) {
		goto bail;
	}
	data += nsuccs * hashwidth;

	/* Any nparents value other than 1 or 2 means "unknown parents"
	 * and is represented as None. */
	if (nparents == 1 || nparents == 2) {
		if (data + nparents * hashwidth > dataend) {
			goto overflow;
		}
		parents = readshas(data, nparents, hashwidth);
		if (parents == NULL) {
			goto bail;
		}
		data += nparents * hashwidth;
	} else {
		parents = Py_None;
		Py_INCREF(parents);
	}

	/* Metadata: nmetadata (keylen, valuelen) byte pairs, followed by
	 * the concatenated key/value payloads starting at 'meta'. */
	if (data + 2 * nmetadata > dataend) {
		goto overflow;
	}
	meta = data + (2 * nmetadata);
	metadata = PyTuple_New(nmetadata);
	if (metadata == NULL) {
		goto bail;
	}
	for (i = 0; i < nmetadata; i++) {
		PyObject *tmp, *left = NULL, *right = NULL;
		Py_ssize_t leftsize = (unsigned char)(*data++);
		Py_ssize_t rightsize = (unsigned char)(*data++);
		if (meta + leftsize + rightsize > dataend) {
			goto overflow;
		}
		left = PyBytes_FromStringAndSize(meta, leftsize);
		meta += leftsize;
		right = PyBytes_FromStringAndSize(meta, rightsize);
		meta += rightsize;
		tmp = PyTuple_New(2);
		if (!left || !right || !tmp) {
			Py_XDECREF(left);
			Py_XDECREF(right);
			Py_XDECREF(tmp);
			goto bail;
		}
		PyTuple_SET_ITEM(tmp, 0, left);
		PyTuple_SET_ITEM(tmp, 1, right);
		PyTuple_SET_ITEM(metadata, i, tmp);
	}
	ret = Py_BuildValue("(OOHO(di)O)", prec, succs, flags, metadata, mtime,
			    (int)tz * 60, parents);
	goto bail; /* return successfully */

overflow:
	PyErr_SetString(PyExc_ValueError, "overflow in obsstore");
bail:
	Py_XDECREF(prec);
	Py_XDECREF(succs);
	Py_XDECREF(metadata);
	Py_XDECREF(parents);
	return ret;
}

643 static PyObject *fm1readmarkers(PyObject *self, PyObject *args)
643 static PyObject *fm1readmarkers(PyObject *self, PyObject *args)
644 {
644 {
645 const char *data, *dataend;
645 const char *data, *dataend;
646 int datalen;
646 int datalen;
647 Py_ssize_t offset, stop;
647 Py_ssize_t offset, stop;
648 PyObject *markers = NULL;
648 PyObject *markers = NULL;
649
649
650 if (!PyArg_ParseTuple(args, PY23("s#nn", "y#nn"), &data, &datalen,
650 if (!PyArg_ParseTuple(args, PY23("s#nn", "y#nn"), &data, &datalen,
651 &offset, &stop)) {
651 &offset, &stop)) {
652 return NULL;
652 return NULL;
653 }
653 }
654 dataend = data + datalen;
654 dataend = data + datalen;
655 data += offset;
655 data += offset;
656 markers = PyList_New(0);
656 markers = PyList_New(0);
657 if (!markers) {
657 if (!markers) {
658 return NULL;
658 return NULL;
659 }
659 }
660 while (offset < stop) {
660 while (offset < stop) {
661 uint32_t msize;
661 uint32_t msize;
662 int error;
662 int error;
663 PyObject *record = fm1readmarker(data, dataend, &msize);
663 PyObject *record = fm1readmarker(data, dataend, &msize);
664 if (!record) {
664 if (!record) {
665 goto bail;
665 goto bail;
666 }
666 }
667 error = PyList_Append(markers, record);
667 error = PyList_Append(markers, record);
668 Py_DECREF(record);
668 Py_DECREF(record);
669 if (error) {
669 if (error) {
670 goto bail;
670 goto bail;
671 }
671 }
672 data += msize;
672 data += msize;
673 offset += msize;
673 offset += msize;
674 }
674 }
675 return markers;
675 return markers;
676 bail:
676 bail:
677 Py_DECREF(markers);
677 Py_DECREF(markers);
678 return NULL;
678 return NULL;
679 }
679 }
680
680
681 static char parsers_doc[] = "Efficient content parsing.";
681 static char parsers_doc[] = "Efficient content parsing.";
682
682
683 PyObject *encodedir(PyObject *self, PyObject *args);
683 PyObject *encodedir(PyObject *self, PyObject *args);
684 PyObject *pathencode(PyObject *self, PyObject *args);
684 PyObject *pathencode(PyObject *self, PyObject *args);
685 PyObject *lowerencode(PyObject *self, PyObject *args);
685 PyObject *lowerencode(PyObject *self, PyObject *args);
686 PyObject *parse_index2(PyObject *self, PyObject *args);
686 PyObject *parse_index2(PyObject *self, PyObject *args);
687
687
688 static PyMethodDef methods[] = {
688 static PyMethodDef methods[] = {
689 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
689 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
690 {"nonnormalotherparententries", nonnormalotherparententries, METH_VARARGS,
690 {"nonnormalotherparententries", nonnormalotherparententries, METH_VARARGS,
691 "create a set containing non-normal and other parent entries of given "
691 "create a set containing non-normal and other parent entries of given "
692 "dirstate\n"},
692 "dirstate\n"},
693 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
693 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
694 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
694 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
695 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
695 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
696 {"isasciistr", isasciistr, METH_VARARGS, "check if an ASCII string\n"},
696 {"isasciistr", isasciistr, METH_VARARGS, "check if an ASCII string\n"},
697 {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
697 {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
698 {"asciiupper", asciiupper, METH_VARARGS, "uppercase an ASCII string\n"},
698 {"asciiupper", asciiupper, METH_VARARGS, "uppercase an ASCII string\n"},
699 {"dict_new_presized", dict_new_presized, METH_VARARGS,
699 {"dict_new_presized", dict_new_presized, METH_VARARGS,
700 "construct a dict with an expected size\n"},
700 "construct a dict with an expected size\n"},
701 {"make_file_foldmap", make_file_foldmap, METH_VARARGS,
701 {"make_file_foldmap", make_file_foldmap, METH_VARARGS,
702 "make file foldmap\n"},
702 "make file foldmap\n"},
703 {"jsonescapeu8fast", jsonescapeu8fast, METH_VARARGS,
703 {"jsonescapeu8fast", jsonescapeu8fast, METH_VARARGS,
704 "escape a UTF-8 byte string to JSON (fast path)\n"},
704 "escape a UTF-8 byte string to JSON (fast path)\n"},
705 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
705 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
706 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
706 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
707 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
707 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
708 {"fm1readmarkers", fm1readmarkers, METH_VARARGS,
708 {"fm1readmarkers", fm1readmarkers, METH_VARARGS,
709 "parse v1 obsolete markers\n"},
709 "parse v1 obsolete markers\n"},
710 {NULL, NULL}};
710 {NULL, NULL}};
711
711
712 void dirs_module_init(PyObject *mod);
712 void dirs_module_init(PyObject *mod);
713 void manifest_module_init(PyObject *mod);
713 void manifest_module_init(PyObject *mod);
714 void revlog_module_init(PyObject *mod);
714 void revlog_module_init(PyObject *mod);
715
715
/* Extension layout version, exposed to Python as the module's "version"
 * attribute (see module_init). Bumped from 4 to 5 by this change. */
static const int version = 5;
717
717
718 static void module_init(PyObject *mod)
718 static void module_init(PyObject *mod)
719 {
719 {
720 PyModule_AddIntConstant(mod, "version", version);
720 PyModule_AddIntConstant(mod, "version", version);
721
721
722 /* This module constant has two purposes. First, it lets us unit test
722 /* This module constant has two purposes. First, it lets us unit test
723 * the ImportError raised without hard-coding any error text. This
723 * the ImportError raised without hard-coding any error text. This
724 * means we can change the text in the future without breaking tests,
724 * means we can change the text in the future without breaking tests,
725 * even across changesets without a recompile. Second, its presence
725 * even across changesets without a recompile. Second, its presence
726 * can be used to determine whether the version-checking logic is
726 * can be used to determine whether the version-checking logic is
727 * present, which also helps in testing across changesets without a
727 * present, which also helps in testing across changesets without a
728 * recompile. Note that this means the pure-Python version of parsers
728 * recompile. Note that this means the pure-Python version of parsers
729 * should not have this module constant. */
729 * should not have this module constant. */
730 PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
730 PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
731
731
732 dirs_module_init(mod);
732 dirs_module_init(mod);
733 manifest_module_init(mod);
733 manifest_module_init(mod);
734 revlog_module_init(mod);
734 revlog_module_init(mod);
735
735
736 if (PyType_Ready(&dirstateTupleType) < 0)
736 if (PyType_Ready(&dirstateTupleType) < 0)
737 return;
737 return;
738 Py_INCREF(&dirstateTupleType);
738 Py_INCREF(&dirstateTupleType);
739 PyModule_AddObject(mod, "dirstatetuple",
739 PyModule_AddObject(mod, "dirstatetuple",
740 (PyObject *)&dirstateTupleType);
740 (PyObject *)&dirstateTupleType);
741 }
741 }
742
742
743 static int check_python_version(void)
743 static int check_python_version(void)
744 {
744 {
745 PyObject *sys = PyImport_ImportModule("sys"), *ver;
745 PyObject *sys = PyImport_ImportModule("sys"), *ver;
746 long hexversion;
746 long hexversion;
747 if (!sys)
747 if (!sys)
748 return -1;
748 return -1;
749 ver = PyObject_GetAttrString(sys, "hexversion");
749 ver = PyObject_GetAttrString(sys, "hexversion");
750 Py_DECREF(sys);
750 Py_DECREF(sys);
751 if (!ver)
751 if (!ver)
752 return -1;
752 return -1;
753 hexversion = PyInt_AsLong(ver);
753 hexversion = PyInt_AsLong(ver);
754 Py_DECREF(ver);
754 Py_DECREF(ver);
755 /* sys.hexversion is a 32-bit number by default, so the -1 case
755 /* sys.hexversion is a 32-bit number by default, so the -1 case
756 * should only occur in unusual circumstances (e.g. if sys.hexversion
756 * should only occur in unusual circumstances (e.g. if sys.hexversion
757 * is manually set to an invalid value). */
757 * is manually set to an invalid value). */
758 if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
758 if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
759 PyErr_Format(PyExc_ImportError,
759 PyErr_Format(PyExc_ImportError,
760 "%s: The Mercurial extension "
760 "%s: The Mercurial extension "
761 "modules were compiled with Python " PY_VERSION
761 "modules were compiled with Python " PY_VERSION
762 ", but "
762 ", but "
763 "Mercurial is currently using Python with "
763 "Mercurial is currently using Python with "
764 "sys.hexversion=%ld: "
764 "sys.hexversion=%ld: "
765 "Python %s\n at: %s",
765 "Python %s\n at: %s",
766 versionerrortext, hexversion, Py_GetVersion(),
766 versionerrortext, hexversion, Py_GetVersion(),
767 Py_GetProgramFullPath());
767 Py_GetProgramFullPath());
768 return -1;
768 return -1;
769 }
769 }
770 return 0;
770 return 0;
771 }
771 }
772
772
773 #ifdef IS_PY3K
773 #ifdef IS_PY3K
774 static struct PyModuleDef parsers_module = {PyModuleDef_HEAD_INIT, "parsers",
774 static struct PyModuleDef parsers_module = {PyModuleDef_HEAD_INIT, "parsers",
775 parsers_doc, -1, methods};
775 parsers_doc, -1, methods};
776
776
777 PyMODINIT_FUNC PyInit_parsers(void)
777 PyMODINIT_FUNC PyInit_parsers(void)
778 {
778 {
779 PyObject *mod;
779 PyObject *mod;
780
780
781 if (check_python_version() == -1)
781 if (check_python_version() == -1)
782 return NULL;
782 return NULL;
783 mod = PyModule_Create(&parsers_module);
783 mod = PyModule_Create(&parsers_module);
784 module_init(mod);
784 module_init(mod);
785 return mod;
785 return mod;
786 }
786 }
787 #else
787 #else
788 PyMODINIT_FUNC initparsers(void)
788 PyMODINIT_FUNC initparsers(void)
789 {
789 {
790 PyObject *mod;
790 PyObject *mod;
791
791
792 if (check_python_version() == -1)
792 if (check_python_version() == -1)
793 return;
793 return;
794 mod = Py_InitModule3("parsers", methods, parsers_doc);
794 mod = Py_InitModule3("parsers", methods, parsers_doc);
795 module_init(mod);
795 module_init(mod);
796 }
796 }
797 #endif
797 #endif
@@ -1,2105 +1,2179
1 /*
1 /*
2 parsers.c - efficient content parsing
2 parsers.c - efficient content parsing
3
3
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8 */
8 */
9
9
10 #include <Python.h>
10 #include <Python.h>
11 #include <assert.h>
11 #include <assert.h>
12 #include <ctype.h>
12 #include <ctype.h>
13 #include <stddef.h>
13 #include <stddef.h>
14 #include <string.h>
14 #include <string.h>
15
15
16 #include "bitmanipulation.h"
16 #include "bitmanipulation.h"
17 #include "charencode.h"
17 #include "charencode.h"
18 #include "util.h"
18 #include "util.h"
19
19
20 #ifdef IS_PY3K
20 #ifdef IS_PY3K
21 /* The mapping of Python types is meant to be temporary to get Python
21 /* The mapping of Python types is meant to be temporary to get Python
22 * 3 to compile. We should remove this once Python 3 support is fully
22 * 3 to compile. We should remove this once Python 3 support is fully
23 * supported and proper types are used in the extensions themselves. */
23 * supported and proper types are used in the extensions themselves. */
24 #define PyInt_Check PyLong_Check
24 #define PyInt_Check PyLong_Check
25 #define PyInt_FromLong PyLong_FromLong
25 #define PyInt_FromLong PyLong_FromLong
26 #define PyInt_FromSsize_t PyLong_FromSsize_t
26 #define PyInt_FromSsize_t PyLong_FromSsize_t
27 #define PyInt_AS_LONG PyLong_AS_LONG
27 #define PyInt_AS_LONG PyLong_AS_LONG
28 #define PyInt_AsLong PyLong_AsLong
28 #define PyInt_AsLong PyLong_AsLong
29 #endif
29 #endif
30
30
/*
 * A base-16 trie for fast node->rev mapping.
 *
 * Positive value is index of the next node in the trie
 * Negative value is a leaf: -(rev + 1)
 * Zero is empty
 */
typedef struct {
	int children[16]; /* one slot per hex nibble of a node hash */
} nodetree;
41
41
42 /*
42 /*
43 * This class has two behaviors.
43 * This class has two behaviors.
44 *
44 *
45 * When used in a list-like way (with integer keys), we decode an
45 * When used in a list-like way (with integer keys), we decode an
46 * entry in a RevlogNG index file on demand. Our last entry is a
46 * entry in a RevlogNG index file on demand. Our last entry is a
47 * sentinel, always a nullid. We have limited support for
47 * sentinel, always a nullid. We have limited support for
48 * integer-keyed insert and delete, only at elements right before the
48 * integer-keyed insert and delete, only at elements right before the
49 * sentinel.
49 * sentinel.
50 *
50 *
51 * With string keys, we lazily perform a reverse mapping from node to
51 * With string keys, we lazily perform a reverse mapping from node to
52 * rev, using a base-16 trie.
52 * rev, using a base-16 trie.
53 */
53 */
54 typedef struct {
54 typedef struct {
55 PyObject_HEAD
55 PyObject_HEAD
56 /* Type-specific fields go here. */
56 /* Type-specific fields go here. */
57 PyObject *data; /* raw bytes of index */
57 PyObject *data; /* raw bytes of index */
58 Py_buffer buf; /* buffer of data */
58 Py_buffer buf; /* buffer of data */
59 PyObject **cache; /* cached tuples */
59 PyObject **cache; /* cached tuples */
60 const char **offsets; /* populated on demand */
60 const char **offsets; /* populated on demand */
61 Py_ssize_t raw_length; /* original number of elements */
61 Py_ssize_t raw_length; /* original number of elements */
62 Py_ssize_t length; /* current number of elements */
62 Py_ssize_t length; /* current number of elements */
63 PyObject *added; /* populated on demand */
63 PyObject *added; /* populated on demand */
64 PyObject *headrevs; /* cache, invalidated on changes */
64 PyObject *headrevs; /* cache, invalidated on changes */
65 PyObject *filteredrevs;/* filtered revs set */
65 PyObject *filteredrevs;/* filtered revs set */
66 nodetree *nt; /* base-16 trie */
66 nodetree *nt; /* base-16 trie */
67 unsigned ntlength; /* # nodes in use */
67 unsigned ntlength; /* # nodes in use */
68 unsigned ntcapacity; /* # nodes allocated */
68 unsigned ntcapacity; /* # nodes allocated */
69 int ntdepth; /* maximum depth of tree */
69 int ntdepth; /* maximum depth of tree */
70 int ntsplits; /* # splits performed */
70 int ntsplits; /* # splits performed */
71 int ntrev; /* last rev scanned */
71 int ntrev; /* last rev scanned */
72 int ntlookups; /* # lookups */
72 int ntlookups; /* # lookups */
73 int ntmisses; /* # lookups that miss the cache */
73 int ntmisses; /* # lookups that miss the cache */
74 int inlined;
74 int inlined;
75 } indexObject;
75 } indexObject;
76
76
77 static Py_ssize_t index_length(const indexObject *self)
77 static Py_ssize_t index_length(const indexObject *self)
78 {
78 {
79 if (self->added == NULL)
79 if (self->added == NULL)
80 return self->length;
80 return self->length;
81 return self->length + PyList_GET_SIZE(self->added);
81 return self->length + PyList_GET_SIZE(self->added);
82 }
82 }
83
83
84 static PyObject *nullentry;
84 static PyObject *nullentry;
85 static const char nullid[20];
85 static const char nullid[20];
86
86
87 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
87 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
88
88
89 #if LONG_MAX == 0x7fffffffL
89 #if LONG_MAX == 0x7fffffffL
90 static const char *const tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#");
90 static const char *const tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#");
91 #else
91 #else
92 static const char *const tuple_format = PY23("kiiiiiis#", "kiiiiiiy#");
92 static const char *const tuple_format = PY23("kiiiiiis#", "kiiiiiiy#");
93 #endif
93 #endif
94
94
95 /* A RevlogNG v1 index entry is 64 bytes long. */
95 /* A RevlogNG v1 index entry is 64 bytes long. */
96 static const long v1_hdrsize = 64;
96 static const long v1_hdrsize = 64;
97
97
98 /*
98 /*
99 * Return a pointer to the beginning of a RevlogNG record.
99 * Return a pointer to the beginning of a RevlogNG record.
100 */
100 */
101 static const char *index_deref(indexObject *self, Py_ssize_t pos)
101 static const char *index_deref(indexObject *self, Py_ssize_t pos)
102 {
102 {
103 if (self->inlined && pos > 0) {
103 if (self->inlined && pos > 0) {
104 if (self->offsets == NULL) {
104 if (self->offsets == NULL) {
105 self->offsets = PyMem_Malloc(self->raw_length *
105 self->offsets = PyMem_Malloc(self->raw_length *
106 sizeof(*self->offsets));
106 sizeof(*self->offsets));
107 if (self->offsets == NULL)
107 if (self->offsets == NULL)
108 return (const char *)PyErr_NoMemory();
108 return (const char *)PyErr_NoMemory();
109 inline_scan(self, self->offsets);
109 inline_scan(self, self->offsets);
110 }
110 }
111 return self->offsets[pos];
111 return self->offsets[pos];
112 }
112 }
113
113
114 return (const char *)(self->buf.buf) + pos * v1_hdrsize;
114 return (const char *)(self->buf.buf) + pos * v1_hdrsize;
115 }
115 }
116
116
117 static inline int index_get_parents(indexObject *self, Py_ssize_t rev,
117 static inline int index_get_parents(indexObject *self, Py_ssize_t rev,
118 int *ps, int maxrev)
118 int *ps, int maxrev)
119 {
119 {
120 if (rev >= self->length - 1) {
120 if (rev >= self->length - 1) {
121 PyObject *tuple = PyList_GET_ITEM(self->added,
121 PyObject *tuple = PyList_GET_ITEM(self->added,
122 rev - self->length + 1);
122 rev - self->length + 1);
123 ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5));
123 ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5));
124 ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6));
124 ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6));
125 } else {
125 } else {
126 const char *data = index_deref(self, rev);
126 const char *data = index_deref(self, rev);
127 ps[0] = getbe32(data + 24);
127 ps[0] = getbe32(data + 24);
128 ps[1] = getbe32(data + 28);
128 ps[1] = getbe32(data + 28);
129 }
129 }
130 /* If index file is corrupted, ps[] may point to invalid revisions. So
130 /* If index file is corrupted, ps[] may point to invalid revisions. So
131 * there is a risk of buffer overflow to trust them unconditionally. */
131 * there is a risk of buffer overflow to trust them unconditionally. */
132 if (ps[0] > maxrev || ps[1] > maxrev) {
132 if (ps[0] > maxrev || ps[1] > maxrev) {
133 PyErr_SetString(PyExc_ValueError, "parent out of range");
133 PyErr_SetString(PyExc_ValueError, "parent out of range");
134 return -1;
134 return -1;
135 }
135 }
136 return 0;
136 return 0;
137 }
137 }
138
138
139
139
140 /*
140 /*
141 * RevlogNG format (all in big endian, data may be inlined):
141 * RevlogNG format (all in big endian, data may be inlined):
142 * 6 bytes: offset
142 * 6 bytes: offset
143 * 2 bytes: flags
143 * 2 bytes: flags
144 * 4 bytes: compressed length
144 * 4 bytes: compressed length
145 * 4 bytes: uncompressed length
145 * 4 bytes: uncompressed length
146 * 4 bytes: base revision
146 * 4 bytes: base revision
147 * 4 bytes: link revision
147 * 4 bytes: link revision
148 * 4 bytes: parent 1 revision
148 * 4 bytes: parent 1 revision
149 * 4 bytes: parent 2 revision
149 * 4 bytes: parent 2 revision
150 * 32 bytes: nodeid (only 20 bytes used)
150 * 32 bytes: nodeid (only 20 bytes used)
151 */
151 */
152 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
152 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
153 {
153 {
154 uint64_t offset_flags;
154 uint64_t offset_flags;
155 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
155 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
156 const char *c_node_id;
156 const char *c_node_id;
157 const char *data;
157 const char *data;
158 Py_ssize_t length = index_length(self);
158 Py_ssize_t length = index_length(self);
159 PyObject *entry;
159 PyObject *entry;
160
160
161 if (pos < 0)
161 if (pos < 0)
162 pos += length;
162 pos += length;
163
163
164 if (pos < 0 || pos >= length) {
164 if (pos < 0 || pos >= length) {
165 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
165 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
166 return NULL;
166 return NULL;
167 }
167 }
168
168
169 if (pos == length - 1) {
169 if (pos == length - 1) {
170 Py_INCREF(nullentry);
170 Py_INCREF(nullentry);
171 return nullentry;
171 return nullentry;
172 }
172 }
173
173
174 if (pos >= self->length - 1) {
174 if (pos >= self->length - 1) {
175 PyObject *obj;
175 PyObject *obj;
176 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
176 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
177 Py_INCREF(obj);
177 Py_INCREF(obj);
178 return obj;
178 return obj;
179 }
179 }
180
180
181 if (self->cache) {
181 if (self->cache) {
182 if (self->cache[pos]) {
182 if (self->cache[pos]) {
183 Py_INCREF(self->cache[pos]);
183 Py_INCREF(self->cache[pos]);
184 return self->cache[pos];
184 return self->cache[pos];
185 }
185 }
186 } else {
186 } else {
187 self->cache = calloc(self->raw_length, sizeof(PyObject *));
187 self->cache = calloc(self->raw_length, sizeof(PyObject *));
188 if (self->cache == NULL)
188 if (self->cache == NULL)
189 return PyErr_NoMemory();
189 return PyErr_NoMemory();
190 }
190 }
191
191
192 data = index_deref(self, pos);
192 data = index_deref(self, pos);
193 if (data == NULL)
193 if (data == NULL)
194 return NULL;
194 return NULL;
195
195
196 offset_flags = getbe32(data + 4);
196 offset_flags = getbe32(data + 4);
197 if (pos == 0) /* mask out version number for the first entry */
197 if (pos == 0) /* mask out version number for the first entry */
198 offset_flags &= 0xFFFF;
198 offset_flags &= 0xFFFF;
199 else {
199 else {
200 uint32_t offset_high = getbe32(data);
200 uint32_t offset_high = getbe32(data);
201 offset_flags |= ((uint64_t)offset_high) << 32;
201 offset_flags |= ((uint64_t)offset_high) << 32;
202 }
202 }
203
203
204 comp_len = getbe32(data + 8);
204 comp_len = getbe32(data + 8);
205 uncomp_len = getbe32(data + 12);
205 uncomp_len = getbe32(data + 12);
206 base_rev = getbe32(data + 16);
206 base_rev = getbe32(data + 16);
207 link_rev = getbe32(data + 20);
207 link_rev = getbe32(data + 20);
208 parent_1 = getbe32(data + 24);
208 parent_1 = getbe32(data + 24);
209 parent_2 = getbe32(data + 28);
209 parent_2 = getbe32(data + 28);
210 c_node_id = data + 32;
210 c_node_id = data + 32;
211
211
212 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
212 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
213 uncomp_len, base_rev, link_rev,
213 uncomp_len, base_rev, link_rev,
214 parent_1, parent_2, c_node_id, 20);
214 parent_1, parent_2, c_node_id, 20);
215
215
216 if (entry) {
216 if (entry) {
217 PyObject_GC_UnTrack(entry);
217 PyObject_GC_UnTrack(entry);
218 Py_INCREF(entry);
218 Py_INCREF(entry);
219 }
219 }
220
220
221 self->cache[pos] = entry;
221 self->cache[pos] = entry;
222
222
223 return entry;
223 return entry;
224 }
224 }
225
225
226 /*
226 /*
227 * Return the 20-byte SHA of the node corresponding to the given rev.
227 * Return the 20-byte SHA of the node corresponding to the given rev.
228 */
228 */
229 static const char *index_node(indexObject *self, Py_ssize_t pos)
229 static const char *index_node(indexObject *self, Py_ssize_t pos)
230 {
230 {
231 Py_ssize_t length = index_length(self);
231 Py_ssize_t length = index_length(self);
232 const char *data;
232 const char *data;
233
233
234 if (pos == length - 1 || pos == INT_MAX)
234 if (pos == length - 1 || pos == INT_MAX)
235 return nullid;
235 return nullid;
236
236
237 if (pos >= length)
237 if (pos >= length)
238 return NULL;
238 return NULL;
239
239
240 if (pos >= self->length - 1) {
240 if (pos >= self->length - 1) {
241 PyObject *tuple, *str;
241 PyObject *tuple, *str;
242 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
242 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
243 str = PyTuple_GetItem(tuple, 7);
243 str = PyTuple_GetItem(tuple, 7);
244 return str ? PyBytes_AS_STRING(str) : NULL;
244 return str ? PyBytes_AS_STRING(str) : NULL;
245 }
245 }
246
246
247 data = index_deref(self, pos);
247 data = index_deref(self, pos);
248 return data ? data + 32 : NULL;
248 return data ? data + 32 : NULL;
249 }
249 }
250
250
251 /*
251 /*
252 * Return the 20-byte SHA of the node corresponding to the given rev. The
252 * Return the 20-byte SHA of the node corresponding to the given rev. The
253 * rev is assumed to be existing. If not, an exception is set.
253 * rev is assumed to be existing. If not, an exception is set.
254 */
254 */
255 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
255 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
256 {
256 {
257 const char *node = index_node(self, pos);
257 const char *node = index_node(self, pos);
258 if (node == NULL) {
258 if (node == NULL) {
259 PyErr_Format(PyExc_IndexError, "could not access rev %d",
259 PyErr_Format(PyExc_IndexError, "could not access rev %d",
260 (int)pos);
260 (int)pos);
261 }
261 }
262 return node;
262 return node;
263 }
263 }
264
264
265 static int nt_insert(indexObject *self, const char *node, int rev);
265 static int nt_insert(indexObject *self, const char *node, int rev);
266
266
267 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
267 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
268 {
268 {
269 if (PyBytes_AsStringAndSize(obj, node, nodelen) == -1)
269 if (PyBytes_AsStringAndSize(obj, node, nodelen) == -1)
270 return -1;
270 return -1;
271 if (*nodelen == 20)
271 if (*nodelen == 20)
272 return 0;
272 return 0;
273 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
273 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
274 return -1;
274 return -1;
275 }
275 }
276
276
277 static PyObject *index_insert(indexObject *self, PyObject *args)
277 static PyObject *index_insert(indexObject *self, PyObject *args)
278 {
278 {
279 PyObject *obj;
279 PyObject *obj;
280 char *node;
280 char *node;
281 int index;
281 int index;
282 Py_ssize_t len, nodelen;
282 Py_ssize_t len, nodelen;
283
283
284 if (!PyArg_ParseTuple(args, "iO", &index, &obj))
284 if (!PyArg_ParseTuple(args, "iO", &index, &obj))
285 return NULL;
285 return NULL;
286
286
287 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
287 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
288 PyErr_SetString(PyExc_TypeError, "8-tuple required");
288 PyErr_SetString(PyExc_TypeError, "8-tuple required");
289 return NULL;
289 return NULL;
290 }
290 }
291
291
292 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
292 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
293 return NULL;
293 return NULL;
294
294
295 len = index_length(self);
295 len = index_length(self);
296
296
297 if (index < 0)
297 if (index < 0)
298 index += len;
298 index += len;
299
299
300 if (index != len - 1) {
300 if (index != len - 1) {
301 PyErr_SetString(PyExc_IndexError,
301 PyErr_SetString(PyExc_IndexError,
302 "insert only supported at index -1");
302 "insert only supported at index -1");
303 return NULL;
303 return NULL;
304 }
304 }
305
305
306 if (self->added == NULL) {
306 if (self->added == NULL) {
307 self->added = PyList_New(0);
307 self->added = PyList_New(0);
308 if (self->added == NULL)
308 if (self->added == NULL)
309 return NULL;
309 return NULL;
310 }
310 }
311
311
312 if (PyList_Append(self->added, obj) == -1)
312 if (PyList_Append(self->added, obj) == -1)
313 return NULL;
313 return NULL;
314
314
315 if (self->nt)
315 if (self->nt)
316 nt_insert(self, node, index);
316 nt_insert(self, node, index);
317
317
318 Py_CLEAR(self->headrevs);
318 Py_CLEAR(self->headrevs);
319 Py_RETURN_NONE;
319 Py_RETURN_NONE;
320 }
320 }
321
321
322 static void _index_clearcaches(indexObject *self)
322 static void _index_clearcaches(indexObject *self)
323 {
323 {
324 if (self->cache) {
324 if (self->cache) {
325 Py_ssize_t i;
325 Py_ssize_t i;
326
326
327 for (i = 0; i < self->raw_length; i++)
327 for (i = 0; i < self->raw_length; i++)
328 Py_CLEAR(self->cache[i]);
328 Py_CLEAR(self->cache[i]);
329 free(self->cache);
329 free(self->cache);
330 self->cache = NULL;
330 self->cache = NULL;
331 }
331 }
332 if (self->offsets) {
332 if (self->offsets) {
333 PyMem_Free(self->offsets);
333 PyMem_Free(self->offsets);
334 self->offsets = NULL;
334 self->offsets = NULL;
335 }
335 }
336 if (self->nt) {
336 if (self->nt) {
337 free(self->nt);
337 free(self->nt);
338 self->nt = NULL;
338 self->nt = NULL;
339 }
339 }
340 Py_CLEAR(self->headrevs);
340 Py_CLEAR(self->headrevs);
341 }
341 }
342
342
343 static PyObject *index_clearcaches(indexObject *self)
343 static PyObject *index_clearcaches(indexObject *self)
344 {
344 {
345 _index_clearcaches(self);
345 _index_clearcaches(self);
346 self->ntlength = self->ntcapacity = 0;
346 self->ntlength = self->ntcapacity = 0;
347 self->ntdepth = self->ntsplits = 0;
347 self->ntdepth = self->ntsplits = 0;
348 self->ntrev = -1;
348 self->ntrev = -1;
349 self->ntlookups = self->ntmisses = 0;
349 self->ntlookups = self->ntmisses = 0;
350 Py_RETURN_NONE;
350 Py_RETURN_NONE;
351 }
351 }
352
352
353 static PyObject *index_stats(indexObject *self)
353 static PyObject *index_stats(indexObject *self)
354 {
354 {
355 PyObject *obj = PyDict_New();
355 PyObject *obj = PyDict_New();
356 PyObject *t = NULL;
356 PyObject *t = NULL;
357
357
358 if (obj == NULL)
358 if (obj == NULL)
359 return NULL;
359 return NULL;
360
360
361 #define istat(__n, __d) \
361 #define istat(__n, __d) \
362 do { \
362 do { \
363 t = PyInt_FromSsize_t(self->__n); \
363 t = PyInt_FromSsize_t(self->__n); \
364 if (!t) \
364 if (!t) \
365 goto bail; \
365 goto bail; \
366 if (PyDict_SetItemString(obj, __d, t) == -1) \
366 if (PyDict_SetItemString(obj, __d, t) == -1) \
367 goto bail; \
367 goto bail; \
368 Py_DECREF(t); \
368 Py_DECREF(t); \
369 } while (0)
369 } while (0)
370
370
371 if (self->added) {
371 if (self->added) {
372 Py_ssize_t len = PyList_GET_SIZE(self->added);
372 Py_ssize_t len = PyList_GET_SIZE(self->added);
373 t = PyInt_FromSsize_t(len);
373 t = PyInt_FromSsize_t(len);
374 if (!t)
374 if (!t)
375 goto bail;
375 goto bail;
376 if (PyDict_SetItemString(obj, "index entries added", t) == -1)
376 if (PyDict_SetItemString(obj, "index entries added", t) == -1)
377 goto bail;
377 goto bail;
378 Py_DECREF(t);
378 Py_DECREF(t);
379 }
379 }
380
380
381 if (self->raw_length != self->length - 1)
381 if (self->raw_length != self->length - 1)
382 istat(raw_length, "revs on disk");
382 istat(raw_length, "revs on disk");
383 istat(length, "revs in memory");
383 istat(length, "revs in memory");
384 istat(ntcapacity, "node trie capacity");
384 istat(ntcapacity, "node trie capacity");
385 istat(ntdepth, "node trie depth");
385 istat(ntdepth, "node trie depth");
386 istat(ntlength, "node trie count");
386 istat(ntlength, "node trie count");
387 istat(ntlookups, "node trie lookups");
387 istat(ntlookups, "node trie lookups");
388 istat(ntmisses, "node trie misses");
388 istat(ntmisses, "node trie misses");
389 istat(ntrev, "node trie last rev scanned");
389 istat(ntrev, "node trie last rev scanned");
390 istat(ntsplits, "node trie splits");
390 istat(ntsplits, "node trie splits");
391
391
392 #undef istat
392 #undef istat
393
393
394 return obj;
394 return obj;
395
395
396 bail:
396 bail:
397 Py_XDECREF(obj);
397 Py_XDECREF(obj);
398 Py_XDECREF(t);
398 Py_XDECREF(t);
399 return NULL;
399 return NULL;
400 }
400 }
401
401
402 /*
402 /*
403 * When we cache a list, we want to be sure the caller can't mutate
403 * When we cache a list, we want to be sure the caller can't mutate
404 * the cached copy.
404 * the cached copy.
405 */
405 */
406 static PyObject *list_copy(PyObject *list)
406 static PyObject *list_copy(PyObject *list)
407 {
407 {
408 Py_ssize_t len = PyList_GET_SIZE(list);
408 Py_ssize_t len = PyList_GET_SIZE(list);
409 PyObject *newlist = PyList_New(len);
409 PyObject *newlist = PyList_New(len);
410 Py_ssize_t i;
410 Py_ssize_t i;
411
411
412 if (newlist == NULL)
412 if (newlist == NULL)
413 return NULL;
413 return NULL;
414
414
415 for (i = 0; i < len; i++) {
415 for (i = 0; i < len; i++) {
416 PyObject *obj = PyList_GET_ITEM(list, i);
416 PyObject *obj = PyList_GET_ITEM(list, i);
417 Py_INCREF(obj);
417 Py_INCREF(obj);
418 PyList_SET_ITEM(newlist, i, obj);
418 PyList_SET_ITEM(newlist, i, obj);
419 }
419 }
420
420
421 return newlist;
421 return newlist;
422 }
422 }
423
423
424 static int check_filter(PyObject *filter, Py_ssize_t arg)
424 static int check_filter(PyObject *filter, Py_ssize_t arg)
425 {
425 {
426 if (filter) {
426 if (filter) {
427 PyObject *arglist, *result;
427 PyObject *arglist, *result;
428 int isfiltered;
428 int isfiltered;
429
429
430 arglist = Py_BuildValue("(n)", arg);
430 arglist = Py_BuildValue("(n)", arg);
431 if (!arglist) {
431 if (!arglist) {
432 return -1;
432 return -1;
433 }
433 }
434
434
435 result = PyEval_CallObject(filter, arglist);
435 result = PyEval_CallObject(filter, arglist);
436 Py_DECREF(arglist);
436 Py_DECREF(arglist);
437 if (!result) {
437 if (!result) {
438 return -1;
438 return -1;
439 }
439 }
440
440
441 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
441 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
442 * same as this function, so we can just return it directly.*/
442 * same as this function, so we can just return it directly.*/
443 isfiltered = PyObject_IsTrue(result);
443 isfiltered = PyObject_IsTrue(result);
444 Py_DECREF(result);
444 Py_DECREF(result);
445 return isfiltered;
445 return isfiltered;
446 } else {
446 } else {
447 return 0;
447 return 0;
448 }
448 }
449 }
449 }
450
450
451 static Py_ssize_t add_roots_get_min(indexObject *self, PyObject *list,
451 static Py_ssize_t add_roots_get_min(indexObject *self, PyObject *list,
452 Py_ssize_t marker, char *phases)
452 Py_ssize_t marker, char *phases)
453 {
453 {
454 PyObject *iter = NULL;
454 PyObject *iter = NULL;
455 PyObject *iter_item = NULL;
455 PyObject *iter_item = NULL;
456 Py_ssize_t min_idx = index_length(self) + 1;
456 Py_ssize_t min_idx = index_length(self) + 1;
457 long iter_item_long;
457 long iter_item_long;
458
458
459 if (PyList_GET_SIZE(list) != 0) {
459 if (PyList_GET_SIZE(list) != 0) {
460 iter = PyObject_GetIter(list);
460 iter = PyObject_GetIter(list);
461 if (iter == NULL)
461 if (iter == NULL)
462 return -2;
462 return -2;
463 while ((iter_item = PyIter_Next(iter))) {
463 while ((iter_item = PyIter_Next(iter))) {
464 iter_item_long = PyInt_AS_LONG(iter_item);
464 iter_item_long = PyInt_AS_LONG(iter_item);
465 Py_DECREF(iter_item);
465 Py_DECREF(iter_item);
466 if (iter_item_long < min_idx)
466 if (iter_item_long < min_idx)
467 min_idx = iter_item_long;
467 min_idx = iter_item_long;
468 phases[iter_item_long] = marker;
468 phases[iter_item_long] = marker;
469 }
469 }
470 Py_DECREF(iter);
470 Py_DECREF(iter);
471 }
471 }
472
472
473 return min_idx;
473 return min_idx;
474 }
474 }
475
475
476 static inline void set_phase_from_parents(char *phases, int parent_1,
476 static inline void set_phase_from_parents(char *phases, int parent_1,
477 int parent_2, Py_ssize_t i)
477 int parent_2, Py_ssize_t i)
478 {
478 {
479 if (parent_1 >= 0 && phases[parent_1] > phases[i])
479 if (parent_1 >= 0 && phases[parent_1] > phases[i])
480 phases[i] = phases[parent_1];
480 phases[i] = phases[parent_1];
481 if (parent_2 >= 0 && phases[parent_2] > phases[i])
481 if (parent_2 >= 0 && phases[parent_2] > phases[i])
482 phases[i] = phases[parent_2];
482 phases[i] = phases[parent_2];
483 }
483 }
484
484
485 static PyObject *reachableroots2(indexObject *self, PyObject *args)
485 static PyObject *reachableroots2(indexObject *self, PyObject *args)
486 {
486 {
487
487
488 /* Input */
488 /* Input */
489 long minroot;
489 long minroot;
490 PyObject *includepatharg = NULL;
490 PyObject *includepatharg = NULL;
491 int includepath = 0;
491 int includepath = 0;
492 /* heads and roots are lists */
492 /* heads and roots are lists */
493 PyObject *heads = NULL;
493 PyObject *heads = NULL;
494 PyObject *roots = NULL;
494 PyObject *roots = NULL;
495 PyObject *reachable = NULL;
495 PyObject *reachable = NULL;
496
496
497 PyObject *val;
497 PyObject *val;
498 Py_ssize_t len = index_length(self) - 1;
498 Py_ssize_t len = index_length(self) - 1;
499 long revnum;
499 long revnum;
500 Py_ssize_t k;
500 Py_ssize_t k;
501 Py_ssize_t i;
501 Py_ssize_t i;
502 Py_ssize_t l;
502 Py_ssize_t l;
503 int r;
503 int r;
504 int parents[2];
504 int parents[2];
505
505
506 /* Internal data structure:
506 /* Internal data structure:
507 * tovisit: array of length len+1 (all revs + nullrev), filled upto lentovisit
507 * tovisit: array of length len+1 (all revs + nullrev), filled upto lentovisit
508 * revstates: array of length len+1 (all revs + nullrev) */
508 * revstates: array of length len+1 (all revs + nullrev) */
509 int *tovisit = NULL;
509 int *tovisit = NULL;
510 long lentovisit = 0;
510 long lentovisit = 0;
511 enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
511 enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
512 char *revstates = NULL;
512 char *revstates = NULL;
513
513
514 /* Get arguments */
514 /* Get arguments */
515 if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
515 if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
516 &PyList_Type, &roots,
516 &PyList_Type, &roots,
517 &PyBool_Type, &includepatharg))
517 &PyBool_Type, &includepatharg))
518 goto bail;
518 goto bail;
519
519
520 if (includepatharg == Py_True)
520 if (includepatharg == Py_True)
521 includepath = 1;
521 includepath = 1;
522
522
523 /* Initialize return set */
523 /* Initialize return set */
524 reachable = PyList_New(0);
524 reachable = PyList_New(0);
525 if (reachable == NULL)
525 if (reachable == NULL)
526 goto bail;
526 goto bail;
527
527
528 /* Initialize internal datastructures */
528 /* Initialize internal datastructures */
529 tovisit = (int *)malloc((len + 1) * sizeof(int));
529 tovisit = (int *)malloc((len + 1) * sizeof(int));
530 if (tovisit == NULL) {
530 if (tovisit == NULL) {
531 PyErr_NoMemory();
531 PyErr_NoMemory();
532 goto bail;
532 goto bail;
533 }
533 }
534
534
535 revstates = (char *)calloc(len + 1, 1);
535 revstates = (char *)calloc(len + 1, 1);
536 if (revstates == NULL) {
536 if (revstates == NULL) {
537 PyErr_NoMemory();
537 PyErr_NoMemory();
538 goto bail;
538 goto bail;
539 }
539 }
540
540
541 l = PyList_GET_SIZE(roots);
541 l = PyList_GET_SIZE(roots);
542 for (i = 0; i < l; i++) {
542 for (i = 0; i < l; i++) {
543 revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
543 revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
544 if (revnum == -1 && PyErr_Occurred())
544 if (revnum == -1 && PyErr_Occurred())
545 goto bail;
545 goto bail;
546 /* If root is out of range, e.g. wdir(), it must be unreachable
546 /* If root is out of range, e.g. wdir(), it must be unreachable
547 * from heads. So we can just ignore it. */
547 * from heads. So we can just ignore it. */
548 if (revnum + 1 < 0 || revnum + 1 >= len + 1)
548 if (revnum + 1 < 0 || revnum + 1 >= len + 1)
549 continue;
549 continue;
550 revstates[revnum + 1] |= RS_ROOT;
550 revstates[revnum + 1] |= RS_ROOT;
551 }
551 }
552
552
553 /* Populate tovisit with all the heads */
553 /* Populate tovisit with all the heads */
554 l = PyList_GET_SIZE(heads);
554 l = PyList_GET_SIZE(heads);
555 for (i = 0; i < l; i++) {
555 for (i = 0; i < l; i++) {
556 revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
556 revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
557 if (revnum == -1 && PyErr_Occurred())
557 if (revnum == -1 && PyErr_Occurred())
558 goto bail;
558 goto bail;
559 if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
559 if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
560 PyErr_SetString(PyExc_IndexError, "head out of range");
560 PyErr_SetString(PyExc_IndexError, "head out of range");
561 goto bail;
561 goto bail;
562 }
562 }
563 if (!(revstates[revnum + 1] & RS_SEEN)) {
563 if (!(revstates[revnum + 1] & RS_SEEN)) {
564 tovisit[lentovisit++] = (int)revnum;
564 tovisit[lentovisit++] = (int)revnum;
565 revstates[revnum + 1] |= RS_SEEN;
565 revstates[revnum + 1] |= RS_SEEN;
566 }
566 }
567 }
567 }
568
568
569 /* Visit the tovisit list and find the reachable roots */
569 /* Visit the tovisit list and find the reachable roots */
570 k = 0;
570 k = 0;
571 while (k < lentovisit) {
571 while (k < lentovisit) {
572 /* Add the node to reachable if it is a root*/
572 /* Add the node to reachable if it is a root*/
573 revnum = tovisit[k++];
573 revnum = tovisit[k++];
574 if (revstates[revnum + 1] & RS_ROOT) {
574 if (revstates[revnum + 1] & RS_ROOT) {
575 revstates[revnum + 1] |= RS_REACHABLE;
575 revstates[revnum + 1] |= RS_REACHABLE;
576 val = PyInt_FromLong(revnum);
576 val = PyInt_FromLong(revnum);
577 if (val == NULL)
577 if (val == NULL)
578 goto bail;
578 goto bail;
579 r = PyList_Append(reachable, val);
579 r = PyList_Append(reachable, val);
580 Py_DECREF(val);
580 Py_DECREF(val);
581 if (r < 0)
581 if (r < 0)
582 goto bail;
582 goto bail;
583 if (includepath == 0)
583 if (includepath == 0)
584 continue;
584 continue;
585 }
585 }
586
586
587 /* Add its parents to the list of nodes to visit */
587 /* Add its parents to the list of nodes to visit */
588 if (revnum == -1)
588 if (revnum == -1)
589 continue;
589 continue;
590 r = index_get_parents(self, revnum, parents, (int)len - 1);
590 r = index_get_parents(self, revnum, parents, (int)len - 1);
591 if (r < 0)
591 if (r < 0)
592 goto bail;
592 goto bail;
593 for (i = 0; i < 2; i++) {
593 for (i = 0; i < 2; i++) {
594 if (!(revstates[parents[i] + 1] & RS_SEEN)
594 if (!(revstates[parents[i] + 1] & RS_SEEN)
595 && parents[i] >= minroot) {
595 && parents[i] >= minroot) {
596 tovisit[lentovisit++] = parents[i];
596 tovisit[lentovisit++] = parents[i];
597 revstates[parents[i] + 1] |= RS_SEEN;
597 revstates[parents[i] + 1] |= RS_SEEN;
598 }
598 }
599 }
599 }
600 }
600 }
601
601
602 /* Find all the nodes in between the roots we found and the heads
602 /* Find all the nodes in between the roots we found and the heads
603 * and add them to the reachable set */
603 * and add them to the reachable set */
604 if (includepath == 1) {
604 if (includepath == 1) {
605 long minidx = minroot;
605 long minidx = minroot;
606 if (minidx < 0)
606 if (minidx < 0)
607 minidx = 0;
607 minidx = 0;
608 for (i = minidx; i < len; i++) {
608 for (i = minidx; i < len; i++) {
609 if (!(revstates[i + 1] & RS_SEEN))
609 if (!(revstates[i + 1] & RS_SEEN))
610 continue;
610 continue;
611 r = index_get_parents(self, i, parents, (int)len - 1);
611 r = index_get_parents(self, i, parents, (int)len - 1);
612 /* Corrupted index file, error is set from
612 /* Corrupted index file, error is set from
613 * index_get_parents */
613 * index_get_parents */
614 if (r < 0)
614 if (r < 0)
615 goto bail;
615 goto bail;
616 if (((revstates[parents[0] + 1] |
616 if (((revstates[parents[0] + 1] |
617 revstates[parents[1] + 1]) & RS_REACHABLE)
617 revstates[parents[1] + 1]) & RS_REACHABLE)
618 && !(revstates[i + 1] & RS_REACHABLE)) {
618 && !(revstates[i + 1] & RS_REACHABLE)) {
619 revstates[i + 1] |= RS_REACHABLE;
619 revstates[i + 1] |= RS_REACHABLE;
620 val = PyInt_FromLong(i);
620 val = PyInt_FromLong(i);
621 if (val == NULL)
621 if (val == NULL)
622 goto bail;
622 goto bail;
623 r = PyList_Append(reachable, val);
623 r = PyList_Append(reachable, val);
624 Py_DECREF(val);
624 Py_DECREF(val);
625 if (r < 0)
625 if (r < 0)
626 goto bail;
626 goto bail;
627 }
627 }
628 }
628 }
629 }
629 }
630
630
631 free(revstates);
631 free(revstates);
632 free(tovisit);
632 free(tovisit);
633 return reachable;
633 return reachable;
634 bail:
634 bail:
635 Py_XDECREF(reachable);
635 Py_XDECREF(reachable);
636 free(revstates);
636 free(revstates);
637 free(tovisit);
637 free(tovisit);
638 return NULL;
638 return NULL;
639 }
639 }
640
640
641 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
641 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
642 {
642 {
643 PyObject *roots = Py_None;
643 PyObject *roots = Py_None;
644 PyObject *ret = NULL;
644 PyObject *ret = NULL;
645 PyObject *phasessize = NULL;
645 PyObject *phasessize = NULL;
646 PyObject *phaseroots = NULL;
646 PyObject *phaseroots = NULL;
647 PyObject *phaseset = NULL;
647 PyObject *phaseset = NULL;
648 PyObject *phasessetlist = NULL;
648 PyObject *phasessetlist = NULL;
649 PyObject *rev = NULL;
649 PyObject *rev = NULL;
650 Py_ssize_t len = index_length(self) - 1;
650 Py_ssize_t len = index_length(self) - 1;
651 Py_ssize_t numphase = 0;
651 Py_ssize_t numphase = 0;
652 Py_ssize_t minrevallphases = 0;
652 Py_ssize_t minrevallphases = 0;
653 Py_ssize_t minrevphase = 0;
653 Py_ssize_t minrevphase = 0;
654 Py_ssize_t i = 0;
654 Py_ssize_t i = 0;
655 char *phases = NULL;
655 char *phases = NULL;
656 long phase;
656 long phase;
657
657
658 if (!PyArg_ParseTuple(args, "O", &roots))
658 if (!PyArg_ParseTuple(args, "O", &roots))
659 goto done;
659 goto done;
660 if (roots == NULL || !PyList_Check(roots)) {
660 if (roots == NULL || !PyList_Check(roots)) {
661 PyErr_SetString(PyExc_TypeError, "roots must be a list");
661 PyErr_SetString(PyExc_TypeError, "roots must be a list");
662 goto done;
662 goto done;
663 }
663 }
664
664
665 phases = calloc(len, 1); /* phase per rev: {0: public, 1: draft, 2: secret} */
665 phases = calloc(len, 1); /* phase per rev: {0: public, 1: draft, 2: secret} */
666 if (phases == NULL) {
666 if (phases == NULL) {
667 PyErr_NoMemory();
667 PyErr_NoMemory();
668 goto done;
668 goto done;
669 }
669 }
670 /* Put the phase information of all the roots in phases */
670 /* Put the phase information of all the roots in phases */
671 numphase = PyList_GET_SIZE(roots)+1;
671 numphase = PyList_GET_SIZE(roots)+1;
672 minrevallphases = len + 1;
672 minrevallphases = len + 1;
673 phasessetlist = PyList_New(numphase);
673 phasessetlist = PyList_New(numphase);
674 if (phasessetlist == NULL)
674 if (phasessetlist == NULL)
675 goto done;
675 goto done;
676
676
677 PyList_SET_ITEM(phasessetlist, 0, Py_None);
677 PyList_SET_ITEM(phasessetlist, 0, Py_None);
678 Py_INCREF(Py_None);
678 Py_INCREF(Py_None);
679
679
680 for (i = 0; i < numphase-1; i++) {
680 for (i = 0; i < numphase-1; i++) {
681 phaseroots = PyList_GET_ITEM(roots, i);
681 phaseroots = PyList_GET_ITEM(roots, i);
682 phaseset = PySet_New(NULL);
682 phaseset = PySet_New(NULL);
683 if (phaseset == NULL)
683 if (phaseset == NULL)
684 goto release;
684 goto release;
685 PyList_SET_ITEM(phasessetlist, i+1, phaseset);
685 PyList_SET_ITEM(phasessetlist, i+1, phaseset);
686 if (!PyList_Check(phaseroots)) {
686 if (!PyList_Check(phaseroots)) {
687 PyErr_SetString(PyExc_TypeError,
687 PyErr_SetString(PyExc_TypeError,
688 "roots item must be a list");
688 "roots item must be a list");
689 goto release;
689 goto release;
690 }
690 }
691 minrevphase = add_roots_get_min(self, phaseroots, i+1, phases);
691 minrevphase = add_roots_get_min(self, phaseroots, i+1, phases);
692 if (minrevphase == -2) /* Error from add_roots_get_min */
692 if (minrevphase == -2) /* Error from add_roots_get_min */
693 goto release;
693 goto release;
694 minrevallphases = MIN(minrevallphases, minrevphase);
694 minrevallphases = MIN(minrevallphases, minrevphase);
695 }
695 }
696 /* Propagate the phase information from the roots to the revs */
696 /* Propagate the phase information from the roots to the revs */
697 if (minrevallphases != -1) {
697 if (minrevallphases != -1) {
698 int parents[2];
698 int parents[2];
699 for (i = minrevallphases; i < len; i++) {
699 for (i = minrevallphases; i < len; i++) {
700 if (index_get_parents(self, i, parents,
700 if (index_get_parents(self, i, parents,
701 (int)len - 1) < 0)
701 (int)len - 1) < 0)
702 goto release;
702 goto release;
703 set_phase_from_parents(phases, parents[0], parents[1], i);
703 set_phase_from_parents(phases, parents[0], parents[1], i);
704 }
704 }
705 }
705 }
706 /* Transform phase list to a python list */
706 /* Transform phase list to a python list */
707 phasessize = PyInt_FromLong(len);
707 phasessize = PyInt_FromLong(len);
708 if (phasessize == NULL)
708 if (phasessize == NULL)
709 goto release;
709 goto release;
710 for (i = 0; i < len; i++) {
710 for (i = 0; i < len; i++) {
711 phase = phases[i];
711 phase = phases[i];
712 /* We only store the sets of phase for non public phase, the public phase
712 /* We only store the sets of phase for non public phase, the public phase
713 * is computed as a difference */
713 * is computed as a difference */
714 if (phase != 0) {
714 if (phase != 0) {
715 phaseset = PyList_GET_ITEM(phasessetlist, phase);
715 phaseset = PyList_GET_ITEM(phasessetlist, phase);
716 rev = PyInt_FromLong(i);
716 rev = PyInt_FromLong(i);
717 if (rev == NULL)
717 if (rev == NULL)
718 goto release;
718 goto release;
719 PySet_Add(phaseset, rev);
719 PySet_Add(phaseset, rev);
720 Py_XDECREF(rev);
720 Py_XDECREF(rev);
721 }
721 }
722 }
722 }
723 ret = PyTuple_Pack(2, phasessize, phasessetlist);
723 ret = PyTuple_Pack(2, phasessize, phasessetlist);
724
724
725 release:
725 release:
726 Py_XDECREF(phasessize);
726 Py_XDECREF(phasessize);
727 Py_XDECREF(phasessetlist);
727 Py_XDECREF(phasessetlist);
728 done:
728 done:
729 free(phases);
729 free(phases);
730 return ret;
730 return ret;
731 }
731 }
732
732
733 static PyObject *index_headrevs(indexObject *self, PyObject *args)
733 static PyObject *index_headrevs(indexObject *self, PyObject *args)
734 {
734 {
735 Py_ssize_t i, j, len;
735 Py_ssize_t i, j, len;
736 char *nothead = NULL;
736 char *nothead = NULL;
737 PyObject *heads = NULL;
737 PyObject *heads = NULL;
738 PyObject *filter = NULL;
738 PyObject *filter = NULL;
739 PyObject *filteredrevs = Py_None;
739 PyObject *filteredrevs = Py_None;
740
740
741 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
741 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
742 return NULL;
742 return NULL;
743 }
743 }
744
744
745 if (self->headrevs && filteredrevs == self->filteredrevs)
745 if (self->headrevs && filteredrevs == self->filteredrevs)
746 return list_copy(self->headrevs);
746 return list_copy(self->headrevs);
747
747
748 Py_DECREF(self->filteredrevs);
748 Py_DECREF(self->filteredrevs);
749 self->filteredrevs = filteredrevs;
749 self->filteredrevs = filteredrevs;
750 Py_INCREF(filteredrevs);
750 Py_INCREF(filteredrevs);
751
751
752 if (filteredrevs != Py_None) {
752 if (filteredrevs != Py_None) {
753 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
753 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
754 if (!filter) {
754 if (!filter) {
755 PyErr_SetString(PyExc_TypeError,
755 PyErr_SetString(PyExc_TypeError,
756 "filteredrevs has no attribute __contains__");
756 "filteredrevs has no attribute __contains__");
757 goto bail;
757 goto bail;
758 }
758 }
759 }
759 }
760
760
761 len = index_length(self) - 1;
761 len = index_length(self) - 1;
762 heads = PyList_New(0);
762 heads = PyList_New(0);
763 if (heads == NULL)
763 if (heads == NULL)
764 goto bail;
764 goto bail;
765 if (len == 0) {
765 if (len == 0) {
766 PyObject *nullid = PyInt_FromLong(-1);
766 PyObject *nullid = PyInt_FromLong(-1);
767 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
767 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
768 Py_XDECREF(nullid);
768 Py_XDECREF(nullid);
769 goto bail;
769 goto bail;
770 }
770 }
771 goto done;
771 goto done;
772 }
772 }
773
773
774 nothead = calloc(len, 1);
774 nothead = calloc(len, 1);
775 if (nothead == NULL) {
775 if (nothead == NULL) {
776 PyErr_NoMemory();
776 PyErr_NoMemory();
777 goto bail;
777 goto bail;
778 }
778 }
779
779
780 for (i = len - 1; i >= 0; i--) {
780 for (i = len - 1; i >= 0; i--) {
781 int isfiltered;
781 int isfiltered;
782 int parents[2];
782 int parents[2];
783
783
784 /* If nothead[i] == 1, it means we've seen an unfiltered child of this
784 /* If nothead[i] == 1, it means we've seen an unfiltered child of this
785 * node already, and therefore this node is not filtered. So we can skip
785 * node already, and therefore this node is not filtered. So we can skip
786 * the expensive check_filter step.
786 * the expensive check_filter step.
787 */
787 */
788 if (nothead[i] != 1) {
788 if (nothead[i] != 1) {
789 isfiltered = check_filter(filter, i);
789 isfiltered = check_filter(filter, i);
790 if (isfiltered == -1) {
790 if (isfiltered == -1) {
791 PyErr_SetString(PyExc_TypeError,
791 PyErr_SetString(PyExc_TypeError,
792 "unable to check filter");
792 "unable to check filter");
793 goto bail;
793 goto bail;
794 }
794 }
795
795
796 if (isfiltered) {
796 if (isfiltered) {
797 nothead[i] = 1;
797 nothead[i] = 1;
798 continue;
798 continue;
799 }
799 }
800 }
800 }
801
801
802 if (index_get_parents(self, i, parents, (int)len - 1) < 0)
802 if (index_get_parents(self, i, parents, (int)len - 1) < 0)
803 goto bail;
803 goto bail;
804 for (j = 0; j < 2; j++) {
804 for (j = 0; j < 2; j++) {
805 if (parents[j] >= 0)
805 if (parents[j] >= 0)
806 nothead[parents[j]] = 1;
806 nothead[parents[j]] = 1;
807 }
807 }
808 }
808 }
809
809
810 for (i = 0; i < len; i++) {
810 for (i = 0; i < len; i++) {
811 PyObject *head;
811 PyObject *head;
812
812
813 if (nothead[i])
813 if (nothead[i])
814 continue;
814 continue;
815 head = PyInt_FromSsize_t(i);
815 head = PyInt_FromSsize_t(i);
816 if (head == NULL || PyList_Append(heads, head) == -1) {
816 if (head == NULL || PyList_Append(heads, head) == -1) {
817 Py_XDECREF(head);
817 Py_XDECREF(head);
818 goto bail;
818 goto bail;
819 }
819 }
820 }
820 }
821
821
822 done:
822 done:
823 self->headrevs = heads;
823 self->headrevs = heads;
824 Py_XDECREF(filter);
824 Py_XDECREF(filter);
825 free(nothead);
825 free(nothead);
826 return list_copy(self->headrevs);
826 return list_copy(self->headrevs);
827 bail:
827 bail:
828 Py_XDECREF(filter);
828 Py_XDECREF(filter);
829 Py_XDECREF(heads);
829 Py_XDECREF(heads);
830 free(nothead);
830 free(nothead);
831 return NULL;
831 return NULL;
832 }
832 }
833
833
834 /**
834 /**
835 * Obtain the base revision index entry.
835 * Obtain the base revision index entry.
836 *
836 *
837 * Callers must ensure that rev >= 0 or illegal memory access may occur.
837 * Callers must ensure that rev >= 0 or illegal memory access may occur.
838 */
838 */
839 static inline int index_baserev(indexObject *self, int rev)
839 static inline int index_baserev(indexObject *self, int rev)
840 {
840 {
841 const char *data;
841 const char *data;
842
842
843 if (rev >= self->length - 1) {
843 if (rev >= self->length - 1) {
844 PyObject *tuple = PyList_GET_ITEM(self->added,
844 PyObject *tuple = PyList_GET_ITEM(self->added,
845 rev - self->length + 1);
845 rev - self->length + 1);
846 return (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 3));
846 return (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 3));
847 }
847 }
848 else {
848 else {
849 data = index_deref(self, rev);
849 data = index_deref(self, rev);
850 if (data == NULL) {
850 if (data == NULL) {
851 return -2;
851 return -2;
852 }
852 }
853
853
854 return getbe32(data + 16);
854 return getbe32(data + 16);
855 }
855 }
856 }
856 }
857
857
858 static PyObject *index_deltachain(indexObject *self, PyObject *args)
858 static PyObject *index_deltachain(indexObject *self, PyObject *args)
859 {
859 {
860 int rev, generaldelta;
860 int rev, generaldelta;
861 PyObject *stoparg;
861 PyObject *stoparg;
862 int stoprev, iterrev, baserev = -1;
862 int stoprev, iterrev, baserev = -1;
863 int stopped;
863 int stopped;
864 PyObject *chain = NULL, *result = NULL;
864 PyObject *chain = NULL, *result = NULL;
865 const Py_ssize_t length = index_length(self);
865 const Py_ssize_t length = index_length(self);
866
866
867 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
867 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
868 return NULL;
868 return NULL;
869 }
869 }
870
870
871 if (PyInt_Check(stoparg)) {
871 if (PyInt_Check(stoparg)) {
872 stoprev = (int)PyInt_AsLong(stoparg);
872 stoprev = (int)PyInt_AsLong(stoparg);
873 if (stoprev == -1 && PyErr_Occurred()) {
873 if (stoprev == -1 && PyErr_Occurred()) {
874 return NULL;
874 return NULL;
875 }
875 }
876 }
876 }
877 else if (stoparg == Py_None) {
877 else if (stoparg == Py_None) {
878 stoprev = -2;
878 stoprev = -2;
879 }
879 }
880 else {
880 else {
881 PyErr_SetString(PyExc_ValueError,
881 PyErr_SetString(PyExc_ValueError,
882 "stoprev must be integer or None");
882 "stoprev must be integer or None");
883 return NULL;
883 return NULL;
884 }
884 }
885
885
886 if (rev < 0 || rev >= length - 1) {
886 if (rev < 0 || rev >= length - 1) {
887 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
887 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
888 return NULL;
888 return NULL;
889 }
889 }
890
890
891 chain = PyList_New(0);
891 chain = PyList_New(0);
892 if (chain == NULL) {
892 if (chain == NULL) {
893 return NULL;
893 return NULL;
894 }
894 }
895
895
896 baserev = index_baserev(self, rev);
896 baserev = index_baserev(self, rev);
897
897
898 /* This should never happen. */
898 /* This should never happen. */
899 if (baserev <= -2) {
899 if (baserev <= -2) {
900 /* Error should be set by index_deref() */
900 /* Error should be set by index_deref() */
901 assert(PyErr_Occurred());
901 assert(PyErr_Occurred());
902 goto bail;
902 goto bail;
903 }
903 }
904
904
905 iterrev = rev;
905 iterrev = rev;
906
906
907 while (iterrev != baserev && iterrev != stoprev) {
907 while (iterrev != baserev && iterrev != stoprev) {
908 PyObject *value = PyInt_FromLong(iterrev);
908 PyObject *value = PyInt_FromLong(iterrev);
909 if (value == NULL) {
909 if (value == NULL) {
910 goto bail;
910 goto bail;
911 }
911 }
912 if (PyList_Append(chain, value)) {
912 if (PyList_Append(chain, value)) {
913 Py_DECREF(value);
913 Py_DECREF(value);
914 goto bail;
914 goto bail;
915 }
915 }
916 Py_DECREF(value);
916 Py_DECREF(value);
917
917
918 if (generaldelta) {
918 if (generaldelta) {
919 iterrev = baserev;
919 iterrev = baserev;
920 }
920 }
921 else {
921 else {
922 iterrev--;
922 iterrev--;
923 }
923 }
924
924
925 if (iterrev < 0) {
925 if (iterrev < 0) {
926 break;
926 break;
927 }
927 }
928
928
929 if (iterrev >= length - 1) {
929 if (iterrev >= length - 1) {
930 PyErr_SetString(PyExc_IndexError, "revision outside index");
930 PyErr_SetString(PyExc_IndexError, "revision outside index");
931 return NULL;
931 return NULL;
932 }
932 }
933
933
934 baserev = index_baserev(self, iterrev);
934 baserev = index_baserev(self, iterrev);
935
935
936 /* This should never happen. */
936 /* This should never happen. */
937 if (baserev <= -2) {
937 if (baserev <= -2) {
938 /* Error should be set by index_deref() */
938 /* Error should be set by index_deref() */
939 assert(PyErr_Occurred());
939 assert(PyErr_Occurred());
940 goto bail;
940 goto bail;
941 }
941 }
942 }
942 }
943
943
944 if (iterrev == stoprev) {
944 if (iterrev == stoprev) {
945 stopped = 1;
945 stopped = 1;
946 }
946 }
947 else {
947 else {
948 PyObject *value = PyInt_FromLong(iterrev);
948 PyObject *value = PyInt_FromLong(iterrev);
949 if (value == NULL) {
949 if (value == NULL) {
950 goto bail;
950 goto bail;
951 }
951 }
952 if (PyList_Append(chain, value)) {
952 if (PyList_Append(chain, value)) {
953 Py_DECREF(value);
953 Py_DECREF(value);
954 goto bail;
954 goto bail;
955 }
955 }
956 Py_DECREF(value);
956 Py_DECREF(value);
957
957
958 stopped = 0;
958 stopped = 0;
959 }
959 }
960
960
961 if (PyList_Reverse(chain)) {
961 if (PyList_Reverse(chain)) {
962 goto bail;
962 goto bail;
963 }
963 }
964
964
965 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
965 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
966 Py_DECREF(chain);
966 Py_DECREF(chain);
967 return result;
967 return result;
968
968
969 bail:
969 bail:
970 Py_DECREF(chain);
970 Py_DECREF(chain);
971 return NULL;
971 return NULL;
972 }
972 }
973
973
974 static inline int nt_level(const char *node, Py_ssize_t level)
974 static inline int nt_level(const char *node, Py_ssize_t level)
975 {
975 {
976 int v = node[level>>1];
976 int v = node[level>>1];
977 if (!(level & 1))
977 if (!(level & 1))
978 v >>= 4;
978 v >>= 4;
979 return v & 0xf;
979 return v & 0xf;
980 }
980 }
981
981
982 /*
982 /*
983 * Return values:
983 * Return values:
984 *
984 *
985 * -4: match is ambiguous (multiple candidates)
985 * -4: match is ambiguous (multiple candidates)
986 * -2: not found
986 * -2: not found
987 * rest: valid rev
987 * rest: valid rev
988 */
988 */
989 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
989 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
990 int hex)
990 int hex)
991 {
991 {
992 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
992 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
993 int level, maxlevel, off;
993 int level, maxlevel, off;
994
994
995 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
995 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
996 return -1;
996 return -1;
997
997
998 if (self->nt == NULL)
998 if (self->nt == NULL)
999 return -2;
999 return -2;
1000
1000
1001 if (hex)
1001 if (hex)
1002 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
1002 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
1003 else
1003 else
1004 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
1004 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
1005
1005
1006 for (level = off = 0; level < maxlevel; level++) {
1006 for (level = off = 0; level < maxlevel; level++) {
1007 int k = getnybble(node, level);
1007 int k = getnybble(node, level);
1008 nodetree *n = &self->nt[off];
1008 nodetree *n = &self->nt[off];
1009 int v = n->children[k];
1009 int v = n->children[k];
1010
1010
1011 if (v < 0) {
1011 if (v < 0) {
1012 const char *n;
1012 const char *n;
1013 Py_ssize_t i;
1013 Py_ssize_t i;
1014
1014
1015 v = -(v + 1);
1015 v = -(v + 1);
1016 n = index_node(self, v);
1016 n = index_node(self, v);
1017 if (n == NULL)
1017 if (n == NULL)
1018 return -2;
1018 return -2;
1019 for (i = level; i < maxlevel; i++)
1019 for (i = level; i < maxlevel; i++)
1020 if (getnybble(node, i) != nt_level(n, i))
1020 if (getnybble(node, i) != nt_level(n, i))
1021 return -2;
1021 return -2;
1022 return v;
1022 return v;
1023 }
1023 }
1024 if (v == 0)
1024 if (v == 0)
1025 return -2;
1025 return -2;
1026 off = v;
1026 off = v;
1027 }
1027 }
1028 /* multiple matches against an ambiguous prefix */
1028 /* multiple matches against an ambiguous prefix */
1029 return -4;
1029 return -4;
1030 }
1030 }
1031
1031
1032 static int nt_new(indexObject *self)
1032 static int nt_new(indexObject *self)
1033 {
1033 {
1034 if (self->ntlength == self->ntcapacity) {
1034 if (self->ntlength == self->ntcapacity) {
1035 if (self->ntcapacity >= INT_MAX / (sizeof(nodetree) * 2)) {
1035 if (self->ntcapacity >= INT_MAX / (sizeof(nodetree) * 2)) {
1036 PyErr_SetString(PyExc_MemoryError,
1036 PyErr_SetString(PyExc_MemoryError,
1037 "overflow in nt_new");
1037 "overflow in nt_new");
1038 return -1;
1038 return -1;
1039 }
1039 }
1040 self->ntcapacity *= 2;
1040 self->ntcapacity *= 2;
1041 self->nt = realloc(self->nt,
1041 self->nt = realloc(self->nt,
1042 self->ntcapacity * sizeof(nodetree));
1042 self->ntcapacity * sizeof(nodetree));
1043 if (self->nt == NULL) {
1043 if (self->nt == NULL) {
1044 PyErr_SetString(PyExc_MemoryError, "out of memory");
1044 PyErr_SetString(PyExc_MemoryError, "out of memory");
1045 return -1;
1045 return -1;
1046 }
1046 }
1047 memset(&self->nt[self->ntlength], 0,
1047 memset(&self->nt[self->ntlength], 0,
1048 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
1048 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
1049 }
1049 }
1050 return self->ntlength++;
1050 return self->ntlength++;
1051 }
1051 }
1052
1052
/*
 * Insert a revision's 20-byte binary node into the radix tree.
 *
 * Tree encoding (per nt_find): children[k] == 0 is an empty slot, a
 * positive value is the offset of an interior node in self->nt, and a
 * negative value -(rev + 1) is a leaf referencing revision rev.
 *
 * Returns 0 on success, -1 on failure (exception set by nt_new on
 * allocation failure, or the 40-level limit was exceeded, which
 * cannot happen for well-formed 40-nybble nodes).
 */
static int nt_insert(indexObject *self, const char *node, int rev)
{
	int level = 0;
	int off = 0;

	while (level < 40) {
		int k = nt_level(node, level);
		nodetree *n;
		int v;

		n = &self->nt[off];
		v = n->children[k];

		if (v == 0) {
			/* empty slot: store the new leaf directly */
			n->children[k] = -rev - 1;
			return 0;
		}
		if (v < 0) {
			/* slot already holds a leaf */
			const char *oldnode = index_node(self, -(v + 1));
			int noff;

			if (!oldnode || !memcmp(oldnode, node, 20)) {
				/* same node (or stale leaf): overwrite it */
				n->children[k] = -rev - 1;
				return 0;
			}
			/* different node: split by pushing the old leaf
			   one level down into a new interior node */
			noff = nt_new(self);
			if (noff == -1)
				return -1;
			/* self->nt may have been changed by realloc */
			self->nt[off].children[k] = noff;
			off = noff;
			n = &self->nt[off];
			n->children[nt_level(oldnode, ++level)] = v;
			/* ntdepth/ntsplits track tree shape for stats */
			if (level > self->ntdepth)
				self->ntdepth = level;
			self->ntsplits += 1;
		} else {
			/* interior node: descend one level */
			level += 1;
			off = v;
		}
	}

	return -1;
}
1097
1097
1098 static int nt_init(indexObject *self)
1098 static int nt_init(indexObject *self)
1099 {
1099 {
1100 if (self->nt == NULL) {
1100 if (self->nt == NULL) {
1101 if ((size_t)self->raw_length > INT_MAX / sizeof(nodetree)) {
1101 if ((size_t)self->raw_length > INT_MAX / sizeof(nodetree)) {
1102 PyErr_SetString(PyExc_ValueError, "overflow in nt_init");
1102 PyErr_SetString(PyExc_ValueError, "overflow in nt_init");
1103 return -1;
1103 return -1;
1104 }
1104 }
1105 self->ntcapacity = self->raw_length < 4
1105 self->ntcapacity = self->raw_length < 4
1106 ? 4 : (int)self->raw_length / 2;
1106 ? 4 : (int)self->raw_length / 2;
1107
1107
1108 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
1108 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
1109 if (self->nt == NULL) {
1109 if (self->nt == NULL) {
1110 PyErr_NoMemory();
1110 PyErr_NoMemory();
1111 return -1;
1111 return -1;
1112 }
1112 }
1113 self->ntlength = 1;
1113 self->ntlength = 1;
1114 self->ntrev = (int)index_length(self) - 1;
1114 self->ntrev = (int)index_length(self) - 1;
1115 self->ntlookups = 1;
1115 self->ntlookups = 1;
1116 self->ntmisses = 0;
1116 self->ntmisses = 0;
1117 if (nt_insert(self, nullid, INT_MAX) == -1)
1117 if (nt_insert(self, nullid, INT_MAX) == -1)
1118 return -1;
1118 return -1;
1119 }
1119 }
1120 return 0;
1120 return 0;
1121 }
1121 }
1122
1122
/*
 * Look up the revision of a (possibly truncated) binary node.
 *
 * Return values:
 *
 * -3: error (exception set)
 * -2: not found (no exception set)
 * rest: valid rev
 */
static int index_find_node(indexObject *self,
			   const char *node, Py_ssize_t nodelen)
{
	int rev;

	self->ntlookups++;
	/* fast path: the node may already be cached in the radix tree */
	rev = nt_find(self, node, nodelen, 0);
	if (rev >= -1)
		return rev;

	if (nt_init(self) == -1)
		return -3;

	/*
	 * For the first handful of lookups, we scan the entire index,
	 * and cache only the matching nodes. This optimizes for cases
	 * like "hg tip", where only a few nodes are accessed.
	 *
	 * After that, we cache every node we visit, using a single
	 * scan amortized over multiple lookups. This gives the best
	 * bulk performance, e.g. for "hg log".
	 */
	if (self->ntmisses++ < 4) {
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node_existing(self, rev);
			if (n == NULL)
				return -3;
			/* nodelen may be a prefix; compare at most 20 bytes */
			if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
				if (nt_insert(self, n, rev) == -1)
					return -3;
				break;
			}
		}
	} else {
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node_existing(self, rev);
			if (n == NULL)
				return -3;
			/* cache every node visited during the scan */
			if (nt_insert(self, n, rev) == -1) {
				/* remember progress so a retry resumes here */
				self->ntrev = rev + 1;
				return -3;
			}
			if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
				break;
			}
		}
		/* everything at rev and above is now cached */
		self->ntrev = rev;
	}

	if (rev >= 0)
		return rev;
	return -2;
}
1183
1183
1184 static void raise_revlog_error(void)
1184 static void raise_revlog_error(void)
1185 {
1185 {
1186 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
1186 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
1187
1187
1188 mod = PyImport_ImportModule("mercurial.error");
1188 mod = PyImport_ImportModule("mercurial.error");
1189 if (mod == NULL) {
1189 if (mod == NULL) {
1190 goto cleanup;
1190 goto cleanup;
1191 }
1191 }
1192
1192
1193 dict = PyModule_GetDict(mod);
1193 dict = PyModule_GetDict(mod);
1194 if (dict == NULL) {
1194 if (dict == NULL) {
1195 goto cleanup;
1195 goto cleanup;
1196 }
1196 }
1197 Py_INCREF(dict);
1197 Py_INCREF(dict);
1198
1198
1199 errclass = PyDict_GetItemString(dict, "RevlogError");
1199 errclass = PyDict_GetItemString(dict, "RevlogError");
1200 if (errclass == NULL) {
1200 if (errclass == NULL) {
1201 PyErr_SetString(PyExc_SystemError,
1201 PyErr_SetString(PyExc_SystemError,
1202 "could not find RevlogError");
1202 "could not find RevlogError");
1203 goto cleanup;
1203 goto cleanup;
1204 }
1204 }
1205
1205
1206 /* value of exception is ignored by callers */
1206 /* value of exception is ignored by callers */
1207 PyErr_SetString(errclass, "RevlogError");
1207 PyErr_SetString(errclass, "RevlogError");
1208
1208
1209 cleanup:
1209 cleanup:
1210 Py_XDECREF(dict);
1210 Py_XDECREF(dict);
1211 Py_XDECREF(mod);
1211 Py_XDECREF(mod);
1212 }
1212 }
1213
1213
1214 static PyObject *index_getitem(indexObject *self, PyObject *value)
1214 static PyObject *index_getitem(indexObject *self, PyObject *value)
1215 {
1215 {
1216 char *node;
1216 char *node;
1217 Py_ssize_t nodelen;
1217 Py_ssize_t nodelen;
1218 int rev;
1218 int rev;
1219
1219
1220 if (PyInt_Check(value))
1220 if (PyInt_Check(value))
1221 return index_get(self, PyInt_AS_LONG(value));
1221 return index_get(self, PyInt_AS_LONG(value));
1222
1222
1223 if (node_check(value, &node, &nodelen) == -1)
1223 if (node_check(value, &node, &nodelen) == -1)
1224 return NULL;
1224 return NULL;
1225 rev = index_find_node(self, node, nodelen);
1225 rev = index_find_node(self, node, nodelen);
1226 if (rev >= -1)
1226 if (rev >= -1)
1227 return PyInt_FromLong(rev);
1227 return PyInt_FromLong(rev);
1228 if (rev == -2)
1228 if (rev == -2)
1229 raise_revlog_error();
1229 raise_revlog_error();
1230 return NULL;
1230 return NULL;
1231 }
1231 }
1232
1232
1233 /*
1233 /*
1234 * Fully populate the radix tree.
1234 * Fully populate the radix tree.
1235 */
1235 */
1236 static int nt_populate(indexObject *self) {
1236 static int nt_populate(indexObject *self) {
1237 int rev;
1237 int rev;
1238 if (self->ntrev > 0) {
1238 if (self->ntrev > 0) {
1239 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1239 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1240 const char *n = index_node_existing(self, rev);
1240 const char *n = index_node_existing(self, rev);
1241 if (n == NULL)
1241 if (n == NULL)
1242 return -1;
1242 return -1;
1243 if (nt_insert(self, n, rev) == -1)
1243 if (nt_insert(self, n, rev) == -1)
1244 return -1;
1244 return -1;
1245 }
1245 }
1246 self->ntrev = -1;
1246 self->ntrev = -1;
1247 }
1247 }
1248 return 0;
1248 return 0;
1249 }
1249 }
1250
1250
1251 static int nt_partialmatch(indexObject *self, const char *node,
1251 static int nt_partialmatch(indexObject *self, const char *node,
1252 Py_ssize_t nodelen)
1252 Py_ssize_t nodelen)
1253 {
1253 {
1254 if (nt_init(self) == -1)
1254 if (nt_init(self) == -1)
1255 return -3;
1255 return -3;
1256 if (nt_populate(self) == -1)
1256 if (nt_populate(self) == -1)
1257 return -3;
1257 return -3;
1258
1258
1259 return nt_find(self, node, nodelen, 1);
1259 return nt_find(self, node, nodelen, 1);
1260 }
1260 }
1261
1261
1262 /*
1263 * Find the length of the shortest unique prefix of node.
1264 *
1265 * Return values:
1266 *
1267 * -3: error (exception set)
1268 * -2: not found (no exception set)
1269 * rest: length of shortest prefix
1270 */
1271 static int nt_shortest(indexObject *self, const char *node)
1272 {
1273 int level, off;
1274
1275 if (nt_init(self) == -1)
1276 return -3;
1277 if (nt_populate(self) == -1)
1278 return -3;
1279
1280 for (level = off = 0; level < 40; level++) {
1281 int k, v;
1282 nodetree *n = &self->nt[off];
1283 k = nt_level(node, level);
1284 v = n->children[k];
1285 if (v < 0) {
1286 const char *n;
1287 v = -(v + 1);
1288 n = index_node(self, v);
1289 if (memcmp(node, n, 20) != 0)
1290 /*
1291 * Found a unique prefix, but it wasn't for the
1292 * requested node (i.e the requested node does
1293 * not exist).
1294 */
1295 return -2;
1296 return level + 1;
1297 }
1298 if (v == 0)
1299 return -2;
1300 off = v;
1301 }
1302 /*
1303 * The node was still not unique after 40 hex digits, so this won't
1304 * happen. Also, if we get here, then there's a programming error in
1305 * this file that made us insert a node longer than 40 hex digits.
1306 */
1307 PyErr_SetString(PyExc_Exception, "broken node tree");
1308 return -3;
1309 }
1310
/*
 * __index.partialmatch(hexprefix) -> binary node or None.
 *
 * Returns the full 20-byte binary node whose hex form uniquely starts
 * with the given prefix, nullid for a prefix of the null node, or
 * None when the prefix is unknown or contains non-hex characters.
 * Raises ValueError for an empty/overlong key and RevlogError for an
 * ambiguous prefix.
 */
static PyObject *index_partialmatch(indexObject *self, PyObject *args)
{
	const char *fullnode;
	int nodelen;
	char *node;
	int rev, i;

	if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
		return NULL;

	if (nodelen < 1) {
		PyErr_SetString(PyExc_ValueError, "key too short");
		return NULL;
	}

	if (nodelen > 40) {
		PyErr_SetString(PyExc_ValueError, "key too long");
		return NULL;
	}

	/* hexdigit sets an exception on invalid characters */
	for (i = 0; i < nodelen; i++)
		hexdigit(node, i);
	if (PyErr_Occurred()) {
		/* input contains non-hex characters */
		PyErr_Clear();
		Py_RETURN_NONE;
	}

	rev = nt_partialmatch(self, node, nodelen);

	switch (rev) {
	case -4:
		raise_revlog_error();
		/* fall through: exception is now set, return NULL below */
	case -3:
		return NULL;
	case -2:
		Py_RETURN_NONE;
	case -1:
		/* prefix of the null node */
		return PyBytes_FromStringAndSize(nullid, 20);
	}

	fullnode = index_node_existing(self, rev);
	if (fullnode == NULL) {
		return NULL;
	}
	return PyBytes_FromStringAndSize(fullnode, 20);
}
1309
1358
1359 static PyObject *index_shortest(indexObject *self, PyObject *args)
1360 {
1361 Py_ssize_t nodelen;
1362 PyObject *val;
1363 char *node;
1364 int length;
1365
1366 if (!PyArg_ParseTuple(args, "O", &val))
1367 return NULL;
1368 if (node_check(val, &node, &nodelen) == -1)
1369 return NULL;
1370
1371 self->ntlookups++;
1372 length = nt_shortest(self, node);
1373 if (length == -3)
1374 return NULL;
1375 if (length == -2) {
1376 raise_revlog_error();
1377 return NULL;
1378 }
1379 return PyInt_FromLong(length);
1380 }
1381
1310 static PyObject *index_m_get(indexObject *self, PyObject *args)
1382 static PyObject *index_m_get(indexObject *self, PyObject *args)
1311 {
1383 {
1312 Py_ssize_t nodelen;
1384 Py_ssize_t nodelen;
1313 PyObject *val;
1385 PyObject *val;
1314 char *node;
1386 char *node;
1315 int rev;
1387 int rev;
1316
1388
1317 if (!PyArg_ParseTuple(args, "O", &val))
1389 if (!PyArg_ParseTuple(args, "O", &val))
1318 return NULL;
1390 return NULL;
1319 if (node_check(val, &node, &nodelen) == -1)
1391 if (node_check(val, &node, &nodelen) == -1)
1320 return NULL;
1392 return NULL;
1321 rev = index_find_node(self, node, nodelen);
1393 rev = index_find_node(self, node, nodelen);
1322 if (rev == -3)
1394 if (rev == -3)
1323 return NULL;
1395 return NULL;
1324 if (rev == -2)
1396 if (rev == -2)
1325 Py_RETURN_NONE;
1397 Py_RETURN_NONE;
1326 return PyInt_FromLong(rev);
1398 return PyInt_FromLong(rev);
1327 }
1399 }
1328
1400
1329 static int index_contains(indexObject *self, PyObject *value)
1401 static int index_contains(indexObject *self, PyObject *value)
1330 {
1402 {
1331 char *node;
1403 char *node;
1332 Py_ssize_t nodelen;
1404 Py_ssize_t nodelen;
1333
1405
1334 if (PyInt_Check(value)) {
1406 if (PyInt_Check(value)) {
1335 long rev = PyInt_AS_LONG(value);
1407 long rev = PyInt_AS_LONG(value);
1336 return rev >= -1 && rev < index_length(self);
1408 return rev >= -1 && rev < index_length(self);
1337 }
1409 }
1338
1410
1339 if (node_check(value, &node, &nodelen) == -1)
1411 if (node_check(value, &node, &nodelen) == -1)
1340 return -1;
1412 return -1;
1341
1413
1342 switch (index_find_node(self, node, nodelen)) {
1414 switch (index_find_node(self, node, nodelen)) {
1343 case -3:
1415 case -3:
1344 return -1;
1416 return -1;
1345 case -2:
1417 case -2:
1346 return 0;
1418 return 0;
1347 default:
1419 default:
1348 return 1;
1420 return 1;
1349 }
1421 }
1350 }
1422 }
1351
1423
1352 typedef uint64_t bitmask;
1424 typedef uint64_t bitmask;
1353
1425
1354 /*
1426 /*
1355 * Given a disjoint set of revs, return all candidates for the
1427 * Given a disjoint set of revs, return all candidates for the
1356 * greatest common ancestor. In revset notation, this is the set
1428 * greatest common ancestor. In revset notation, this is the set
1357 * "heads(::a and ::b and ...)"
1429 * "heads(::a and ::b and ...)"
1358 */
1430 */
1359 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
1431 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
1360 int revcount)
1432 int revcount)
1361 {
1433 {
1362 const bitmask allseen = (1ull << revcount) - 1;
1434 const bitmask allseen = (1ull << revcount) - 1;
1363 const bitmask poison = 1ull << revcount;
1435 const bitmask poison = 1ull << revcount;
1364 PyObject *gca = PyList_New(0);
1436 PyObject *gca = PyList_New(0);
1365 int i, v, interesting;
1437 int i, v, interesting;
1366 int maxrev = -1;
1438 int maxrev = -1;
1367 bitmask sp;
1439 bitmask sp;
1368 bitmask *seen;
1440 bitmask *seen;
1369
1441
1370 if (gca == NULL)
1442 if (gca == NULL)
1371 return PyErr_NoMemory();
1443 return PyErr_NoMemory();
1372
1444
1373 for (i = 0; i < revcount; i++) {
1445 for (i = 0; i < revcount; i++) {
1374 if (revs[i] > maxrev)
1446 if (revs[i] > maxrev)
1375 maxrev = revs[i];
1447 maxrev = revs[i];
1376 }
1448 }
1377
1449
1378 seen = calloc(sizeof(*seen), maxrev + 1);
1450 seen = calloc(sizeof(*seen), maxrev + 1);
1379 if (seen == NULL) {
1451 if (seen == NULL) {
1380 Py_DECREF(gca);
1452 Py_DECREF(gca);
1381 return PyErr_NoMemory();
1453 return PyErr_NoMemory();
1382 }
1454 }
1383
1455
1384 for (i = 0; i < revcount; i++)
1456 for (i = 0; i < revcount; i++)
1385 seen[revs[i]] = 1ull << i;
1457 seen[revs[i]] = 1ull << i;
1386
1458
1387 interesting = revcount;
1459 interesting = revcount;
1388
1460
1389 for (v = maxrev; v >= 0 && interesting; v--) {
1461 for (v = maxrev; v >= 0 && interesting; v--) {
1390 bitmask sv = seen[v];
1462 bitmask sv = seen[v];
1391 int parents[2];
1463 int parents[2];
1392
1464
1393 if (!sv)
1465 if (!sv)
1394 continue;
1466 continue;
1395
1467
1396 if (sv < poison) {
1468 if (sv < poison) {
1397 interesting -= 1;
1469 interesting -= 1;
1398 if (sv == allseen) {
1470 if (sv == allseen) {
1399 PyObject *obj = PyInt_FromLong(v);
1471 PyObject *obj = PyInt_FromLong(v);
1400 if (obj == NULL)
1472 if (obj == NULL)
1401 goto bail;
1473 goto bail;
1402 if (PyList_Append(gca, obj) == -1) {
1474 if (PyList_Append(gca, obj) == -1) {
1403 Py_DECREF(obj);
1475 Py_DECREF(obj);
1404 goto bail;
1476 goto bail;
1405 }
1477 }
1406 sv |= poison;
1478 sv |= poison;
1407 for (i = 0; i < revcount; i++) {
1479 for (i = 0; i < revcount; i++) {
1408 if (revs[i] == v)
1480 if (revs[i] == v)
1409 goto done;
1481 goto done;
1410 }
1482 }
1411 }
1483 }
1412 }
1484 }
1413 if (index_get_parents(self, v, parents, maxrev) < 0)
1485 if (index_get_parents(self, v, parents, maxrev) < 0)
1414 goto bail;
1486 goto bail;
1415
1487
1416 for (i = 0; i < 2; i++) {
1488 for (i = 0; i < 2; i++) {
1417 int p = parents[i];
1489 int p = parents[i];
1418 if (p == -1)
1490 if (p == -1)
1419 continue;
1491 continue;
1420 sp = seen[p];
1492 sp = seen[p];
1421 if (sv < poison) {
1493 if (sv < poison) {
1422 if (sp == 0) {
1494 if (sp == 0) {
1423 seen[p] = sv;
1495 seen[p] = sv;
1424 interesting++;
1496 interesting++;
1425 }
1497 }
1426 else if (sp != sv)
1498 else if (sp != sv)
1427 seen[p] |= sv;
1499 seen[p] |= sv;
1428 } else {
1500 } else {
1429 if (sp && sp < poison)
1501 if (sp && sp < poison)
1430 interesting--;
1502 interesting--;
1431 seen[p] = sv;
1503 seen[p] = sv;
1432 }
1504 }
1433 }
1505 }
1434 }
1506 }
1435
1507
1436 done:
1508 done:
1437 free(seen);
1509 free(seen);
1438 return gca;
1510 return gca;
1439 bail:
1511 bail:
1440 free(seen);
1512 free(seen);
1441 Py_XDECREF(gca);
1513 Py_XDECREF(gca);
1442 return NULL;
1514 return NULL;
1443 }
1515 }
1444
1516
1445 /*
1517 /*
1446 * Given a disjoint set of revs, return the subset with the longest
1518 * Given a disjoint set of revs, return the subset with the longest
1447 * path to the root.
1519 * path to the root.
1448 */
1520 */
1449 static PyObject *find_deepest(indexObject *self, PyObject *revs)
1521 static PyObject *find_deepest(indexObject *self, PyObject *revs)
1450 {
1522 {
1451 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
1523 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
1452 static const Py_ssize_t capacity = 24;
1524 static const Py_ssize_t capacity = 24;
1453 int *depth, *interesting = NULL;
1525 int *depth, *interesting = NULL;
1454 int i, j, v, ninteresting;
1526 int i, j, v, ninteresting;
1455 PyObject *dict = NULL, *keys = NULL;
1527 PyObject *dict = NULL, *keys = NULL;
1456 long *seen = NULL;
1528 long *seen = NULL;
1457 int maxrev = -1;
1529 int maxrev = -1;
1458 long final;
1530 long final;
1459
1531
1460 if (revcount > capacity) {
1532 if (revcount > capacity) {
1461 PyErr_Format(PyExc_OverflowError,
1533 PyErr_Format(PyExc_OverflowError,
1462 "bitset size (%ld) > capacity (%ld)",
1534 "bitset size (%ld) > capacity (%ld)",
1463 (long)revcount, (long)capacity);
1535 (long)revcount, (long)capacity);
1464 return NULL;
1536 return NULL;
1465 }
1537 }
1466
1538
1467 for (i = 0; i < revcount; i++) {
1539 for (i = 0; i < revcount; i++) {
1468 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1540 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1469 if (n > maxrev)
1541 if (n > maxrev)
1470 maxrev = n;
1542 maxrev = n;
1471 }
1543 }
1472
1544
1473 depth = calloc(sizeof(*depth), maxrev + 1);
1545 depth = calloc(sizeof(*depth), maxrev + 1);
1474 if (depth == NULL)
1546 if (depth == NULL)
1475 return PyErr_NoMemory();
1547 return PyErr_NoMemory();
1476
1548
1477 seen = calloc(sizeof(*seen), maxrev + 1);
1549 seen = calloc(sizeof(*seen), maxrev + 1);
1478 if (seen == NULL) {
1550 if (seen == NULL) {
1479 PyErr_NoMemory();
1551 PyErr_NoMemory();
1480 goto bail;
1552 goto bail;
1481 }
1553 }
1482
1554
1483 interesting = calloc(sizeof(*interesting), 1 << revcount);
1555 interesting = calloc(sizeof(*interesting), 1 << revcount);
1484 if (interesting == NULL) {
1556 if (interesting == NULL) {
1485 PyErr_NoMemory();
1557 PyErr_NoMemory();
1486 goto bail;
1558 goto bail;
1487 }
1559 }
1488
1560
1489 if (PyList_Sort(revs) == -1)
1561 if (PyList_Sort(revs) == -1)
1490 goto bail;
1562 goto bail;
1491
1563
1492 for (i = 0; i < revcount; i++) {
1564 for (i = 0; i < revcount; i++) {
1493 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1565 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1494 long b = 1l << i;
1566 long b = 1l << i;
1495 depth[n] = 1;
1567 depth[n] = 1;
1496 seen[n] = b;
1568 seen[n] = b;
1497 interesting[b] = 1;
1569 interesting[b] = 1;
1498 }
1570 }
1499
1571
1500 /* invariant: ninteresting is the number of non-zero entries in
1572 /* invariant: ninteresting is the number of non-zero entries in
1501 * interesting. */
1573 * interesting. */
1502 ninteresting = (int)revcount;
1574 ninteresting = (int)revcount;
1503
1575
1504 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
1576 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
1505 int dv = depth[v];
1577 int dv = depth[v];
1506 int parents[2];
1578 int parents[2];
1507 long sv;
1579 long sv;
1508
1580
1509 if (dv == 0)
1581 if (dv == 0)
1510 continue;
1582 continue;
1511
1583
1512 sv = seen[v];
1584 sv = seen[v];
1513 if (index_get_parents(self, v, parents, maxrev) < 0)
1585 if (index_get_parents(self, v, parents, maxrev) < 0)
1514 goto bail;
1586 goto bail;
1515
1587
1516 for (i = 0; i < 2; i++) {
1588 for (i = 0; i < 2; i++) {
1517 int p = parents[i];
1589 int p = parents[i];
1518 long sp;
1590 long sp;
1519 int dp;
1591 int dp;
1520
1592
1521 if (p == -1)
1593 if (p == -1)
1522 continue;
1594 continue;
1523
1595
1524 dp = depth[p];
1596 dp = depth[p];
1525 sp = seen[p];
1597 sp = seen[p];
1526 if (dp <= dv) {
1598 if (dp <= dv) {
1527 depth[p] = dv + 1;
1599 depth[p] = dv + 1;
1528 if (sp != sv) {
1600 if (sp != sv) {
1529 interesting[sv] += 1;
1601 interesting[sv] += 1;
1530 seen[p] = sv;
1602 seen[p] = sv;
1531 if (sp) {
1603 if (sp) {
1532 interesting[sp] -= 1;
1604 interesting[sp] -= 1;
1533 if (interesting[sp] == 0)
1605 if (interesting[sp] == 0)
1534 ninteresting -= 1;
1606 ninteresting -= 1;
1535 }
1607 }
1536 }
1608 }
1537 }
1609 }
1538 else if (dv == dp - 1) {
1610 else if (dv == dp - 1) {
1539 long nsp = sp | sv;
1611 long nsp = sp | sv;
1540 if (nsp == sp)
1612 if (nsp == sp)
1541 continue;
1613 continue;
1542 seen[p] = nsp;
1614 seen[p] = nsp;
1543 interesting[sp] -= 1;
1615 interesting[sp] -= 1;
1544 if (interesting[sp] == 0)
1616 if (interesting[sp] == 0)
1545 ninteresting -= 1;
1617 ninteresting -= 1;
1546 if (interesting[nsp] == 0)
1618 if (interesting[nsp] == 0)
1547 ninteresting += 1;
1619 ninteresting += 1;
1548 interesting[nsp] += 1;
1620 interesting[nsp] += 1;
1549 }
1621 }
1550 }
1622 }
1551 interesting[sv] -= 1;
1623 interesting[sv] -= 1;
1552 if (interesting[sv] == 0)
1624 if (interesting[sv] == 0)
1553 ninteresting -= 1;
1625 ninteresting -= 1;
1554 }
1626 }
1555
1627
1556 final = 0;
1628 final = 0;
1557 j = ninteresting;
1629 j = ninteresting;
1558 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
1630 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
1559 if (interesting[i] == 0)
1631 if (interesting[i] == 0)
1560 continue;
1632 continue;
1561 final |= i;
1633 final |= i;
1562 j -= 1;
1634 j -= 1;
1563 }
1635 }
1564 if (final == 0) {
1636 if (final == 0) {
1565 keys = PyList_New(0);
1637 keys = PyList_New(0);
1566 goto bail;
1638 goto bail;
1567 }
1639 }
1568
1640
1569 dict = PyDict_New();
1641 dict = PyDict_New();
1570 if (dict == NULL)
1642 if (dict == NULL)
1571 goto bail;
1643 goto bail;
1572
1644
1573 for (i = 0; i < revcount; i++) {
1645 for (i = 0; i < revcount; i++) {
1574 PyObject *key;
1646 PyObject *key;
1575
1647
1576 if ((final & (1 << i)) == 0)
1648 if ((final & (1 << i)) == 0)
1577 continue;
1649 continue;
1578
1650
1579 key = PyList_GET_ITEM(revs, i);
1651 key = PyList_GET_ITEM(revs, i);
1580 Py_INCREF(key);
1652 Py_INCREF(key);
1581 Py_INCREF(Py_None);
1653 Py_INCREF(Py_None);
1582 if (PyDict_SetItem(dict, key, Py_None) == -1) {
1654 if (PyDict_SetItem(dict, key, Py_None) == -1) {
1583 Py_DECREF(key);
1655 Py_DECREF(key);
1584 Py_DECREF(Py_None);
1656 Py_DECREF(Py_None);
1585 goto bail;
1657 goto bail;
1586 }
1658 }
1587 }
1659 }
1588
1660
1589 keys = PyDict_Keys(dict);
1661 keys = PyDict_Keys(dict);
1590
1662
1591 bail:
1663 bail:
1592 free(depth);
1664 free(depth);
1593 free(seen);
1665 free(seen);
1594 free(interesting);
1666 free(interesting);
1595 Py_XDECREF(dict);
1667 Py_XDECREF(dict);
1596
1668
1597 return keys;
1669 return keys;
1598 }
1670 }
1599
1671
/*
 * Given a (possibly overlapping) set of revs, return all the
 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
 */
static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
{
	PyObject *ret = NULL;
	Py_ssize_t argcount, i, len;
	bitmask repeat = 0;
	int revcount = 0;
	int *revs;

	argcount = PySequence_Length(args);
	revs = PyMem_Malloc(argcount * sizeof(*revs));
	/* PyMem_Malloc(0) may return NULL; only treat it as OOM when
	   we actually asked for memory */
	if (argcount > 0 && revs == NULL)
		return PyErr_NoMemory();
	len = index_length(self) - 1;

	for (i = 0; i < argcount; i++) {
		static const int capacity = 24;
		PyObject *obj = PySequence_GetItem(args, i);
		bitmask x;
		long val;

		if (!PyInt_Check(obj)) {
			PyErr_SetString(PyExc_TypeError,
					"arguments must all be ints");
			Py_DECREF(obj);
			goto bail;
		}
		val = PyInt_AsLong(obj);
		Py_DECREF(obj);
		if (val == -1) {
			/* nullrev is an ancestor of everything: the only
			   common ancestor head is nullrev itself, which is
			   represented by an empty list here */
			ret = PyList_New(0);
			goto done;
		}
		if (val < 0 || val >= len) {
			PyErr_SetString(PyExc_IndexError,
					"index out of range");
			goto bail;
		}
		/* this cheesy bloom filter lets us avoid some more
		 * expensive duplicate checks in the common set-is-disjoint
		 * case */
		x = 1ull << (val & 0x3f);
		if (repeat & x) {
			int k;
			for (k = 0; k < revcount; k++) {
				if (val == revs[k])
					goto duplicate;
			}
		}
		else repeat |= x;
		if (revcount >= capacity) {
			PyErr_Format(PyExc_OverflowError,
				     "bitset size (%d) > capacity (%d)",
				     revcount, capacity);
			goto bail;
		}
		revs[revcount++] = (int)val;
		duplicate:;
	}

	/* trivial cases: no revs, or a single rev is its own answer */
	if (revcount == 0) {
		ret = PyList_New(0);
		goto done;
	}
	if (revcount == 1) {
		PyObject *obj;
		ret = PyList_New(1);
		if (ret == NULL)
			goto bail;
		obj = PyInt_FromLong(revs[0]);
		if (obj == NULL)
			goto bail;
		PyList_SET_ITEM(ret, 0, obj);
		goto done;
	}

	ret = find_gca_candidates(self, revs, revcount);
	if (ret == NULL)
		goto bail;

done:
	PyMem_Free(revs);
	return ret;

bail:
	PyMem_Free(revs);
	Py_XDECREF(ret);
	return NULL;
}
1692
1764
1693 /*
1765 /*
1694 * Given a (possibly overlapping) set of revs, return the greatest
1766 * Given a (possibly overlapping) set of revs, return the greatest
1695 * common ancestors: those with the longest path to the root.
1767 * common ancestors: those with the longest path to the root.
1696 */
1768 */
1697 static PyObject *index_ancestors(indexObject *self, PyObject *args)
1769 static PyObject *index_ancestors(indexObject *self, PyObject *args)
1698 {
1770 {
1699 PyObject *ret;
1771 PyObject *ret;
1700 PyObject *gca = index_commonancestorsheads(self, args);
1772 PyObject *gca = index_commonancestorsheads(self, args);
1701 if (gca == NULL)
1773 if (gca == NULL)
1702 return NULL;
1774 return NULL;
1703
1775
1704 if (PyList_GET_SIZE(gca) <= 1) {
1776 if (PyList_GET_SIZE(gca) <= 1) {
1705 return gca;
1777 return gca;
1706 }
1778 }
1707
1779
1708 ret = find_deepest(self, gca);
1780 ret = find_deepest(self, gca);
1709 Py_DECREF(gca);
1781 Py_DECREF(gca);
1710 return ret;
1782 return ret;
1711 }
1783 }
1712
1784
1713 /*
1785 /*
1714 * Invalidate any trie entries introduced by added revs.
1786 * Invalidate any trie entries introduced by added revs.
1715 */
1787 */
1716 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
1788 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
1717 {
1789 {
1718 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
1790 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
1719
1791
1720 for (i = start; i < len; i++) {
1792 for (i = start; i < len; i++) {
1721 PyObject *tuple = PyList_GET_ITEM(self->added, i);
1793 PyObject *tuple = PyList_GET_ITEM(self->added, i);
1722 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
1794 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
1723
1795
1724 nt_insert(self, PyBytes_AS_STRING(node), -1);
1796 nt_insert(self, PyBytes_AS_STRING(node), -1);
1725 }
1797 }
1726
1798
1727 if (start == 0)
1799 if (start == 0)
1728 Py_CLEAR(self->added);
1800 Py_CLEAR(self->added);
1729 }
1801 }
1730
1802
1731 /*
1803 /*
1732 * Delete a numeric range of revs, which must be at the end of the
1804 * Delete a numeric range of revs, which must be at the end of the
1733 * range, but exclude the sentinel nullid entry.
1805 * range, but exclude the sentinel nullid entry.
1734 */
1806 */
1735 static int index_slice_del(indexObject *self, PyObject *item)
1807 static int index_slice_del(indexObject *self, PyObject *item)
1736 {
1808 {
1737 Py_ssize_t start, stop, step, slicelength;
1809 Py_ssize_t start, stop, step, slicelength;
1738 Py_ssize_t length = index_length(self);
1810 Py_ssize_t length = index_length(self);
1739 int ret = 0;
1811 int ret = 0;
1740
1812
1741 /* Argument changed from PySliceObject* to PyObject* in Python 3. */
1813 /* Argument changed from PySliceObject* to PyObject* in Python 3. */
1742 #ifdef IS_PY3K
1814 #ifdef IS_PY3K
1743 if (PySlice_GetIndicesEx(item, length,
1815 if (PySlice_GetIndicesEx(item, length,
1744 #else
1816 #else
1745 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
1817 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
1746 #endif
1818 #endif
1747 &start, &stop, &step, &slicelength) < 0)
1819 &start, &stop, &step, &slicelength) < 0)
1748 return -1;
1820 return -1;
1749
1821
1750 if (slicelength <= 0)
1822 if (slicelength <= 0)
1751 return 0;
1823 return 0;
1752
1824
1753 if ((step < 0 && start < stop) || (step > 0 && start > stop))
1825 if ((step < 0 && start < stop) || (step > 0 && start > stop))
1754 stop = start;
1826 stop = start;
1755
1827
1756 if (step < 0) {
1828 if (step < 0) {
1757 stop = start + 1;
1829 stop = start + 1;
1758 start = stop + step*(slicelength - 1) - 1;
1830 start = stop + step*(slicelength - 1) - 1;
1759 step = -step;
1831 step = -step;
1760 }
1832 }
1761
1833
1762 if (step != 1) {
1834 if (step != 1) {
1763 PyErr_SetString(PyExc_ValueError,
1835 PyErr_SetString(PyExc_ValueError,
1764 "revlog index delete requires step size of 1");
1836 "revlog index delete requires step size of 1");
1765 return -1;
1837 return -1;
1766 }
1838 }
1767
1839
1768 if (stop != length - 1) {
1840 if (stop != length - 1) {
1769 PyErr_SetString(PyExc_IndexError,
1841 PyErr_SetString(PyExc_IndexError,
1770 "revlog index deletion indices are invalid");
1842 "revlog index deletion indices are invalid");
1771 return -1;
1843 return -1;
1772 }
1844 }
1773
1845
1774 if (start < self->length - 1) {
1846 if (start < self->length - 1) {
1775 if (self->nt) {
1847 if (self->nt) {
1776 Py_ssize_t i;
1848 Py_ssize_t i;
1777
1849
1778 for (i = start + 1; i < self->length - 1; i++) {
1850 for (i = start + 1; i < self->length - 1; i++) {
1779 const char *node = index_node(self, i);
1851 const char *node = index_node(self, i);
1780
1852
1781 if (node)
1853 if (node)
1782 nt_insert(self, node, -1);
1854 nt_insert(self, node, -1);
1783 }
1855 }
1784 if (self->added)
1856 if (self->added)
1785 nt_invalidate_added(self, 0);
1857 nt_invalidate_added(self, 0);
1786 if (self->ntrev > start)
1858 if (self->ntrev > start)
1787 self->ntrev = (int)start;
1859 self->ntrev = (int)start;
1788 }
1860 }
1789 self->length = start + 1;
1861 self->length = start + 1;
1790 if (start < self->raw_length) {
1862 if (start < self->raw_length) {
1791 if (self->cache) {
1863 if (self->cache) {
1792 Py_ssize_t i;
1864 Py_ssize_t i;
1793 for (i = start; i < self->raw_length; i++)
1865 for (i = start; i < self->raw_length; i++)
1794 Py_CLEAR(self->cache[i]);
1866 Py_CLEAR(self->cache[i]);
1795 }
1867 }
1796 self->raw_length = start;
1868 self->raw_length = start;
1797 }
1869 }
1798 goto done;
1870 goto done;
1799 }
1871 }
1800
1872
1801 if (self->nt) {
1873 if (self->nt) {
1802 nt_invalidate_added(self, start - self->length + 1);
1874 nt_invalidate_added(self, start - self->length + 1);
1803 if (self->ntrev > start)
1875 if (self->ntrev > start)
1804 self->ntrev = (int)start;
1876 self->ntrev = (int)start;
1805 }
1877 }
1806 if (self->added)
1878 if (self->added)
1807 ret = PyList_SetSlice(self->added, start - self->length + 1,
1879 ret = PyList_SetSlice(self->added, start - self->length + 1,
1808 PyList_GET_SIZE(self->added), NULL);
1880 PyList_GET_SIZE(self->added), NULL);
1809 done:
1881 done:
1810 Py_CLEAR(self->headrevs);
1882 Py_CLEAR(self->headrevs);
1811 return ret;
1883 return ret;
1812 }
1884 }
1813
1885
1814 /*
1886 /*
1815 * Supported ops:
1887 * Supported ops:
1816 *
1888 *
1817 * slice deletion
1889 * slice deletion
1818 * string assignment (extend node->rev mapping)
1890 * string assignment (extend node->rev mapping)
1819 * string deletion (shrink node->rev mapping)
1891 * string deletion (shrink node->rev mapping)
1820 */
1892 */
1821 static int index_assign_subscript(indexObject *self, PyObject *item,
1893 static int index_assign_subscript(indexObject *self, PyObject *item,
1822 PyObject *value)
1894 PyObject *value)
1823 {
1895 {
1824 char *node;
1896 char *node;
1825 Py_ssize_t nodelen;
1897 Py_ssize_t nodelen;
1826 long rev;
1898 long rev;
1827
1899
1828 if (PySlice_Check(item) && value == NULL)
1900 if (PySlice_Check(item) && value == NULL)
1829 return index_slice_del(self, item);
1901 return index_slice_del(self, item);
1830
1902
1831 if (node_check(item, &node, &nodelen) == -1)
1903 if (node_check(item, &node, &nodelen) == -1)
1832 return -1;
1904 return -1;
1833
1905
1834 if (value == NULL)
1906 if (value == NULL)
1835 return self->nt ? nt_insert(self, node, -1) : 0;
1907 return self->nt ? nt_insert(self, node, -1) : 0;
1836 rev = PyInt_AsLong(value);
1908 rev = PyInt_AsLong(value);
1837 if (rev > INT_MAX || rev < 0) {
1909 if (rev > INT_MAX || rev < 0) {
1838 if (!PyErr_Occurred())
1910 if (!PyErr_Occurred())
1839 PyErr_SetString(PyExc_ValueError, "rev out of range");
1911 PyErr_SetString(PyExc_ValueError, "rev out of range");
1840 return -1;
1912 return -1;
1841 }
1913 }
1842
1914
1843 if (nt_init(self) == -1)
1915 if (nt_init(self) == -1)
1844 return -1;
1916 return -1;
1845 return nt_insert(self, node, (int)rev);
1917 return nt_insert(self, node, (int)rev);
1846 }
1918 }
1847
1919
1848 /*
1920 /*
1849 * Find all RevlogNG entries in an index that has inline data. Update
1921 * Find all RevlogNG entries in an index that has inline data. Update
1850 * the optional "offsets" table with those entries.
1922 * the optional "offsets" table with those entries.
1851 */
1923 */
1852 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
1924 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
1853 {
1925 {
1854 const char *data = (const char *)self->buf.buf;
1926 const char *data = (const char *)self->buf.buf;
1855 Py_ssize_t pos = 0;
1927 Py_ssize_t pos = 0;
1856 Py_ssize_t end = self->buf.len;
1928 Py_ssize_t end = self->buf.len;
1857 long incr = v1_hdrsize;
1929 long incr = v1_hdrsize;
1858 Py_ssize_t len = 0;
1930 Py_ssize_t len = 0;
1859
1931
1860 while (pos + v1_hdrsize <= end && pos >= 0) {
1932 while (pos + v1_hdrsize <= end && pos >= 0) {
1861 uint32_t comp_len;
1933 uint32_t comp_len;
1862 /* 3rd element of header is length of compressed inline data */
1934 /* 3rd element of header is length of compressed inline data */
1863 comp_len = getbe32(data + pos + 8);
1935 comp_len = getbe32(data + pos + 8);
1864 incr = v1_hdrsize + comp_len;
1936 incr = v1_hdrsize + comp_len;
1865 if (offsets)
1937 if (offsets)
1866 offsets[len] = data + pos;
1938 offsets[len] = data + pos;
1867 len++;
1939 len++;
1868 pos += incr;
1940 pos += incr;
1869 }
1941 }
1870
1942
1871 if (pos != end) {
1943 if (pos != end) {
1872 if (!PyErr_Occurred())
1944 if (!PyErr_Occurred())
1873 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1945 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1874 return -1;
1946 return -1;
1875 }
1947 }
1876
1948
1877 return len;
1949 return len;
1878 }
1950 }
1879
1951
1880 static int index_init(indexObject *self, PyObject *args)
1952 static int index_init(indexObject *self, PyObject *args)
1881 {
1953 {
1882 PyObject *data_obj, *inlined_obj;
1954 PyObject *data_obj, *inlined_obj;
1883 Py_ssize_t size;
1955 Py_ssize_t size;
1884
1956
1885 /* Initialize before argument-checking to avoid index_dealloc() crash. */
1957 /* Initialize before argument-checking to avoid index_dealloc() crash. */
1886 self->raw_length = 0;
1958 self->raw_length = 0;
1887 self->added = NULL;
1959 self->added = NULL;
1888 self->cache = NULL;
1960 self->cache = NULL;
1889 self->data = NULL;
1961 self->data = NULL;
1890 memset(&self->buf, 0, sizeof(self->buf));
1962 memset(&self->buf, 0, sizeof(self->buf));
1891 self->headrevs = NULL;
1963 self->headrevs = NULL;
1892 self->filteredrevs = Py_None;
1964 self->filteredrevs = Py_None;
1893 Py_INCREF(Py_None);
1965 Py_INCREF(Py_None);
1894 self->nt = NULL;
1966 self->nt = NULL;
1895 self->offsets = NULL;
1967 self->offsets = NULL;
1896
1968
1897 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1969 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1898 return -1;
1970 return -1;
1899 if (!PyObject_CheckBuffer(data_obj)) {
1971 if (!PyObject_CheckBuffer(data_obj)) {
1900 PyErr_SetString(PyExc_TypeError,
1972 PyErr_SetString(PyExc_TypeError,
1901 "data does not support buffer interface");
1973 "data does not support buffer interface");
1902 return -1;
1974 return -1;
1903 }
1975 }
1904
1976
1905 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
1977 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
1906 return -1;
1978 return -1;
1907 size = self->buf.len;
1979 size = self->buf.len;
1908
1980
1909 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1981 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1910 self->data = data_obj;
1982 self->data = data_obj;
1911
1983
1912 self->ntlength = self->ntcapacity = 0;
1984 self->ntlength = self->ntcapacity = 0;
1913 self->ntdepth = self->ntsplits = 0;
1985 self->ntdepth = self->ntsplits = 0;
1914 self->ntlookups = self->ntmisses = 0;
1986 self->ntlookups = self->ntmisses = 0;
1915 self->ntrev = -1;
1987 self->ntrev = -1;
1916 Py_INCREF(self->data);
1988 Py_INCREF(self->data);
1917
1989
1918 if (self->inlined) {
1990 if (self->inlined) {
1919 Py_ssize_t len = inline_scan(self, NULL);
1991 Py_ssize_t len = inline_scan(self, NULL);
1920 if (len == -1)
1992 if (len == -1)
1921 goto bail;
1993 goto bail;
1922 self->raw_length = len;
1994 self->raw_length = len;
1923 self->length = len + 1;
1995 self->length = len + 1;
1924 } else {
1996 } else {
1925 if (size % v1_hdrsize) {
1997 if (size % v1_hdrsize) {
1926 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1998 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1927 goto bail;
1999 goto bail;
1928 }
2000 }
1929 self->raw_length = size / v1_hdrsize;
2001 self->raw_length = size / v1_hdrsize;
1930 self->length = self->raw_length + 1;
2002 self->length = self->raw_length + 1;
1931 }
2003 }
1932
2004
1933 return 0;
2005 return 0;
1934 bail:
2006 bail:
1935 return -1;
2007 return -1;
1936 }
2008 }
1937
2009
1938 static PyObject *index_nodemap(indexObject *self)
2010 static PyObject *index_nodemap(indexObject *self)
1939 {
2011 {
1940 Py_INCREF(self);
2012 Py_INCREF(self);
1941 return (PyObject *)self;
2013 return (PyObject *)self;
1942 }
2014 }
1943
2015
1944 static void index_dealloc(indexObject *self)
2016 static void index_dealloc(indexObject *self)
1945 {
2017 {
1946 _index_clearcaches(self);
2018 _index_clearcaches(self);
1947 Py_XDECREF(self->filteredrevs);
2019 Py_XDECREF(self->filteredrevs);
1948 if (self->buf.buf) {
2020 if (self->buf.buf) {
1949 PyBuffer_Release(&self->buf);
2021 PyBuffer_Release(&self->buf);
1950 memset(&self->buf, 0, sizeof(self->buf));
2022 memset(&self->buf, 0, sizeof(self->buf));
1951 }
2023 }
1952 Py_XDECREF(self->data);
2024 Py_XDECREF(self->data);
1953 Py_XDECREF(self->added);
2025 Py_XDECREF(self->added);
1954 PyObject_Del(self);
2026 PyObject_Del(self);
1955 }
2027 }
1956
2028
1957 static PySequenceMethods index_sequence_methods = {
2029 static PySequenceMethods index_sequence_methods = {
1958 (lenfunc)index_length, /* sq_length */
2030 (lenfunc)index_length, /* sq_length */
1959 0, /* sq_concat */
2031 0, /* sq_concat */
1960 0, /* sq_repeat */
2032 0, /* sq_repeat */
1961 (ssizeargfunc)index_get, /* sq_item */
2033 (ssizeargfunc)index_get, /* sq_item */
1962 0, /* sq_slice */
2034 0, /* sq_slice */
1963 0, /* sq_ass_item */
2035 0, /* sq_ass_item */
1964 0, /* sq_ass_slice */
2036 0, /* sq_ass_slice */
1965 (objobjproc)index_contains, /* sq_contains */
2037 (objobjproc)index_contains, /* sq_contains */
1966 };
2038 };
1967
2039
1968 static PyMappingMethods index_mapping_methods = {
2040 static PyMappingMethods index_mapping_methods = {
1969 (lenfunc)index_length, /* mp_length */
2041 (lenfunc)index_length, /* mp_length */
1970 (binaryfunc)index_getitem, /* mp_subscript */
2042 (binaryfunc)index_getitem, /* mp_subscript */
1971 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
2043 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1972 };
2044 };
1973
2045
1974 static PyMethodDef index_methods[] = {
2046 static PyMethodDef index_methods[] = {
1975 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
2047 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
1976 "return the gca set of the given revs"},
2048 "return the gca set of the given revs"},
1977 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
2049 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
1978 METH_VARARGS,
2050 METH_VARARGS,
1979 "return the heads of the common ancestors of the given revs"},
2051 "return the heads of the common ancestors of the given revs"},
1980 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
2052 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1981 "clear the index caches"},
2053 "clear the index caches"},
1982 {"get", (PyCFunction)index_m_get, METH_VARARGS,
2054 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1983 "get an index entry"},
2055 "get an index entry"},
1984 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets,
2056 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets,
1985 METH_VARARGS, "compute phases"},
2057 METH_VARARGS, "compute phases"},
1986 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
2058 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
1987 "reachableroots"},
2059 "reachableroots"},
1988 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
2060 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
1989 "get head revisions"}, /* Can do filtering since 3.2 */
2061 "get head revisions"}, /* Can do filtering since 3.2 */
1990 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
2062 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
1991 "get filtered head revisions"}, /* Can always do filtering */
2063 "get filtered head revisions"}, /* Can always do filtering */
1992 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
2064 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
1993 "determine revisions with deltas to reconstruct fulltext"},
2065 "determine revisions with deltas to reconstruct fulltext"},
1994 {"insert", (PyCFunction)index_insert, METH_VARARGS,
2066 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1995 "insert an index entry"},
2067 "insert an index entry"},
1996 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
2068 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
1997 "match a potentially ambiguous node ID"},
2069 "match a potentially ambiguous node ID"},
2070 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
2071 "find length of shortest hex nodeid of a binary ID"},
1998 {"stats", (PyCFunction)index_stats, METH_NOARGS,
2072 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1999 "stats for the index"},
2073 "stats for the index"},
2000 {NULL} /* Sentinel */
2074 {NULL} /* Sentinel */
2001 };
2075 };
2002
2076
2003 static PyGetSetDef index_getset[] = {
2077 static PyGetSetDef index_getset[] = {
2004 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
2078 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
2005 {NULL} /* Sentinel */
2079 {NULL} /* Sentinel */
2006 };
2080 };
2007
2081
2008 static PyTypeObject indexType = {
2082 static PyTypeObject indexType = {
2009 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2083 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2010 "parsers.index", /* tp_name */
2084 "parsers.index", /* tp_name */
2011 sizeof(indexObject), /* tp_basicsize */
2085 sizeof(indexObject), /* tp_basicsize */
2012 0, /* tp_itemsize */
2086 0, /* tp_itemsize */
2013 (destructor)index_dealloc, /* tp_dealloc */
2087 (destructor)index_dealloc, /* tp_dealloc */
2014 0, /* tp_print */
2088 0, /* tp_print */
2015 0, /* tp_getattr */
2089 0, /* tp_getattr */
2016 0, /* tp_setattr */
2090 0, /* tp_setattr */
2017 0, /* tp_compare */
2091 0, /* tp_compare */
2018 0, /* tp_repr */
2092 0, /* tp_repr */
2019 0, /* tp_as_number */
2093 0, /* tp_as_number */
2020 &index_sequence_methods, /* tp_as_sequence */
2094 &index_sequence_methods, /* tp_as_sequence */
2021 &index_mapping_methods, /* tp_as_mapping */
2095 &index_mapping_methods, /* tp_as_mapping */
2022 0, /* tp_hash */
2096 0, /* tp_hash */
2023 0, /* tp_call */
2097 0, /* tp_call */
2024 0, /* tp_str */
2098 0, /* tp_str */
2025 0, /* tp_getattro */
2099 0, /* tp_getattro */
2026 0, /* tp_setattro */
2100 0, /* tp_setattro */
2027 0, /* tp_as_buffer */
2101 0, /* tp_as_buffer */
2028 Py_TPFLAGS_DEFAULT, /* tp_flags */
2102 Py_TPFLAGS_DEFAULT, /* tp_flags */
2029 "revlog index", /* tp_doc */
2103 "revlog index", /* tp_doc */
2030 0, /* tp_traverse */
2104 0, /* tp_traverse */
2031 0, /* tp_clear */
2105 0, /* tp_clear */
2032 0, /* tp_richcompare */
2106 0, /* tp_richcompare */
2033 0, /* tp_weaklistoffset */
2107 0, /* tp_weaklistoffset */
2034 0, /* tp_iter */
2108 0, /* tp_iter */
2035 0, /* tp_iternext */
2109 0, /* tp_iternext */
2036 index_methods, /* tp_methods */
2110 index_methods, /* tp_methods */
2037 0, /* tp_members */
2111 0, /* tp_members */
2038 index_getset, /* tp_getset */
2112 index_getset, /* tp_getset */
2039 0, /* tp_base */
2113 0, /* tp_base */
2040 0, /* tp_dict */
2114 0, /* tp_dict */
2041 0, /* tp_descr_get */
2115 0, /* tp_descr_get */
2042 0, /* tp_descr_set */
2116 0, /* tp_descr_set */
2043 0, /* tp_dictoffset */
2117 0, /* tp_dictoffset */
2044 (initproc)index_init, /* tp_init */
2118 (initproc)index_init, /* tp_init */
2045 0, /* tp_alloc */
2119 0, /* tp_alloc */
2046 };
2120 };
2047
2121
2048 /*
2122 /*
2049 * returns a tuple of the form (index, index, cache) with elements as
2123 * returns a tuple of the form (index, index, cache) with elements as
2050 * follows:
2124 * follows:
2051 *
2125 *
2052 * index: an index object that lazily parses RevlogNG records
2126 * index: an index object that lazily parses RevlogNG records
2053 * cache: if data is inlined, a tuple (0, index_file_content), else None
2127 * cache: if data is inlined, a tuple (0, index_file_content), else None
2054 * index_file_content could be a string, or a buffer
2128 * index_file_content could be a string, or a buffer
2055 *
2129 *
2056 * added complications are for backwards compatibility
2130 * added complications are for backwards compatibility
2057 */
2131 */
2058 PyObject *parse_index2(PyObject *self, PyObject *args)
2132 PyObject *parse_index2(PyObject *self, PyObject *args)
2059 {
2133 {
2060 PyObject *tuple = NULL, *cache = NULL;
2134 PyObject *tuple = NULL, *cache = NULL;
2061 indexObject *idx;
2135 indexObject *idx;
2062 int ret;
2136 int ret;
2063
2137
2064 idx = PyObject_New(indexObject, &indexType);
2138 idx = PyObject_New(indexObject, &indexType);
2065 if (idx == NULL)
2139 if (idx == NULL)
2066 goto bail;
2140 goto bail;
2067
2141
2068 ret = index_init(idx, args);
2142 ret = index_init(idx, args);
2069 if (ret == -1)
2143 if (ret == -1)
2070 goto bail;
2144 goto bail;
2071
2145
2072 if (idx->inlined) {
2146 if (idx->inlined) {
2073 cache = Py_BuildValue("iO", 0, idx->data);
2147 cache = Py_BuildValue("iO", 0, idx->data);
2074 if (cache == NULL)
2148 if (cache == NULL)
2075 goto bail;
2149 goto bail;
2076 } else {
2150 } else {
2077 cache = Py_None;
2151 cache = Py_None;
2078 Py_INCREF(cache);
2152 Py_INCREF(cache);
2079 }
2153 }
2080
2154
2081 tuple = Py_BuildValue("NN", idx, cache);
2155 tuple = Py_BuildValue("NN", idx, cache);
2082 if (!tuple)
2156 if (!tuple)
2083 goto bail;
2157 goto bail;
2084 return tuple;
2158 return tuple;
2085
2159
2086 bail:
2160 bail:
2087 Py_XDECREF(idx);
2161 Py_XDECREF(idx);
2088 Py_XDECREF(cache);
2162 Py_XDECREF(cache);
2089 Py_XDECREF(tuple);
2163 Py_XDECREF(tuple);
2090 return NULL;
2164 return NULL;
2091 }
2165 }
2092
2166
2093 void revlog_module_init(PyObject *mod)
2167 void revlog_module_init(PyObject *mod)
2094 {
2168 {
2095 indexType.tp_new = PyType_GenericNew;
2169 indexType.tp_new = PyType_GenericNew;
2096 if (PyType_Ready(&indexType) < 0)
2170 if (PyType_Ready(&indexType) < 0)
2097 return;
2171 return;
2098 Py_INCREF(&indexType);
2172 Py_INCREF(&indexType);
2099 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
2173 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
2100
2174
2101 nullentry = Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0, 0,
2175 nullentry = Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0, 0,
2102 -1, -1, -1, -1, nullid, 20);
2176 -1, -1, -1, -1, nullid, 20);
2103 if (nullentry)
2177 if (nullentry)
2104 PyObject_GC_UnTrack(nullentry);
2178 PyObject_GC_UnTrack(nullentry);
2105 }
2179 }
@@ -1,109 +1,109
1 # policy.py - module policy logic for Mercurial.
1 # policy.py - module policy logic for Mercurial.
2 #
2 #
3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import os
10 import os
11 import sys
11 import sys
12
12
13 # Rules for how modules can be loaded. Values are:
13 # Rules for how modules can be loaded. Values are:
14 #
14 #
15 # c - require C extensions
15 # c - require C extensions
16 # allow - allow pure Python implementation when C loading fails
16 # allow - allow pure Python implementation when C loading fails
17 # cffi - required cffi versions (implemented within pure module)
17 # cffi - required cffi versions (implemented within pure module)
18 # cffi-allow - allow pure Python implementation if cffi version is missing
18 # cffi-allow - allow pure Python implementation if cffi version is missing
19 # py - only load pure Python modules
19 # py - only load pure Python modules
20 #
20 #
21 # By default, fall back to the pure modules so the in-place build can
21 # By default, fall back to the pure modules so the in-place build can
22 # run without recompiling the C extensions. This will be overridden by
22 # run without recompiling the C extensions. This will be overridden by
23 # __modulepolicy__ generated by setup.py.
23 # __modulepolicy__ generated by setup.py.
24 policy = b'allow'
24 policy = b'allow'
25 _packageprefs = {
25 _packageprefs = {
26 # policy: (versioned package, pure package)
26 # policy: (versioned package, pure package)
27 b'c': (r'cext', None),
27 b'c': (r'cext', None),
28 b'allow': (r'cext', r'pure'),
28 b'allow': (r'cext', r'pure'),
29 b'cffi': (r'cffi', None),
29 b'cffi': (r'cffi', None),
30 b'cffi-allow': (r'cffi', r'pure'),
30 b'cffi-allow': (r'cffi', r'pure'),
31 b'py': (None, r'pure'),
31 b'py': (None, r'pure'),
32 }
32 }
33
33
34 try:
34 try:
35 from . import __modulepolicy__
35 from . import __modulepolicy__
36 policy = __modulepolicy__.modulepolicy
36 policy = __modulepolicy__.modulepolicy
37 except ImportError:
37 except ImportError:
38 pass
38 pass
39
39
40 # PyPy doesn't load C extensions.
40 # PyPy doesn't load C extensions.
41 #
41 #
42 # The canonical way to do this is to test platform.python_implementation().
42 # The canonical way to do this is to test platform.python_implementation().
43 # But we don't import platform and don't bloat for it here.
43 # But we don't import platform and don't bloat for it here.
44 if r'__pypy__' in sys.builtin_module_names:
44 if r'__pypy__' in sys.builtin_module_names:
45 policy = b'cffi'
45 policy = b'cffi'
46
46
47 # Environment variable can always force settings.
47 # Environment variable can always force settings.
48 if sys.version_info[0] >= 3:
48 if sys.version_info[0] >= 3:
49 if r'HGMODULEPOLICY' in os.environ:
49 if r'HGMODULEPOLICY' in os.environ:
50 policy = os.environ[r'HGMODULEPOLICY'].encode(r'utf-8')
50 policy = os.environ[r'HGMODULEPOLICY'].encode(r'utf-8')
51 else:
51 else:
52 policy = os.environ.get(r'HGMODULEPOLICY', policy)
52 policy = os.environ.get(r'HGMODULEPOLICY', policy)
53
53
54 def _importfrom(pkgname, modname):
54 def _importfrom(pkgname, modname):
55 # from .<pkgname> import <modname> (where . is looked through this module)
55 # from .<pkgname> import <modname> (where . is looked through this module)
56 fakelocals = {}
56 fakelocals = {}
57 pkg = __import__(pkgname, globals(), fakelocals, [modname], level=1)
57 pkg = __import__(pkgname, globals(), fakelocals, [modname], level=1)
58 try:
58 try:
59 fakelocals[modname] = mod = getattr(pkg, modname)
59 fakelocals[modname] = mod = getattr(pkg, modname)
60 except AttributeError:
60 except AttributeError:
61 raise ImportError(r'cannot import name %s' % modname)
61 raise ImportError(r'cannot import name %s' % modname)
62 # force import; fakelocals[modname] may be replaced with the real module
62 # force import; fakelocals[modname] may be replaced with the real module
63 getattr(mod, r'__doc__', None)
63 getattr(mod, r'__doc__', None)
64 return fakelocals[modname]
64 return fakelocals[modname]
65
65
66 # keep in sync with "version" in C modules
66 # keep in sync with "version" in C modules
67 _cextversions = {
67 _cextversions = {
68 (r'cext', r'base85'): 1,
68 (r'cext', r'base85'): 1,
69 (r'cext', r'bdiff'): 3,
69 (r'cext', r'bdiff'): 3,
70 (r'cext', r'mpatch'): 1,
70 (r'cext', r'mpatch'): 1,
71 (r'cext', r'osutil'): 4,
71 (r'cext', r'osutil'): 4,
72 (r'cext', r'parsers'): 4,
72 (r'cext', r'parsers'): 5,
73 }
73 }
74
74
75 # map import request to other package or module
75 # map import request to other package or module
76 _modredirects = {
76 _modredirects = {
77 (r'cext', r'charencode'): (r'cext', r'parsers'),
77 (r'cext', r'charencode'): (r'cext', r'parsers'),
78 (r'cffi', r'base85'): (r'pure', r'base85'),
78 (r'cffi', r'base85'): (r'pure', r'base85'),
79 (r'cffi', r'charencode'): (r'pure', r'charencode'),
79 (r'cffi', r'charencode'): (r'pure', r'charencode'),
80 (r'cffi', r'parsers'): (r'pure', r'parsers'),
80 (r'cffi', r'parsers'): (r'pure', r'parsers'),
81 }
81 }
82
82
83 def _checkmod(pkgname, modname, mod):
83 def _checkmod(pkgname, modname, mod):
84 expected = _cextversions.get((pkgname, modname))
84 expected = _cextversions.get((pkgname, modname))
85 actual = getattr(mod, r'version', None)
85 actual = getattr(mod, r'version', None)
86 if actual != expected:
86 if actual != expected:
87 raise ImportError(r'cannot import module %s.%s '
87 raise ImportError(r'cannot import module %s.%s '
88 r'(expected version: %d, actual: %r)'
88 r'(expected version: %d, actual: %r)'
89 % (pkgname, modname, expected, actual))
89 % (pkgname, modname, expected, actual))
90
90
91 def importmod(modname):
91 def importmod(modname):
92 """Import module according to policy and check API version"""
92 """Import module according to policy and check API version"""
93 try:
93 try:
94 verpkg, purepkg = _packageprefs[policy]
94 verpkg, purepkg = _packageprefs[policy]
95 except KeyError:
95 except KeyError:
96 raise ImportError(r'invalid HGMODULEPOLICY %r' % policy)
96 raise ImportError(r'invalid HGMODULEPOLICY %r' % policy)
97 assert verpkg or purepkg
97 assert verpkg or purepkg
98 if verpkg:
98 if verpkg:
99 pn, mn = _modredirects.get((verpkg, modname), (verpkg, modname))
99 pn, mn = _modredirects.get((verpkg, modname), (verpkg, modname))
100 try:
100 try:
101 mod = _importfrom(pn, mn)
101 mod = _importfrom(pn, mn)
102 if pn == verpkg:
102 if pn == verpkg:
103 _checkmod(pn, mn, mod)
103 _checkmod(pn, mn, mod)
104 return mod
104 return mod
105 except ImportError:
105 except ImportError:
106 if not purepkg:
106 if not purepkg:
107 raise
107 raise
108 pn, mn = _modredirects.get((purepkg, modname), (purepkg, modname))
108 pn, mn = _modredirects.get((purepkg, modname), (purepkg, modname))
109 return _importfrom(pn, mn)
109 return _importfrom(pn, mn)
@@ -1,2597 +1,2623
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import collections
16 import collections
17 import contextlib
17 import contextlib
18 import errno
18 import errno
19 import hashlib
19 import hashlib
20 import heapq
20 import heapq
21 import os
21 import os
22 import re
22 import re
23 import struct
23 import struct
24 import zlib
24 import zlib
25
25
26 # import stuff from node for others to import from revlog
26 # import stuff from node for others to import from revlog
27 from .node import (
27 from .node import (
28 bin,
28 bin,
29 hex,
29 hex,
30 nullid,
30 nullid,
31 nullrev,
31 nullrev,
32 wdirfilenodeids,
32 wdirfilenodeids,
33 wdirhex,
33 wdirhex,
34 wdirid,
34 wdirid,
35 wdirrev,
35 wdirrev,
36 )
36 )
37 from .i18n import _
37 from .i18n import _
38 from .thirdparty import (
38 from .thirdparty import (
39 attr,
39 attr,
40 )
40 )
41 from . import (
41 from . import (
42 ancestor,
42 ancestor,
43 error,
43 error,
44 mdiff,
44 mdiff,
45 policy,
45 policy,
46 pycompat,
46 pycompat,
47 templatefilters,
47 templatefilters,
48 util,
48 util,
49 )
49 )
50 from .utils import (
50 from .utils import (
51 stringutil,
51 stringutil,
52 )
52 )
53
53
54 parsers = policy.importmod(r'parsers')
54 parsers = policy.importmod(r'parsers')
55
55
56 # Aliased for performance.
56 # Aliased for performance.
57 _zlibdecompress = zlib.decompress
57 _zlibdecompress = zlib.decompress
58
58
59 # revlog header flags
59 # revlog header flags
60 REVLOGV0 = 0
60 REVLOGV0 = 0
61 REVLOGV1 = 1
61 REVLOGV1 = 1
62 # Dummy value until file format is finalized.
62 # Dummy value until file format is finalized.
63 # Reminder: change the bounds check in revlog.__init__ when this is changed.
63 # Reminder: change the bounds check in revlog.__init__ when this is changed.
64 REVLOGV2 = 0xDEAD
64 REVLOGV2 = 0xDEAD
65 FLAG_INLINE_DATA = (1 << 16)
65 FLAG_INLINE_DATA = (1 << 16)
66 FLAG_GENERALDELTA = (1 << 17)
66 FLAG_GENERALDELTA = (1 << 17)
67 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
67 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
68 REVLOG_DEFAULT_FORMAT = REVLOGV1
68 REVLOG_DEFAULT_FORMAT = REVLOGV1
69 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
69 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
70 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
70 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
71 REVLOGV2_FLAGS = REVLOGV1_FLAGS
71 REVLOGV2_FLAGS = REVLOGV1_FLAGS
72
72
73 # revlog index flags
73 # revlog index flags
74 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
74 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
75 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
75 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
76 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
76 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
77 REVIDX_DEFAULT_FLAGS = 0
77 REVIDX_DEFAULT_FLAGS = 0
78 # stable order in which flags need to be processed and their processors applied
78 # stable order in which flags need to be processed and their processors applied
79 REVIDX_FLAGS_ORDER = [
79 REVIDX_FLAGS_ORDER = [
80 REVIDX_ISCENSORED,
80 REVIDX_ISCENSORED,
81 REVIDX_ELLIPSIS,
81 REVIDX_ELLIPSIS,
82 REVIDX_EXTSTORED,
82 REVIDX_EXTSTORED,
83 ]
83 ]
84 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
84 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
85 # bitmark for flags that could cause rawdata content change
85 # bitmark for flags that could cause rawdata content change
86 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
86 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
87
87
88 # max size of revlog with inline data
88 # max size of revlog with inline data
89 _maxinline = 131072
89 _maxinline = 131072
90 _chunksize = 1048576
90 _chunksize = 1048576
91
91
92 RevlogError = error.RevlogError
92 RevlogError = error.RevlogError
93 LookupError = error.LookupError
93 LookupError = error.LookupError
94 CensoredNodeError = error.CensoredNodeError
94 CensoredNodeError = error.CensoredNodeError
95 ProgrammingError = error.ProgrammingError
95 ProgrammingError = error.ProgrammingError
96
96
97 # Store flag processors (cf. 'addflagprocessor()' to register)
97 # Store flag processors (cf. 'addflagprocessor()' to register)
98 _flagprocessors = {
98 _flagprocessors = {
99 REVIDX_ISCENSORED: None,
99 REVIDX_ISCENSORED: None,
100 }
100 }
101
101
102 _mdre = re.compile('\1\n')
102 _mdre = re.compile('\1\n')
103 def parsemeta(text):
103 def parsemeta(text):
104 """return (metadatadict, metadatasize)"""
104 """return (metadatadict, metadatasize)"""
105 # text can be buffer, so we can't use .startswith or .index
105 # text can be buffer, so we can't use .startswith or .index
106 if text[:2] != '\1\n':
106 if text[:2] != '\1\n':
107 return None, None
107 return None, None
108 s = _mdre.search(text, 2).start()
108 s = _mdre.search(text, 2).start()
109 mtext = text[2:s]
109 mtext = text[2:s]
110 meta = {}
110 meta = {}
111 for l in mtext.splitlines():
111 for l in mtext.splitlines():
112 k, v = l.split(": ", 1)
112 k, v = l.split(": ", 1)
113 meta[k] = v
113 meta[k] = v
114 return meta, (s + 2)
114 return meta, (s + 2)
115
115
116 def packmeta(meta, text):
116 def packmeta(meta, text):
117 keys = sorted(meta)
117 keys = sorted(meta)
118 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
118 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
119 return "\1\n%s\1\n%s" % (metatext, text)
119 return "\1\n%s\1\n%s" % (metatext, text)
120
120
121 def _censoredtext(text):
121 def _censoredtext(text):
122 m, offs = parsemeta(text)
122 m, offs = parsemeta(text)
123 return m and "censored" in m
123 return m and "censored" in m
124
124
125 def addflagprocessor(flag, processor):
125 def addflagprocessor(flag, processor):
126 """Register a flag processor on a revision data flag.
126 """Register a flag processor on a revision data flag.
127
127
128 Invariant:
128 Invariant:
129 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
129 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
130 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
130 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
131 - Only one flag processor can be registered on a specific flag.
131 - Only one flag processor can be registered on a specific flag.
132 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
132 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
133 following signatures:
133 following signatures:
134 - (read) f(self, rawtext) -> text, bool
134 - (read) f(self, rawtext) -> text, bool
135 - (write) f(self, text) -> rawtext, bool
135 - (write) f(self, text) -> rawtext, bool
136 - (raw) f(self, rawtext) -> bool
136 - (raw) f(self, rawtext) -> bool
137 "text" is presented to the user. "rawtext" is stored in revlog data, not
137 "text" is presented to the user. "rawtext" is stored in revlog data, not
138 directly visible to the user.
138 directly visible to the user.
139 The boolean returned by these transforms is used to determine whether
139 The boolean returned by these transforms is used to determine whether
140 the returned text can be used for hash integrity checking. For example,
140 the returned text can be used for hash integrity checking. For example,
141 if "write" returns False, then "text" is used to generate hash. If
141 if "write" returns False, then "text" is used to generate hash. If
142 "write" returns True, that basically means "rawtext" returned by "write"
142 "write" returns True, that basically means "rawtext" returned by "write"
143 should be used to generate hash. Usually, "write" and "read" return
143 should be used to generate hash. Usually, "write" and "read" return
144 different booleans. And "raw" returns a same boolean as "write".
144 different booleans. And "raw" returns a same boolean as "write".
145
145
146 Note: The 'raw' transform is used for changegroup generation and in some
146 Note: The 'raw' transform is used for changegroup generation and in some
147 debug commands. In this case the transform only indicates whether the
147 debug commands. In this case the transform only indicates whether the
148 contents can be used for hash integrity checks.
148 contents can be used for hash integrity checks.
149 """
149 """
150 if not flag & REVIDX_KNOWN_FLAGS:
150 if not flag & REVIDX_KNOWN_FLAGS:
151 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
151 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
152 raise ProgrammingError(msg)
152 raise ProgrammingError(msg)
153 if flag not in REVIDX_FLAGS_ORDER:
153 if flag not in REVIDX_FLAGS_ORDER:
154 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
154 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
155 raise ProgrammingError(msg)
155 raise ProgrammingError(msg)
156 if flag in _flagprocessors:
156 if flag in _flagprocessors:
157 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
157 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
158 raise error.Abort(msg)
158 raise error.Abort(msg)
159 _flagprocessors[flag] = processor
159 _flagprocessors[flag] = processor
160
160
161 def getoffset(q):
161 def getoffset(q):
162 return int(q >> 16)
162 return int(q >> 16)
163
163
164 def gettype(q):
164 def gettype(q):
165 return int(q & 0xFFFF)
165 return int(q & 0xFFFF)
166
166
167 def offset_type(offset, type):
167 def offset_type(offset, type):
168 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
168 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
169 raise ValueError('unknown revlog index flags')
169 raise ValueError('unknown revlog index flags')
170 return int(int(offset) << 16 | type)
170 return int(int(offset) << 16 | type)
171
171
172 _nullhash = hashlib.sha1(nullid)
172 _nullhash = hashlib.sha1(nullid)
173
173
174 def hash(text, p1, p2):
174 def hash(text, p1, p2):
175 """generate a hash from the given text and its parent hashes
175 """generate a hash from the given text and its parent hashes
176
176
177 This hash combines both the current file contents and its history
177 This hash combines both the current file contents and its history
178 in a manner that makes it easy to distinguish nodes with the same
178 in a manner that makes it easy to distinguish nodes with the same
179 content in the revision graph.
179 content in the revision graph.
180 """
180 """
181 # As of now, if one of the parent node is null, p2 is null
181 # As of now, if one of the parent node is null, p2 is null
182 if p2 == nullid:
182 if p2 == nullid:
183 # deep copy of a hash is faster than creating one
183 # deep copy of a hash is faster than creating one
184 s = _nullhash.copy()
184 s = _nullhash.copy()
185 s.update(p1)
185 s.update(p1)
186 else:
186 else:
187 # none of the parent nodes are nullid
187 # none of the parent nodes are nullid
188 if p1 < p2:
188 if p1 < p2:
189 a = p1
189 a = p1
190 b = p2
190 b = p2
191 else:
191 else:
192 a = p2
192 a = p2
193 b = p1
193 b = p1
194 s = hashlib.sha1(a)
194 s = hashlib.sha1(a)
195 s.update(b)
195 s.update(b)
196 s.update(text)
196 s.update(text)
197 return s.digest()
197 return s.digest()
198
198
199 def _trimchunk(revlog, revs, startidx, endidx=None):
199 def _trimchunk(revlog, revs, startidx, endidx=None):
200 """returns revs[startidx:endidx] without empty trailing revs
200 """returns revs[startidx:endidx] without empty trailing revs
201 """
201 """
202 length = revlog.length
202 length = revlog.length
203
203
204 if endidx is None:
204 if endidx is None:
205 endidx = len(revs)
205 endidx = len(revs)
206
206
207 # Trim empty revs at the end, but never the very first revision of a chain
207 # Trim empty revs at the end, but never the very first revision of a chain
208 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
208 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
209 endidx -= 1
209 endidx -= 1
210
210
211 return revs[startidx:endidx]
211 return revs[startidx:endidx]
212
212
213 def _slicechunk(revlog, revs):
213 def _slicechunk(revlog, revs):
214 """slice revs to reduce the amount of unrelated data to be read from disk.
214 """slice revs to reduce the amount of unrelated data to be read from disk.
215
215
216 ``revs`` is sliced into groups that should be read in one time.
216 ``revs`` is sliced into groups that should be read in one time.
217 Assume that revs are sorted.
217 Assume that revs are sorted.
218 """
218 """
219 start = revlog.start
219 start = revlog.start
220 length = revlog.length
220 length = revlog.length
221
221
222 if len(revs) <= 1:
222 if len(revs) <= 1:
223 yield revs
223 yield revs
224 return
224 return
225
225
226 startbyte = start(revs[0])
226 startbyte = start(revs[0])
227 endbyte = start(revs[-1]) + length(revs[-1])
227 endbyte = start(revs[-1]) + length(revs[-1])
228 readdata = deltachainspan = endbyte - startbyte
228 readdata = deltachainspan = endbyte - startbyte
229
229
230 chainpayload = sum(length(r) for r in revs)
230 chainpayload = sum(length(r) for r in revs)
231
231
232 if deltachainspan:
232 if deltachainspan:
233 density = chainpayload / float(deltachainspan)
233 density = chainpayload / float(deltachainspan)
234 else:
234 else:
235 density = 1.0
235 density = 1.0
236
236
237 # Store the gaps in a heap to have them sorted by decreasing size
237 # Store the gaps in a heap to have them sorted by decreasing size
238 gapsheap = []
238 gapsheap = []
239 heapq.heapify(gapsheap)
239 heapq.heapify(gapsheap)
240 prevend = None
240 prevend = None
241 for i, rev in enumerate(revs):
241 for i, rev in enumerate(revs):
242 revstart = start(rev)
242 revstart = start(rev)
243 revlen = length(rev)
243 revlen = length(rev)
244
244
245 # Skip empty revisions to form larger holes
245 # Skip empty revisions to form larger holes
246 if revlen == 0:
246 if revlen == 0:
247 continue
247 continue
248
248
249 if prevend is not None:
249 if prevend is not None:
250 gapsize = revstart - prevend
250 gapsize = revstart - prevend
251 # only consider holes that are large enough
251 # only consider holes that are large enough
252 if gapsize > revlog._srmingapsize:
252 if gapsize > revlog._srmingapsize:
253 heapq.heappush(gapsheap, (-gapsize, i))
253 heapq.heappush(gapsheap, (-gapsize, i))
254
254
255 prevend = revstart + revlen
255 prevend = revstart + revlen
256
256
257 # Collect the indices of the largest holes until the density is acceptable
257 # Collect the indices of the largest holes until the density is acceptable
258 indicesheap = []
258 indicesheap = []
259 heapq.heapify(indicesheap)
259 heapq.heapify(indicesheap)
260 while gapsheap and density < revlog._srdensitythreshold:
260 while gapsheap and density < revlog._srdensitythreshold:
261 oppgapsize, gapidx = heapq.heappop(gapsheap)
261 oppgapsize, gapidx = heapq.heappop(gapsheap)
262
262
263 heapq.heappush(indicesheap, gapidx)
263 heapq.heappush(indicesheap, gapidx)
264
264
265 # the gap sizes are stored as negatives to be sorted decreasingly
265 # the gap sizes are stored as negatives to be sorted decreasingly
266 # by the heap
266 # by the heap
267 readdata -= (-oppgapsize)
267 readdata -= (-oppgapsize)
268 if readdata > 0:
268 if readdata > 0:
269 density = chainpayload / float(readdata)
269 density = chainpayload / float(readdata)
270 else:
270 else:
271 density = 1.0
271 density = 1.0
272
272
273 # Cut the revs at collected indices
273 # Cut the revs at collected indices
274 previdx = 0
274 previdx = 0
275 while indicesheap:
275 while indicesheap:
276 idx = heapq.heappop(indicesheap)
276 idx = heapq.heappop(indicesheap)
277
277
278 chunk = _trimchunk(revlog, revs, previdx, idx)
278 chunk = _trimchunk(revlog, revs, previdx, idx)
279 if chunk:
279 if chunk:
280 yield chunk
280 yield chunk
281
281
282 previdx = idx
282 previdx = idx
283
283
284 chunk = _trimchunk(revlog, revs, previdx)
284 chunk = _trimchunk(revlog, revs, previdx)
285 if chunk:
285 if chunk:
286 yield chunk
286 yield chunk
287
287
288 @attr.s(slots=True, frozen=True)
288 @attr.s(slots=True, frozen=True)
289 class _deltainfo(object):
289 class _deltainfo(object):
290 distance = attr.ib()
290 distance = attr.ib()
291 deltalen = attr.ib()
291 deltalen = attr.ib()
292 data = attr.ib()
292 data = attr.ib()
293 base = attr.ib()
293 base = attr.ib()
294 chainbase = attr.ib()
294 chainbase = attr.ib()
295 chainlen = attr.ib()
295 chainlen = attr.ib()
296 compresseddeltalen = attr.ib()
296 compresseddeltalen = attr.ib()
297
297
298 class _deltacomputer(object):
298 class _deltacomputer(object):
299 def __init__(self, revlog):
299 def __init__(self, revlog):
300 self.revlog = revlog
300 self.revlog = revlog
301
301
302 def _getcandidaterevs(self, p1, p2, cachedelta):
302 def _getcandidaterevs(self, p1, p2, cachedelta):
303 """
303 """
304 Provides revisions that present an interest to be diffed against,
304 Provides revisions that present an interest to be diffed against,
305 grouped by level of easiness.
305 grouped by level of easiness.
306 """
306 """
307 revlog = self.revlog
307 revlog = self.revlog
308 curr = len(revlog)
308 curr = len(revlog)
309 prev = curr - 1
309 prev = curr - 1
310 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
310 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
311
311
312 # should we try to build a delta?
312 # should we try to build a delta?
313 if prev != nullrev and revlog.storedeltachains:
313 if prev != nullrev and revlog.storedeltachains:
314 tested = set()
314 tested = set()
315 # This condition is true most of the time when processing
315 # This condition is true most of the time when processing
316 # changegroup data into a generaldelta repo. The only time it
316 # changegroup data into a generaldelta repo. The only time it
317 # isn't true is if this is the first revision in a delta chain
317 # isn't true is if this is the first revision in a delta chain
318 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
318 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
319 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
319 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
320 # Assume what we received from the server is a good choice
320 # Assume what we received from the server is a good choice
321 # build delta will reuse the cache
321 # build delta will reuse the cache
322 yield (cachedelta[0],)
322 yield (cachedelta[0],)
323 tested.add(cachedelta[0])
323 tested.add(cachedelta[0])
324
324
325 if revlog._generaldelta:
325 if revlog._generaldelta:
326 # exclude already lazy tested base if any
326 # exclude already lazy tested base if any
327 parents = [p for p in (p1r, p2r)
327 parents = [p for p in (p1r, p2r)
328 if p != nullrev and p not in tested]
328 if p != nullrev and p not in tested]
329 if parents and not revlog._aggressivemergedeltas:
329 if parents and not revlog._aggressivemergedeltas:
330 # Pick whichever parent is closer to us (to minimize the
330 # Pick whichever parent is closer to us (to minimize the
331 # chance of having to build a fulltext).
331 # chance of having to build a fulltext).
332 parents = [max(parents)]
332 parents = [max(parents)]
333 tested.update(parents)
333 tested.update(parents)
334 yield parents
334 yield parents
335
335
336 if prev not in tested:
336 if prev not in tested:
337 # other approach failed try against prev to hopefully save us a
337 # other approach failed try against prev to hopefully save us a
338 # fulltext.
338 # fulltext.
339 yield (prev,)
339 yield (prev,)
340
340
341 def buildtext(self, revinfo, fh):
341 def buildtext(self, revinfo, fh):
342 """Builds a fulltext version of a revision
342 """Builds a fulltext version of a revision
343
343
344 revinfo: _revisioninfo instance that contains all needed info
344 revinfo: _revisioninfo instance that contains all needed info
345 fh: file handle to either the .i or the .d revlog file,
345 fh: file handle to either the .i or the .d revlog file,
346 depending on whether it is inlined or not
346 depending on whether it is inlined or not
347 """
347 """
348 btext = revinfo.btext
348 btext = revinfo.btext
349 if btext[0] is not None:
349 if btext[0] is not None:
350 return btext[0]
350 return btext[0]
351
351
352 revlog = self.revlog
352 revlog = self.revlog
353 cachedelta = revinfo.cachedelta
353 cachedelta = revinfo.cachedelta
354 flags = revinfo.flags
354 flags = revinfo.flags
355 node = revinfo.node
355 node = revinfo.node
356
356
357 baserev = cachedelta[0]
357 baserev = cachedelta[0]
358 delta = cachedelta[1]
358 delta = cachedelta[1]
359 # special case deltas which replace entire base; no need to decode
359 # special case deltas which replace entire base; no need to decode
360 # base revision. this neatly avoids censored bases, which throw when
360 # base revision. this neatly avoids censored bases, which throw when
361 # they're decoded.
361 # they're decoded.
362 hlen = struct.calcsize(">lll")
362 hlen = struct.calcsize(">lll")
363 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
363 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
364 len(delta) - hlen):
364 len(delta) - hlen):
365 btext[0] = delta[hlen:]
365 btext[0] = delta[hlen:]
366 else:
366 else:
367 # deltabase is rawtext before changed by flag processors, which is
367 # deltabase is rawtext before changed by flag processors, which is
368 # equivalent to non-raw text
368 # equivalent to non-raw text
369 basetext = revlog.revision(baserev, _df=fh, raw=False)
369 basetext = revlog.revision(baserev, _df=fh, raw=False)
370 btext[0] = mdiff.patch(basetext, delta)
370 btext[0] = mdiff.patch(basetext, delta)
371
371
372 try:
372 try:
373 res = revlog._processflags(btext[0], flags, 'read', raw=True)
373 res = revlog._processflags(btext[0], flags, 'read', raw=True)
374 btext[0], validatehash = res
374 btext[0], validatehash = res
375 if validatehash:
375 if validatehash:
376 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
376 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
377 if flags & REVIDX_ISCENSORED:
377 if flags & REVIDX_ISCENSORED:
378 raise RevlogError(_('node %s is not censored') % node)
378 raise RevlogError(_('node %s is not censored') % node)
379 except CensoredNodeError:
379 except CensoredNodeError:
380 # must pass the censored index flag to add censored revisions
380 # must pass the censored index flag to add censored revisions
381 if not flags & REVIDX_ISCENSORED:
381 if not flags & REVIDX_ISCENSORED:
382 raise
382 raise
383 return btext[0]
383 return btext[0]
384
384
385 def _builddeltadiff(self, base, revinfo, fh):
385 def _builddeltadiff(self, base, revinfo, fh):
386 revlog = self.revlog
386 revlog = self.revlog
387 t = self.buildtext(revinfo, fh)
387 t = self.buildtext(revinfo, fh)
388 if revlog.iscensored(base):
388 if revlog.iscensored(base):
389 # deltas based on a censored revision must replace the
389 # deltas based on a censored revision must replace the
390 # full content in one patch, so delta works everywhere
390 # full content in one patch, so delta works everywhere
391 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
391 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
392 delta = header + t
392 delta = header + t
393 else:
393 else:
394 ptext = revlog.revision(base, _df=fh, raw=True)
394 ptext = revlog.revision(base, _df=fh, raw=True)
395 delta = mdiff.textdiff(ptext, t)
395 delta = mdiff.textdiff(ptext, t)
396
396
397 return delta
397 return delta
398
398
399 def _builddeltainfo(self, revinfo, base, fh):
399 def _builddeltainfo(self, revinfo, base, fh):
400 # can we use the cached delta?
400 # can we use the cached delta?
401 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
401 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
402 delta = revinfo.cachedelta[1]
402 delta = revinfo.cachedelta[1]
403 else:
403 else:
404 delta = self._builddeltadiff(base, revinfo, fh)
404 delta = self._builddeltadiff(base, revinfo, fh)
405 revlog = self.revlog
405 revlog = self.revlog
406 header, data = revlog.compress(delta)
406 header, data = revlog.compress(delta)
407 deltalen = len(header) + len(data)
407 deltalen = len(header) + len(data)
408 chainbase = revlog.chainbase(base)
408 chainbase = revlog.chainbase(base)
409 offset = revlog.end(len(revlog) - 1)
409 offset = revlog.end(len(revlog) - 1)
410 dist = deltalen + offset - revlog.start(chainbase)
410 dist = deltalen + offset - revlog.start(chainbase)
411 if revlog._generaldelta:
411 if revlog._generaldelta:
412 deltabase = base
412 deltabase = base
413 else:
413 else:
414 deltabase = chainbase
414 deltabase = chainbase
415 chainlen, compresseddeltalen = revlog._chaininfo(base)
415 chainlen, compresseddeltalen = revlog._chaininfo(base)
416 chainlen += 1
416 chainlen += 1
417 compresseddeltalen += deltalen
417 compresseddeltalen += deltalen
418 return _deltainfo(dist, deltalen, (header, data), deltabase,
418 return _deltainfo(dist, deltalen, (header, data), deltabase,
419 chainbase, chainlen, compresseddeltalen)
419 chainbase, chainlen, compresseddeltalen)
420
420
421 def finddeltainfo(self, revinfo, fh):
421 def finddeltainfo(self, revinfo, fh):
422 """Find an acceptable delta against a candidate revision
422 """Find an acceptable delta against a candidate revision
423
423
424 revinfo: information about the revision (instance of _revisioninfo)
424 revinfo: information about the revision (instance of _revisioninfo)
425 fh: file handle to either the .i or the .d revlog file,
425 fh: file handle to either the .i or the .d revlog file,
426 depending on whether it is inlined or not
426 depending on whether it is inlined or not
427
427
428 Returns the first acceptable candidate revision, as ordered by
428 Returns the first acceptable candidate revision, as ordered by
429 _getcandidaterevs
429 _getcandidaterevs
430 """
430 """
431 cachedelta = revinfo.cachedelta
431 cachedelta = revinfo.cachedelta
432 p1 = revinfo.p1
432 p1 = revinfo.p1
433 p2 = revinfo.p2
433 p2 = revinfo.p2
434 revlog = self.revlog
434 revlog = self.revlog
435
435
436 deltainfo = None
436 deltainfo = None
437 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
437 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
438 nominateddeltas = []
438 nominateddeltas = []
439 for candidaterev in candidaterevs:
439 for candidaterev in candidaterevs:
440 # no delta for rawtext-changing revs (see "candelta" for why)
440 # no delta for rawtext-changing revs (see "candelta" for why)
441 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
441 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
442 continue
442 continue
443 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
443 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
444 if revlog._isgooddeltainfo(candidatedelta, revinfo.textlen):
444 if revlog._isgooddeltainfo(candidatedelta, revinfo.textlen):
445 nominateddeltas.append(candidatedelta)
445 nominateddeltas.append(candidatedelta)
446 if nominateddeltas:
446 if nominateddeltas:
447 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
447 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
448 break
448 break
449
449
450 return deltainfo
450 return deltainfo
451
451
452 @attr.s(slots=True, frozen=True)
452 @attr.s(slots=True, frozen=True)
453 class _revisioninfo(object):
453 class _revisioninfo(object):
454 """Information about a revision that allows building its fulltext
454 """Information about a revision that allows building its fulltext
455 node: expected hash of the revision
455 node: expected hash of the revision
456 p1, p2: parent revs of the revision
456 p1, p2: parent revs of the revision
457 btext: built text cache consisting of a one-element list
457 btext: built text cache consisting of a one-element list
458 cachedelta: (baserev, uncompressed_delta) or None
458 cachedelta: (baserev, uncompressed_delta) or None
459 flags: flags associated to the revision storage
459 flags: flags associated to the revision storage
460
460
461 One of btext[0] or cachedelta must be set.
461 One of btext[0] or cachedelta must be set.
462 """
462 """
463 node = attr.ib()
463 node = attr.ib()
464 p1 = attr.ib()
464 p1 = attr.ib()
465 p2 = attr.ib()
465 p2 = attr.ib()
466 btext = attr.ib()
466 btext = attr.ib()
467 textlen = attr.ib()
467 textlen = attr.ib()
468 cachedelta = attr.ib()
468 cachedelta = attr.ib()
469 flags = attr.ib()
469 flags = attr.ib()
470
470
471 # index v0:
471 # index v0:
472 # 4 bytes: offset
472 # 4 bytes: offset
473 # 4 bytes: compressed length
473 # 4 bytes: compressed length
474 # 4 bytes: base rev
474 # 4 bytes: base rev
475 # 4 bytes: link rev
475 # 4 bytes: link rev
476 # 20 bytes: parent 1 nodeid
476 # 20 bytes: parent 1 nodeid
477 # 20 bytes: parent 2 nodeid
477 # 20 bytes: parent 2 nodeid
478 # 20 bytes: nodeid
478 # 20 bytes: nodeid
479 indexformatv0 = struct.Struct(">4l20s20s20s")
479 indexformatv0 = struct.Struct(">4l20s20s20s")
480 indexformatv0_pack = indexformatv0.pack
480 indexformatv0_pack = indexformatv0.pack
481 indexformatv0_unpack = indexformatv0.unpack
481 indexformatv0_unpack = indexformatv0.unpack
482
482
483 class revlogoldio(object):
483 class revlogoldio(object):
484 def __init__(self):
484 def __init__(self):
485 self.size = indexformatv0.size
485 self.size = indexformatv0.size
486
486
487 def parseindex(self, data, inline):
487 def parseindex(self, data, inline):
488 s = self.size
488 s = self.size
489 index = []
489 index = []
490 nodemap = {nullid: nullrev}
490 nodemap = {nullid: nullrev}
491 n = off = 0
491 n = off = 0
492 l = len(data)
492 l = len(data)
493 while off + s <= l:
493 while off + s <= l:
494 cur = data[off:off + s]
494 cur = data[off:off + s]
495 off += s
495 off += s
496 e = indexformatv0_unpack(cur)
496 e = indexformatv0_unpack(cur)
497 # transform to revlogv1 format
497 # transform to revlogv1 format
498 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
498 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
499 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
499 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
500 index.append(e2)
500 index.append(e2)
501 nodemap[e[6]] = n
501 nodemap[e[6]] = n
502 n += 1
502 n += 1
503
503
504 # add the magic null revision at -1
504 # add the magic null revision at -1
505 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
505 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
506
506
507 return index, nodemap, None
507 return index, nodemap, None
508
508
509 def packentry(self, entry, node, version, rev):
509 def packentry(self, entry, node, version, rev):
510 if gettype(entry[0]):
510 if gettype(entry[0]):
511 raise RevlogError(_('index entry flags need revlog version 1'))
511 raise RevlogError(_('index entry flags need revlog version 1'))
512 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
512 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
513 node(entry[5]), node(entry[6]), entry[7])
513 node(entry[5]), node(entry[6]), entry[7])
514 return indexformatv0_pack(*e2)
514 return indexformatv0_pack(*e2)
515
515
516 # index ng:
516 # index ng:
517 # 6 bytes: offset
517 # 6 bytes: offset
518 # 2 bytes: flags
518 # 2 bytes: flags
519 # 4 bytes: compressed length
519 # 4 bytes: compressed length
520 # 4 bytes: uncompressed length
520 # 4 bytes: uncompressed length
521 # 4 bytes: base rev
521 # 4 bytes: base rev
522 # 4 bytes: link rev
522 # 4 bytes: link rev
523 # 4 bytes: parent 1 rev
523 # 4 bytes: parent 1 rev
524 # 4 bytes: parent 2 rev
524 # 4 bytes: parent 2 rev
525 # 32 bytes: nodeid
525 # 32 bytes: nodeid
526 indexformatng = struct.Struct(">Qiiiiii20s12x")
526 indexformatng = struct.Struct(">Qiiiiii20s12x")
527 indexformatng_pack = indexformatng.pack
527 indexformatng_pack = indexformatng.pack
528 versionformat = struct.Struct(">I")
528 versionformat = struct.Struct(">I")
529 versionformat_pack = versionformat.pack
529 versionformat_pack = versionformat.pack
530 versionformat_unpack = versionformat.unpack
530 versionformat_unpack = versionformat.unpack
531
531
532 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
532 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
533 # signed integer)
533 # signed integer)
534 _maxentrysize = 0x7fffffff
534 _maxentrysize = 0x7fffffff
535
535
536 class revlogio(object):
536 class revlogio(object):
537 def __init__(self):
537 def __init__(self):
538 self.size = indexformatng.size
538 self.size = indexformatng.size
539
539
540 def parseindex(self, data, inline):
540 def parseindex(self, data, inline):
541 # call the C implementation to parse the index data
541 # call the C implementation to parse the index data
542 index, cache = parsers.parse_index2(data, inline)
542 index, cache = parsers.parse_index2(data, inline)
543 return index, getattr(index, 'nodemap', None), cache
543 return index, getattr(index, 'nodemap', None), cache
544
544
545 def packentry(self, entry, node, version, rev):
545 def packentry(self, entry, node, version, rev):
546 p = indexformatng_pack(*entry)
546 p = indexformatng_pack(*entry)
547 if rev == 0:
547 if rev == 0:
548 p = versionformat_pack(version) + p[4:]
548 p = versionformat_pack(version) + p[4:]
549 return p
549 return p
550
550
551 class revlog(object):
551 class revlog(object):
552 """
552 """
553 the underlying revision storage object
553 the underlying revision storage object
554
554
555 A revlog consists of two parts, an index and the revision data.
555 A revlog consists of two parts, an index and the revision data.
556
556
557 The index is a file with a fixed record size containing
557 The index is a file with a fixed record size containing
558 information on each revision, including its nodeid (hash), the
558 information on each revision, including its nodeid (hash), the
559 nodeids of its parents, the position and offset of its data within
559 nodeids of its parents, the position and offset of its data within
560 the data file, and the revision it's based on. Finally, each entry
560 the data file, and the revision it's based on. Finally, each entry
561 contains a linkrev entry that can serve as a pointer to external
561 contains a linkrev entry that can serve as a pointer to external
562 data.
562 data.
563
563
564 The revision data itself is a linear collection of data chunks.
564 The revision data itself is a linear collection of data chunks.
565 Each chunk represents a revision and is usually represented as a
565 Each chunk represents a revision and is usually represented as a
566 delta against the previous chunk. To bound lookup time, runs of
566 delta against the previous chunk. To bound lookup time, runs of
567 deltas are limited to about 2 times the length of the original
567 deltas are limited to about 2 times the length of the original
568 version data. This makes retrieval of a version proportional to
568 version data. This makes retrieval of a version proportional to
569 its size, or O(1) relative to the number of revisions.
569 its size, or O(1) relative to the number of revisions.
570
570
571 Both pieces of the revlog are written to in an append-only
571 Both pieces of the revlog are written to in an append-only
572 fashion, which means we never need to rewrite a file to insert or
572 fashion, which means we never need to rewrite a file to insert or
573 remove data, and can use some simple techniques to avoid the need
573 remove data, and can use some simple techniques to avoid the need
574 for locking while reading.
574 for locking while reading.
575
575
576 If checkambig, indexfile is opened with checkambig=True at
576 If checkambig, indexfile is opened with checkambig=True at
577 writing, to avoid file stat ambiguity.
577 writing, to avoid file stat ambiguity.
578
578
579 If mmaplargeindex is True, and an mmapindexthreshold is set, the
579 If mmaplargeindex is True, and an mmapindexthreshold is set, the
580 index will be mmapped rather than read if it is larger than the
580 index will be mmapped rather than read if it is larger than the
581 configured threshold.
581 configured threshold.
582
582
583 If censorable is True, the revlog can have censored revisions.
583 If censorable is True, the revlog can have censored revisions.
584 """
584 """
585 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
585 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
586 mmaplargeindex=False, censorable=False):
586 mmaplargeindex=False, censorable=False):
587 """
587 """
588 create a revlog object
588 create a revlog object
589
589
590 opener is a function that abstracts the file opening operation
590 opener is a function that abstracts the file opening operation
591 and can be used to implement COW semantics or the like.
591 and can be used to implement COW semantics or the like.
592 """
592 """
593 self.indexfile = indexfile
593 self.indexfile = indexfile
594 self.datafile = datafile or (indexfile[:-2] + ".d")
594 self.datafile = datafile or (indexfile[:-2] + ".d")
595 self.opener = opener
595 self.opener = opener
596 # When True, indexfile is opened with checkambig=True at writing, to
596 # When True, indexfile is opened with checkambig=True at writing, to
597 # avoid file stat ambiguity.
597 # avoid file stat ambiguity.
598 self._checkambig = checkambig
598 self._checkambig = checkambig
599 self._censorable = censorable
599 self._censorable = censorable
600 # 3-tuple of (node, rev, text) for a raw revision.
600 # 3-tuple of (node, rev, text) for a raw revision.
601 self._cache = None
601 self._cache = None
602 # Maps rev to chain base rev.
602 # Maps rev to chain base rev.
603 self._chainbasecache = util.lrucachedict(100)
603 self._chainbasecache = util.lrucachedict(100)
604 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
604 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
605 self._chunkcache = (0, '')
605 self._chunkcache = (0, '')
606 # How much data to read and cache into the raw revlog data cache.
606 # How much data to read and cache into the raw revlog data cache.
607 self._chunkcachesize = 65536
607 self._chunkcachesize = 65536
608 self._maxchainlen = None
608 self._maxchainlen = None
609 self._aggressivemergedeltas = False
609 self._aggressivemergedeltas = False
610 self.index = []
610 self.index = []
611 # Mapping of partial identifiers to full nodes.
611 # Mapping of partial identifiers to full nodes.
612 self._pcache = {}
612 self._pcache = {}
613 # Mapping of revision integer to full node.
613 # Mapping of revision integer to full node.
614 self._nodecache = {nullid: nullrev}
614 self._nodecache = {nullid: nullrev}
615 self._nodepos = None
615 self._nodepos = None
616 self._compengine = 'zlib'
616 self._compengine = 'zlib'
617 self._maxdeltachainspan = -1
617 self._maxdeltachainspan = -1
618 self._withsparseread = False
618 self._withsparseread = False
619 self._srdensitythreshold = 0.25
619 self._srdensitythreshold = 0.25
620 self._srmingapsize = 262144
620 self._srmingapsize = 262144
621
621
622 mmapindexthreshold = None
622 mmapindexthreshold = None
623 v = REVLOG_DEFAULT_VERSION
623 v = REVLOG_DEFAULT_VERSION
624 opts = getattr(opener, 'options', None)
624 opts = getattr(opener, 'options', None)
625 if opts is not None:
625 if opts is not None:
626 if 'revlogv2' in opts:
626 if 'revlogv2' in opts:
627 # version 2 revlogs always use generaldelta.
627 # version 2 revlogs always use generaldelta.
628 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
628 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
629 elif 'revlogv1' in opts:
629 elif 'revlogv1' in opts:
630 if 'generaldelta' in opts:
630 if 'generaldelta' in opts:
631 v |= FLAG_GENERALDELTA
631 v |= FLAG_GENERALDELTA
632 else:
632 else:
633 v = 0
633 v = 0
634 if 'chunkcachesize' in opts:
634 if 'chunkcachesize' in opts:
635 self._chunkcachesize = opts['chunkcachesize']
635 self._chunkcachesize = opts['chunkcachesize']
636 if 'maxchainlen' in opts:
636 if 'maxchainlen' in opts:
637 self._maxchainlen = opts['maxchainlen']
637 self._maxchainlen = opts['maxchainlen']
638 if 'aggressivemergedeltas' in opts:
638 if 'aggressivemergedeltas' in opts:
639 self._aggressivemergedeltas = opts['aggressivemergedeltas']
639 self._aggressivemergedeltas = opts['aggressivemergedeltas']
640 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
640 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
641 if 'compengine' in opts:
641 if 'compengine' in opts:
642 self._compengine = opts['compengine']
642 self._compengine = opts['compengine']
643 if 'maxdeltachainspan' in opts:
643 if 'maxdeltachainspan' in opts:
644 self._maxdeltachainspan = opts['maxdeltachainspan']
644 self._maxdeltachainspan = opts['maxdeltachainspan']
645 if mmaplargeindex and 'mmapindexthreshold' in opts:
645 if mmaplargeindex and 'mmapindexthreshold' in opts:
646 mmapindexthreshold = opts['mmapindexthreshold']
646 mmapindexthreshold = opts['mmapindexthreshold']
647 self._withsparseread = bool(opts.get('with-sparse-read', False))
647 self._withsparseread = bool(opts.get('with-sparse-read', False))
648 if 'sparse-read-density-threshold' in opts:
648 if 'sparse-read-density-threshold' in opts:
649 self._srdensitythreshold = opts['sparse-read-density-threshold']
649 self._srdensitythreshold = opts['sparse-read-density-threshold']
650 if 'sparse-read-min-gap-size' in opts:
650 if 'sparse-read-min-gap-size' in opts:
651 self._srmingapsize = opts['sparse-read-min-gap-size']
651 self._srmingapsize = opts['sparse-read-min-gap-size']
652
652
653 if self._chunkcachesize <= 0:
653 if self._chunkcachesize <= 0:
654 raise RevlogError(_('revlog chunk cache size %r is not greater '
654 raise RevlogError(_('revlog chunk cache size %r is not greater '
655 'than 0') % self._chunkcachesize)
655 'than 0') % self._chunkcachesize)
656 elif self._chunkcachesize & (self._chunkcachesize - 1):
656 elif self._chunkcachesize & (self._chunkcachesize - 1):
657 raise RevlogError(_('revlog chunk cache size %r is not a power '
657 raise RevlogError(_('revlog chunk cache size %r is not a power '
658 'of 2') % self._chunkcachesize)
658 'of 2') % self._chunkcachesize)
659
659
660 indexdata = ''
660 indexdata = ''
661 self._initempty = True
661 self._initempty = True
662 try:
662 try:
663 with self._indexfp() as f:
663 with self._indexfp() as f:
664 if (mmapindexthreshold is not None and
664 if (mmapindexthreshold is not None and
665 self.opener.fstat(f).st_size >= mmapindexthreshold):
665 self.opener.fstat(f).st_size >= mmapindexthreshold):
666 indexdata = util.buffer(util.mmapread(f))
666 indexdata = util.buffer(util.mmapread(f))
667 else:
667 else:
668 indexdata = f.read()
668 indexdata = f.read()
669 if len(indexdata) > 0:
669 if len(indexdata) > 0:
670 v = versionformat_unpack(indexdata[:4])[0]
670 v = versionformat_unpack(indexdata[:4])[0]
671 self._initempty = False
671 self._initempty = False
672 except IOError as inst:
672 except IOError as inst:
673 if inst.errno != errno.ENOENT:
673 if inst.errno != errno.ENOENT:
674 raise
674 raise
675
675
676 self.version = v
676 self.version = v
677 self._inline = v & FLAG_INLINE_DATA
677 self._inline = v & FLAG_INLINE_DATA
678 self._generaldelta = v & FLAG_GENERALDELTA
678 self._generaldelta = v & FLAG_GENERALDELTA
679 flags = v & ~0xFFFF
679 flags = v & ~0xFFFF
680 fmt = v & 0xFFFF
680 fmt = v & 0xFFFF
681 if fmt == REVLOGV0:
681 if fmt == REVLOGV0:
682 if flags:
682 if flags:
683 raise RevlogError(_('unknown flags (%#04x) in version %d '
683 raise RevlogError(_('unknown flags (%#04x) in version %d '
684 'revlog %s') %
684 'revlog %s') %
685 (flags >> 16, fmt, self.indexfile))
685 (flags >> 16, fmt, self.indexfile))
686 elif fmt == REVLOGV1:
686 elif fmt == REVLOGV1:
687 if flags & ~REVLOGV1_FLAGS:
687 if flags & ~REVLOGV1_FLAGS:
688 raise RevlogError(_('unknown flags (%#04x) in version %d '
688 raise RevlogError(_('unknown flags (%#04x) in version %d '
689 'revlog %s') %
689 'revlog %s') %
690 (flags >> 16, fmt, self.indexfile))
690 (flags >> 16, fmt, self.indexfile))
691 elif fmt == REVLOGV2:
691 elif fmt == REVLOGV2:
692 if flags & ~REVLOGV2_FLAGS:
692 if flags & ~REVLOGV2_FLAGS:
693 raise RevlogError(_('unknown flags (%#04x) in version %d '
693 raise RevlogError(_('unknown flags (%#04x) in version %d '
694 'revlog %s') %
694 'revlog %s') %
695 (flags >> 16, fmt, self.indexfile))
695 (flags >> 16, fmt, self.indexfile))
696 else:
696 else:
697 raise RevlogError(_('unknown version (%d) in revlog %s') %
697 raise RevlogError(_('unknown version (%d) in revlog %s') %
698 (fmt, self.indexfile))
698 (fmt, self.indexfile))
699
699
700 self.storedeltachains = True
700 self.storedeltachains = True
701
701
702 self._io = revlogio()
702 self._io = revlogio()
703 if self.version == REVLOGV0:
703 if self.version == REVLOGV0:
704 self._io = revlogoldio()
704 self._io = revlogoldio()
705 try:
705 try:
706 d = self._io.parseindex(indexdata, self._inline)
706 d = self._io.parseindex(indexdata, self._inline)
707 except (ValueError, IndexError):
707 except (ValueError, IndexError):
708 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
708 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
709 self.index, nodemap, self._chunkcache = d
709 self.index, nodemap, self._chunkcache = d
710 if nodemap is not None:
710 if nodemap is not None:
711 self.nodemap = self._nodecache = nodemap
711 self.nodemap = self._nodecache = nodemap
712 if not self._chunkcache:
712 if not self._chunkcache:
713 self._chunkclear()
713 self._chunkclear()
714 # revnum -> (chain-length, sum-delta-length)
714 # revnum -> (chain-length, sum-delta-length)
715 self._chaininfocache = {}
715 self._chaininfocache = {}
716 # revlog header -> revlog compressor
716 # revlog header -> revlog compressor
717 self._decompressors = {}
717 self._decompressors = {}
718
718
719 @util.propertycache
719 @util.propertycache
720 def _compressor(self):
720 def _compressor(self):
721 return util.compengines[self._compengine].revlogcompressor()
721 return util.compengines[self._compengine].revlogcompressor()
722
722
723 def _indexfp(self, mode='r'):
723 def _indexfp(self, mode='r'):
724 """file object for the revlog's index file"""
724 """file object for the revlog's index file"""
725 args = {r'mode': mode}
725 args = {r'mode': mode}
726 if mode != 'r':
726 if mode != 'r':
727 args[r'checkambig'] = self._checkambig
727 args[r'checkambig'] = self._checkambig
728 if mode == 'w':
728 if mode == 'w':
729 args[r'atomictemp'] = True
729 args[r'atomictemp'] = True
730 return self.opener(self.indexfile, **args)
730 return self.opener(self.indexfile, **args)
731
731
732 def _datafp(self, mode='r'):
732 def _datafp(self, mode='r'):
733 """file object for the revlog's data file"""
733 """file object for the revlog's data file"""
734 return self.opener(self.datafile, mode=mode)
734 return self.opener(self.datafile, mode=mode)
735
735
736 @contextlib.contextmanager
736 @contextlib.contextmanager
737 def _datareadfp(self, existingfp=None):
737 def _datareadfp(self, existingfp=None):
738 """file object suitable to read data"""
738 """file object suitable to read data"""
739 if existingfp is not None:
739 if existingfp is not None:
740 yield existingfp
740 yield existingfp
741 else:
741 else:
742 if self._inline:
742 if self._inline:
743 func = self._indexfp
743 func = self._indexfp
744 else:
744 else:
745 func = self._datafp
745 func = self._datafp
746 with func() as fp:
746 with func() as fp:
747 yield fp
747 yield fp
748
748
749 def tip(self):
749 def tip(self):
750 return self.node(len(self.index) - 2)
750 return self.node(len(self.index) - 2)
751 def __contains__(self, rev):
751 def __contains__(self, rev):
752 return 0 <= rev < len(self)
752 return 0 <= rev < len(self)
753 def __len__(self):
753 def __len__(self):
754 return len(self.index) - 1
754 return len(self.index) - 1
755 def __iter__(self):
755 def __iter__(self):
756 return iter(xrange(len(self)))
756 return iter(xrange(len(self)))
757 def revs(self, start=0, stop=None):
757 def revs(self, start=0, stop=None):
758 """iterate over all rev in this revlog (from start to stop)"""
758 """iterate over all rev in this revlog (from start to stop)"""
759 step = 1
759 step = 1
760 if stop is not None:
760 if stop is not None:
761 if start > stop:
761 if start > stop:
762 step = -1
762 step = -1
763 stop += step
763 stop += step
764 else:
764 else:
765 stop = len(self)
765 stop = len(self)
766 return xrange(start, stop, step)
766 return xrange(start, stop, step)
767
767
768 @util.propertycache
768 @util.propertycache
769 def nodemap(self):
769 def nodemap(self):
770 self.rev(self.node(0))
770 self.rev(self.node(0))
771 return self._nodecache
771 return self._nodecache
772
772
773 def hasnode(self, node):
773 def hasnode(self, node):
774 try:
774 try:
775 self.rev(node)
775 self.rev(node)
776 return True
776 return True
777 except KeyError:
777 except KeyError:
778 return False
778 return False
779
779
780 def candelta(self, baserev, rev):
780 def candelta(self, baserev, rev):
781 """whether two revisions (baserev, rev) can be delta-ed or not"""
781 """whether two revisions (baserev, rev) can be delta-ed or not"""
782 # Disable delta if either rev requires a content-changing flag
782 # Disable delta if either rev requires a content-changing flag
783 # processor (ex. LFS). This is because such flag processor can alter
783 # processor (ex. LFS). This is because such flag processor can alter
784 # the rawtext content that the delta will be based on, and two clients
784 # the rawtext content that the delta will be based on, and two clients
785 # could have a same revlog node with different flags (i.e. different
785 # could have a same revlog node with different flags (i.e. different
786 # rawtext contents) and the delta could be incompatible.
786 # rawtext contents) and the delta could be incompatible.
787 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
787 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
788 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
788 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
789 return False
789 return False
790 return True
790 return True
791
791
792 def clearcaches(self):
792 def clearcaches(self):
793 self._cache = None
793 self._cache = None
794 self._chainbasecache.clear()
794 self._chainbasecache.clear()
795 self._chunkcache = (0, '')
795 self._chunkcache = (0, '')
796 self._pcache = {}
796 self._pcache = {}
797
797
798 try:
798 try:
799 self._nodecache.clearcaches()
799 self._nodecache.clearcaches()
800 except AttributeError:
800 except AttributeError:
801 self._nodecache = {nullid: nullrev}
801 self._nodecache = {nullid: nullrev}
802 self._nodepos = None
802 self._nodepos = None
803
803
804 def rev(self, node):
804 def rev(self, node):
805 try:
805 try:
806 return self._nodecache[node]
806 return self._nodecache[node]
807 except TypeError:
807 except TypeError:
808 raise
808 raise
809 except RevlogError:
809 except RevlogError:
810 # parsers.c radix tree lookup failed
810 # parsers.c radix tree lookup failed
811 if node == wdirid or node in wdirfilenodeids:
811 if node == wdirid or node in wdirfilenodeids:
812 raise error.WdirUnsupported
812 raise error.WdirUnsupported
813 raise LookupError(node, self.indexfile, _('no node'))
813 raise LookupError(node, self.indexfile, _('no node'))
814 except KeyError:
814 except KeyError:
815 # pure python cache lookup failed
815 # pure python cache lookup failed
816 n = self._nodecache
816 n = self._nodecache
817 i = self.index
817 i = self.index
818 p = self._nodepos
818 p = self._nodepos
819 if p is None:
819 if p is None:
820 p = len(i) - 2
820 p = len(i) - 2
821 else:
821 else:
822 assert p < len(i)
822 assert p < len(i)
823 for r in xrange(p, -1, -1):
823 for r in xrange(p, -1, -1):
824 v = i[r][7]
824 v = i[r][7]
825 n[v] = r
825 n[v] = r
826 if v == node:
826 if v == node:
827 self._nodepos = r - 1
827 self._nodepos = r - 1
828 return r
828 return r
829 if node == wdirid or node in wdirfilenodeids:
829 if node == wdirid or node in wdirfilenodeids:
830 raise error.WdirUnsupported
830 raise error.WdirUnsupported
831 raise LookupError(node, self.indexfile, _('no node'))
831 raise LookupError(node, self.indexfile, _('no node'))
832
832
833 # Accessors for index entries.
833 # Accessors for index entries.
834
834
835 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
835 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
836 # are flags.
836 # are flags.
837 def start(self, rev):
837 def start(self, rev):
838 return int(self.index[rev][0] >> 16)
838 return int(self.index[rev][0] >> 16)
839
839
840 def flags(self, rev):
840 def flags(self, rev):
841 return self.index[rev][0] & 0xFFFF
841 return self.index[rev][0] & 0xFFFF
842
842
843 def length(self, rev):
843 def length(self, rev):
844 return self.index[rev][1]
844 return self.index[rev][1]
845
845
846 def rawsize(self, rev):
846 def rawsize(self, rev):
847 """return the length of the uncompressed text for a given revision"""
847 """return the length of the uncompressed text for a given revision"""
848 l = self.index[rev][2]
848 l = self.index[rev][2]
849 if l >= 0:
849 if l >= 0:
850 return l
850 return l
851
851
852 t = self.revision(rev, raw=True)
852 t = self.revision(rev, raw=True)
853 return len(t)
853 return len(t)
854
854
855 def size(self, rev):
855 def size(self, rev):
856 """length of non-raw text (processed by a "read" flag processor)"""
856 """length of non-raw text (processed by a "read" flag processor)"""
857 # fast path: if no "read" flag processor could change the content,
857 # fast path: if no "read" flag processor could change the content,
858 # size is rawsize. note: ELLIPSIS is known to not change the content.
858 # size is rawsize. note: ELLIPSIS is known to not change the content.
859 flags = self.flags(rev)
859 flags = self.flags(rev)
860 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
860 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
861 return self.rawsize(rev)
861 return self.rawsize(rev)
862
862
863 return len(self.revision(rev, raw=False))
863 return len(self.revision(rev, raw=False))
864
864
865 def chainbase(self, rev):
865 def chainbase(self, rev):
866 base = self._chainbasecache.get(rev)
866 base = self._chainbasecache.get(rev)
867 if base is not None:
867 if base is not None:
868 return base
868 return base
869
869
870 index = self.index
870 index = self.index
871 base = index[rev][3]
871 base = index[rev][3]
872 while base != rev:
872 while base != rev:
873 rev = base
873 rev = base
874 base = index[rev][3]
874 base = index[rev][3]
875
875
876 self._chainbasecache[rev] = base
876 self._chainbasecache[rev] = base
877 return base
877 return base
878
878
879 def linkrev(self, rev):
879 def linkrev(self, rev):
880 return self.index[rev][4]
880 return self.index[rev][4]
881
881
882 def parentrevs(self, rev):
882 def parentrevs(self, rev):
883 try:
883 try:
884 entry = self.index[rev]
884 entry = self.index[rev]
885 except IndexError:
885 except IndexError:
886 if rev == wdirrev:
886 if rev == wdirrev:
887 raise error.WdirUnsupported
887 raise error.WdirUnsupported
888 raise
888 raise
889
889
890 return entry[5], entry[6]
890 return entry[5], entry[6]
891
891
892 def node(self, rev):
892 def node(self, rev):
893 try:
893 try:
894 return self.index[rev][7]
894 return self.index[rev][7]
895 except IndexError:
895 except IndexError:
896 if rev == wdirrev:
896 if rev == wdirrev:
897 raise error.WdirUnsupported
897 raise error.WdirUnsupported
898 raise
898 raise
899
899
900 # Derived from index values.
900 # Derived from index values.
901
901
902 def end(self, rev):
902 def end(self, rev):
903 return self.start(rev) + self.length(rev)
903 return self.start(rev) + self.length(rev)
904
904
905 def parents(self, node):
905 def parents(self, node):
906 i = self.index
906 i = self.index
907 d = i[self.rev(node)]
907 d = i[self.rev(node)]
908 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
908 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
909
909
910 def chainlen(self, rev):
910 def chainlen(self, rev):
911 return self._chaininfo(rev)[0]
911 return self._chaininfo(rev)[0]
912
912
913 def _chaininfo(self, rev):
913 def _chaininfo(self, rev):
914 chaininfocache = self._chaininfocache
914 chaininfocache = self._chaininfocache
915 if rev in chaininfocache:
915 if rev in chaininfocache:
916 return chaininfocache[rev]
916 return chaininfocache[rev]
917 index = self.index
917 index = self.index
918 generaldelta = self._generaldelta
918 generaldelta = self._generaldelta
919 iterrev = rev
919 iterrev = rev
920 e = index[iterrev]
920 e = index[iterrev]
921 clen = 0
921 clen = 0
922 compresseddeltalen = 0
922 compresseddeltalen = 0
923 while iterrev != e[3]:
923 while iterrev != e[3]:
924 clen += 1
924 clen += 1
925 compresseddeltalen += e[1]
925 compresseddeltalen += e[1]
926 if generaldelta:
926 if generaldelta:
927 iterrev = e[3]
927 iterrev = e[3]
928 else:
928 else:
929 iterrev -= 1
929 iterrev -= 1
930 if iterrev in chaininfocache:
930 if iterrev in chaininfocache:
931 t = chaininfocache[iterrev]
931 t = chaininfocache[iterrev]
932 clen += t[0]
932 clen += t[0]
933 compresseddeltalen += t[1]
933 compresseddeltalen += t[1]
934 break
934 break
935 e = index[iterrev]
935 e = index[iterrev]
936 else:
936 else:
937 # Add text length of base since decompressing that also takes
937 # Add text length of base since decompressing that also takes
938 # work. For cache hits the length is already included.
938 # work. For cache hits the length is already included.
939 compresseddeltalen += e[1]
939 compresseddeltalen += e[1]
940 r = (clen, compresseddeltalen)
940 r = (clen, compresseddeltalen)
941 chaininfocache[rev] = r
941 chaininfocache[rev] = r
942 return r
942 return r
943
943
944 def _deltachain(self, rev, stoprev=None):
944 def _deltachain(self, rev, stoprev=None):
945 """Obtain the delta chain for a revision.
945 """Obtain the delta chain for a revision.
946
946
947 ``stoprev`` specifies a revision to stop at. If not specified, we
947 ``stoprev`` specifies a revision to stop at. If not specified, we
948 stop at the base of the chain.
948 stop at the base of the chain.
949
949
950 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
950 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
951 revs in ascending order and ``stopped`` is a bool indicating whether
951 revs in ascending order and ``stopped`` is a bool indicating whether
952 ``stoprev`` was hit.
952 ``stoprev`` was hit.
953 """
953 """
954 # Try C implementation.
954 # Try C implementation.
955 try:
955 try:
956 return self.index.deltachain(rev, stoprev, self._generaldelta)
956 return self.index.deltachain(rev, stoprev, self._generaldelta)
957 except AttributeError:
957 except AttributeError:
958 pass
958 pass
959
959
960 chain = []
960 chain = []
961
961
962 # Alias to prevent attribute lookup in tight loop.
962 # Alias to prevent attribute lookup in tight loop.
963 index = self.index
963 index = self.index
964 generaldelta = self._generaldelta
964 generaldelta = self._generaldelta
965
965
966 iterrev = rev
966 iterrev = rev
967 e = index[iterrev]
967 e = index[iterrev]
968 while iterrev != e[3] and iterrev != stoprev:
968 while iterrev != e[3] and iterrev != stoprev:
969 chain.append(iterrev)
969 chain.append(iterrev)
970 if generaldelta:
970 if generaldelta:
971 iterrev = e[3]
971 iterrev = e[3]
972 else:
972 else:
973 iterrev -= 1
973 iterrev -= 1
974 e = index[iterrev]
974 e = index[iterrev]
975
975
976 if iterrev == stoprev:
976 if iterrev == stoprev:
977 stopped = True
977 stopped = True
978 else:
978 else:
979 chain.append(iterrev)
979 chain.append(iterrev)
980 stopped = False
980 stopped = False
981
981
982 chain.reverse()
982 chain.reverse()
983 return chain, stopped
983 return chain, stopped
984
984
985 def ancestors(self, revs, stoprev=0, inclusive=False):
985 def ancestors(self, revs, stoprev=0, inclusive=False):
986 """Generate the ancestors of 'revs' in reverse topological order.
986 """Generate the ancestors of 'revs' in reverse topological order.
987 Does not generate revs lower than stoprev.
987 Does not generate revs lower than stoprev.
988
988
989 See the documentation for ancestor.lazyancestors for more details."""
989 See the documentation for ancestor.lazyancestors for more details."""
990
990
991 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
991 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
992 inclusive=inclusive)
992 inclusive=inclusive)
993
993
994 def descendants(self, revs):
994 def descendants(self, revs):
995 """Generate the descendants of 'revs' in revision order.
995 """Generate the descendants of 'revs' in revision order.
996
996
997 Yield a sequence of revision numbers starting with a child of
997 Yield a sequence of revision numbers starting with a child of
998 some rev in revs, i.e., each revision is *not* considered a
998 some rev in revs, i.e., each revision is *not* considered a
999 descendant of itself. Results are ordered by revision number (a
999 descendant of itself. Results are ordered by revision number (a
1000 topological sort)."""
1000 topological sort)."""
1001 first = min(revs)
1001 first = min(revs)
1002 if first == nullrev:
1002 if first == nullrev:
1003 for i in self:
1003 for i in self:
1004 yield i
1004 yield i
1005 return
1005 return
1006
1006
1007 seen = set(revs)
1007 seen = set(revs)
1008 for i in self.revs(start=first + 1):
1008 for i in self.revs(start=first + 1):
1009 for x in self.parentrevs(i):
1009 for x in self.parentrevs(i):
1010 if x != nullrev and x in seen:
1010 if x != nullrev and x in seen:
1011 seen.add(i)
1011 seen.add(i)
1012 yield i
1012 yield i
1013 break
1013 break
1014
1014
1015 def findcommonmissing(self, common=None, heads=None):
1015 def findcommonmissing(self, common=None, heads=None):
1016 """Return a tuple of the ancestors of common and the ancestors of heads
1016 """Return a tuple of the ancestors of common and the ancestors of heads
1017 that are not ancestors of common. In revset terminology, we return the
1017 that are not ancestors of common. In revset terminology, we return the
1018 tuple:
1018 tuple:
1019
1019
1020 ::common, (::heads) - (::common)
1020 ::common, (::heads) - (::common)
1021
1021
1022 The list is sorted by revision number, meaning it is
1022 The list is sorted by revision number, meaning it is
1023 topologically sorted.
1023 topologically sorted.
1024
1024
1025 'heads' and 'common' are both lists of node IDs. If heads is
1025 'heads' and 'common' are both lists of node IDs. If heads is
1026 not supplied, uses all of the revlog's heads. If common is not
1026 not supplied, uses all of the revlog's heads. If common is not
1027 supplied, uses nullid."""
1027 supplied, uses nullid."""
1028 if common is None:
1028 if common is None:
1029 common = [nullid]
1029 common = [nullid]
1030 if heads is None:
1030 if heads is None:
1031 heads = self.heads()
1031 heads = self.heads()
1032
1032
1033 common = [self.rev(n) for n in common]
1033 common = [self.rev(n) for n in common]
1034 heads = [self.rev(n) for n in heads]
1034 heads = [self.rev(n) for n in heads]
1035
1035
1036 # we want the ancestors, but inclusive
1036 # we want the ancestors, but inclusive
1037 class lazyset(object):
1037 class lazyset(object):
1038 def __init__(self, lazyvalues):
1038 def __init__(self, lazyvalues):
1039 self.addedvalues = set()
1039 self.addedvalues = set()
1040 self.lazyvalues = lazyvalues
1040 self.lazyvalues = lazyvalues
1041
1041
1042 def __contains__(self, value):
1042 def __contains__(self, value):
1043 return value in self.addedvalues or value in self.lazyvalues
1043 return value in self.addedvalues or value in self.lazyvalues
1044
1044
1045 def __iter__(self):
1045 def __iter__(self):
1046 added = self.addedvalues
1046 added = self.addedvalues
1047 for r in added:
1047 for r in added:
1048 yield r
1048 yield r
1049 for r in self.lazyvalues:
1049 for r in self.lazyvalues:
1050 if not r in added:
1050 if not r in added:
1051 yield r
1051 yield r
1052
1052
1053 def add(self, value):
1053 def add(self, value):
1054 self.addedvalues.add(value)
1054 self.addedvalues.add(value)
1055
1055
1056 def update(self, values):
1056 def update(self, values):
1057 self.addedvalues.update(values)
1057 self.addedvalues.update(values)
1058
1058
1059 has = lazyset(self.ancestors(common))
1059 has = lazyset(self.ancestors(common))
1060 has.add(nullrev)
1060 has.add(nullrev)
1061 has.update(common)
1061 has.update(common)
1062
1062
1063 # take all ancestors from heads that aren't in has
1063 # take all ancestors from heads that aren't in has
1064 missing = set()
1064 missing = set()
1065 visit = collections.deque(r for r in heads if r not in has)
1065 visit = collections.deque(r for r in heads if r not in has)
1066 while visit:
1066 while visit:
1067 r = visit.popleft()
1067 r = visit.popleft()
1068 if r in missing:
1068 if r in missing:
1069 continue
1069 continue
1070 else:
1070 else:
1071 missing.add(r)
1071 missing.add(r)
1072 for p in self.parentrevs(r):
1072 for p in self.parentrevs(r):
1073 if p not in has:
1073 if p not in has:
1074 visit.append(p)
1074 visit.append(p)
1075 missing = list(missing)
1075 missing = list(missing)
1076 missing.sort()
1076 missing.sort()
1077 return has, [self.node(miss) for miss in missing]
1077 return has, [self.node(miss) for miss in missing]
1078
1078
1079 def incrementalmissingrevs(self, common=None):
1079 def incrementalmissingrevs(self, common=None):
1080 """Return an object that can be used to incrementally compute the
1080 """Return an object that can be used to incrementally compute the
1081 revision numbers of the ancestors of arbitrary sets that are not
1081 revision numbers of the ancestors of arbitrary sets that are not
1082 ancestors of common. This is an ancestor.incrementalmissingancestors
1082 ancestors of common. This is an ancestor.incrementalmissingancestors
1083 object.
1083 object.
1084
1084
1085 'common' is a list of revision numbers. If common is not supplied, uses
1085 'common' is a list of revision numbers. If common is not supplied, uses
1086 nullrev.
1086 nullrev.
1087 """
1087 """
1088 if common is None:
1088 if common is None:
1089 common = [nullrev]
1089 common = [nullrev]
1090
1090
1091 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1091 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1092
1092
1093 def findmissingrevs(self, common=None, heads=None):
1093 def findmissingrevs(self, common=None, heads=None):
1094 """Return the revision numbers of the ancestors of heads that
1094 """Return the revision numbers of the ancestors of heads that
1095 are not ancestors of common.
1095 are not ancestors of common.
1096
1096
1097 More specifically, return a list of revision numbers corresponding to
1097 More specifically, return a list of revision numbers corresponding to
1098 nodes N such that every N satisfies the following constraints:
1098 nodes N such that every N satisfies the following constraints:
1099
1099
1100 1. N is an ancestor of some node in 'heads'
1100 1. N is an ancestor of some node in 'heads'
1101 2. N is not an ancestor of any node in 'common'
1101 2. N is not an ancestor of any node in 'common'
1102
1102
1103 The list is sorted by revision number, meaning it is
1103 The list is sorted by revision number, meaning it is
1104 topologically sorted.
1104 topologically sorted.
1105
1105
1106 'heads' and 'common' are both lists of revision numbers. If heads is
1106 'heads' and 'common' are both lists of revision numbers. If heads is
1107 not supplied, uses all of the revlog's heads. If common is not
1107 not supplied, uses all of the revlog's heads. If common is not
1108 supplied, uses nullid."""
1108 supplied, uses nullid."""
1109 if common is None:
1109 if common is None:
1110 common = [nullrev]
1110 common = [nullrev]
1111 if heads is None:
1111 if heads is None:
1112 heads = self.headrevs()
1112 heads = self.headrevs()
1113
1113
1114 inc = self.incrementalmissingrevs(common=common)
1114 inc = self.incrementalmissingrevs(common=common)
1115 return inc.missingancestors(heads)
1115 return inc.missingancestors(heads)
1116
1116
1117 def findmissing(self, common=None, heads=None):
1117 def findmissing(self, common=None, heads=None):
1118 """Return the ancestors of heads that are not ancestors of common.
1118 """Return the ancestors of heads that are not ancestors of common.
1119
1119
1120 More specifically, return a list of nodes N such that every N
1120 More specifically, return a list of nodes N such that every N
1121 satisfies the following constraints:
1121 satisfies the following constraints:
1122
1122
1123 1. N is an ancestor of some node in 'heads'
1123 1. N is an ancestor of some node in 'heads'
1124 2. N is not an ancestor of any node in 'common'
1124 2. N is not an ancestor of any node in 'common'
1125
1125
1126 The list is sorted by revision number, meaning it is
1126 The list is sorted by revision number, meaning it is
1127 topologically sorted.
1127 topologically sorted.
1128
1128
1129 'heads' and 'common' are both lists of node IDs. If heads is
1129 'heads' and 'common' are both lists of node IDs. If heads is
1130 not supplied, uses all of the revlog's heads. If common is not
1130 not supplied, uses all of the revlog's heads. If common is not
1131 supplied, uses nullid."""
1131 supplied, uses nullid."""
1132 if common is None:
1132 if common is None:
1133 common = [nullid]
1133 common = [nullid]
1134 if heads is None:
1134 if heads is None:
1135 heads = self.heads()
1135 heads = self.heads()
1136
1136
1137 common = [self.rev(n) for n in common]
1137 common = [self.rev(n) for n in common]
1138 heads = [self.rev(n) for n in heads]
1138 heads = [self.rev(n) for n in heads]
1139
1139
1140 inc = self.incrementalmissingrevs(common=common)
1140 inc = self.incrementalmissingrevs(common=common)
1141 return [self.node(r) for r in inc.missingancestors(heads)]
1141 return [self.node(r) for r in inc.missingancestors(heads)]
1142
1142
1143 def nodesbetween(self, roots=None, heads=None):
1143 def nodesbetween(self, roots=None, heads=None):
1144 """Return a topological path from 'roots' to 'heads'.
1144 """Return a topological path from 'roots' to 'heads'.
1145
1145
1146 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1146 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1147 topologically sorted list of all nodes N that satisfy both of
1147 topologically sorted list of all nodes N that satisfy both of
1148 these constraints:
1148 these constraints:
1149
1149
1150 1. N is a descendant of some node in 'roots'
1150 1. N is a descendant of some node in 'roots'
1151 2. N is an ancestor of some node in 'heads'
1151 2. N is an ancestor of some node in 'heads'
1152
1152
1153 Every node is considered to be both a descendant and an ancestor
1153 Every node is considered to be both a descendant and an ancestor
1154 of itself, so every reachable node in 'roots' and 'heads' will be
1154 of itself, so every reachable node in 'roots' and 'heads' will be
1155 included in 'nodes'.
1155 included in 'nodes'.
1156
1156
1157 'outroots' is the list of reachable nodes in 'roots', i.e., the
1157 'outroots' is the list of reachable nodes in 'roots', i.e., the
1158 subset of 'roots' that is returned in 'nodes'. Likewise,
1158 subset of 'roots' that is returned in 'nodes'. Likewise,
1159 'outheads' is the subset of 'heads' that is also in 'nodes'.
1159 'outheads' is the subset of 'heads' that is also in 'nodes'.
1160
1160
1161 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1161 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1162 unspecified, uses nullid as the only root. If 'heads' is
1162 unspecified, uses nullid as the only root. If 'heads' is
1163 unspecified, uses list of all of the revlog's heads."""
1163 unspecified, uses list of all of the revlog's heads."""
1164 nonodes = ([], [], [])
1164 nonodes = ([], [], [])
1165 if roots is not None:
1165 if roots is not None:
1166 roots = list(roots)
1166 roots = list(roots)
1167 if not roots:
1167 if not roots:
1168 return nonodes
1168 return nonodes
1169 lowestrev = min([self.rev(n) for n in roots])
1169 lowestrev = min([self.rev(n) for n in roots])
1170 else:
1170 else:
1171 roots = [nullid] # Everybody's a descendant of nullid
1171 roots = [nullid] # Everybody's a descendant of nullid
1172 lowestrev = nullrev
1172 lowestrev = nullrev
1173 if (lowestrev == nullrev) and (heads is None):
1173 if (lowestrev == nullrev) and (heads is None):
1174 # We want _all_ the nodes!
1174 # We want _all_ the nodes!
1175 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1175 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1176 if heads is None:
1176 if heads is None:
1177 # All nodes are ancestors, so the latest ancestor is the last
1177 # All nodes are ancestors, so the latest ancestor is the last
1178 # node.
1178 # node.
1179 highestrev = len(self) - 1
1179 highestrev = len(self) - 1
1180 # Set ancestors to None to signal that every node is an ancestor.
1180 # Set ancestors to None to signal that every node is an ancestor.
1181 ancestors = None
1181 ancestors = None
1182 # Set heads to an empty dictionary for later discovery of heads
1182 # Set heads to an empty dictionary for later discovery of heads
1183 heads = {}
1183 heads = {}
1184 else:
1184 else:
1185 heads = list(heads)
1185 heads = list(heads)
1186 if not heads:
1186 if not heads:
1187 return nonodes
1187 return nonodes
1188 ancestors = set()
1188 ancestors = set()
1189 # Turn heads into a dictionary so we can remove 'fake' heads.
1189 # Turn heads into a dictionary so we can remove 'fake' heads.
1190 # Also, later we will be using it to filter out the heads we can't
1190 # Also, later we will be using it to filter out the heads we can't
1191 # find from roots.
1191 # find from roots.
1192 heads = dict.fromkeys(heads, False)
1192 heads = dict.fromkeys(heads, False)
1193 # Start at the top and keep marking parents until we're done.
1193 # Start at the top and keep marking parents until we're done.
1194 nodestotag = set(heads)
1194 nodestotag = set(heads)
1195 # Remember where the top was so we can use it as a limit later.
1195 # Remember where the top was so we can use it as a limit later.
1196 highestrev = max([self.rev(n) for n in nodestotag])
1196 highestrev = max([self.rev(n) for n in nodestotag])
1197 while nodestotag:
1197 while nodestotag:
1198 # grab a node to tag
1198 # grab a node to tag
1199 n = nodestotag.pop()
1199 n = nodestotag.pop()
1200 # Never tag nullid
1200 # Never tag nullid
1201 if n == nullid:
1201 if n == nullid:
1202 continue
1202 continue
1203 # A node's revision number represents its place in a
1203 # A node's revision number represents its place in a
1204 # topologically sorted list of nodes.
1204 # topologically sorted list of nodes.
1205 r = self.rev(n)
1205 r = self.rev(n)
1206 if r >= lowestrev:
1206 if r >= lowestrev:
1207 if n not in ancestors:
1207 if n not in ancestors:
1208 # If we are possibly a descendant of one of the roots
1208 # If we are possibly a descendant of one of the roots
1209 # and we haven't already been marked as an ancestor
1209 # and we haven't already been marked as an ancestor
1210 ancestors.add(n) # Mark as ancestor
1210 ancestors.add(n) # Mark as ancestor
1211 # Add non-nullid parents to list of nodes to tag.
1211 # Add non-nullid parents to list of nodes to tag.
1212 nodestotag.update([p for p in self.parents(n) if
1212 nodestotag.update([p for p in self.parents(n) if
1213 p != nullid])
1213 p != nullid])
1214 elif n in heads: # We've seen it before, is it a fake head?
1214 elif n in heads: # We've seen it before, is it a fake head?
1215 # So it is, real heads should not be the ancestors of
1215 # So it is, real heads should not be the ancestors of
1216 # any other heads.
1216 # any other heads.
1217 heads.pop(n)
1217 heads.pop(n)
1218 if not ancestors:
1218 if not ancestors:
1219 return nonodes
1219 return nonodes
1220 # Now that we have our set of ancestors, we want to remove any
1220 # Now that we have our set of ancestors, we want to remove any
1221 # roots that are not ancestors.
1221 # roots that are not ancestors.
1222
1222
1223 # If one of the roots was nullid, everything is included anyway.
1223 # If one of the roots was nullid, everything is included anyway.
1224 if lowestrev > nullrev:
1224 if lowestrev > nullrev:
1225 # But, since we weren't, let's recompute the lowest rev to not
1225 # But, since we weren't, let's recompute the lowest rev to not
1226 # include roots that aren't ancestors.
1226 # include roots that aren't ancestors.
1227
1227
1228 # Filter out roots that aren't ancestors of heads
1228 # Filter out roots that aren't ancestors of heads
1229 roots = [root for root in roots if root in ancestors]
1229 roots = [root for root in roots if root in ancestors]
1230 # Recompute the lowest revision
1230 # Recompute the lowest revision
1231 if roots:
1231 if roots:
1232 lowestrev = min([self.rev(root) for root in roots])
1232 lowestrev = min([self.rev(root) for root in roots])
1233 else:
1233 else:
1234 # No more roots? Return empty list
1234 # No more roots? Return empty list
1235 return nonodes
1235 return nonodes
1236 else:
1236 else:
1237 # We are descending from nullid, and don't need to care about
1237 # We are descending from nullid, and don't need to care about
1238 # any other roots.
1238 # any other roots.
1239 lowestrev = nullrev
1239 lowestrev = nullrev
1240 roots = [nullid]
1240 roots = [nullid]
1241 # Transform our roots list into a set.
1241 # Transform our roots list into a set.
1242 descendants = set(roots)
1242 descendants = set(roots)
1243 # Also, keep the original roots so we can filter out roots that aren't
1243 # Also, keep the original roots so we can filter out roots that aren't
1244 # 'real' roots (i.e. are descended from other roots).
1244 # 'real' roots (i.e. are descended from other roots).
1245 roots = descendants.copy()
1245 roots = descendants.copy()
1246 # Our topologically sorted list of output nodes.
1246 # Our topologically sorted list of output nodes.
1247 orderedout = []
1247 orderedout = []
1248 # Don't start at nullid since we don't want nullid in our output list,
1248 # Don't start at nullid since we don't want nullid in our output list,
1249 # and if nullid shows up in descendants, empty parents will look like
1249 # and if nullid shows up in descendants, empty parents will look like
1250 # they're descendants.
1250 # they're descendants.
1251 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1251 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1252 n = self.node(r)
1252 n = self.node(r)
1253 isdescendant = False
1253 isdescendant = False
1254 if lowestrev == nullrev: # Everybody is a descendant of nullid
1254 if lowestrev == nullrev: # Everybody is a descendant of nullid
1255 isdescendant = True
1255 isdescendant = True
1256 elif n in descendants:
1256 elif n in descendants:
1257 # n is already a descendant
1257 # n is already a descendant
1258 isdescendant = True
1258 isdescendant = True
1259 # This check only needs to be done here because all the roots
1259 # This check only needs to be done here because all the roots
1260 # will start being marked is descendants before the loop.
1260 # will start being marked is descendants before the loop.
1261 if n in roots:
1261 if n in roots:
1262 # If n was a root, check if it's a 'real' root.
1262 # If n was a root, check if it's a 'real' root.
1263 p = tuple(self.parents(n))
1263 p = tuple(self.parents(n))
1264 # If any of its parents are descendants, it's not a root.
1264 # If any of its parents are descendants, it's not a root.
1265 if (p[0] in descendants) or (p[1] in descendants):
1265 if (p[0] in descendants) or (p[1] in descendants):
1266 roots.remove(n)
1266 roots.remove(n)
1267 else:
1267 else:
1268 p = tuple(self.parents(n))
1268 p = tuple(self.parents(n))
1269 # A node is a descendant if either of its parents are
1269 # A node is a descendant if either of its parents are
1270 # descendants. (We seeded the dependents list with the roots
1270 # descendants. (We seeded the dependents list with the roots
1271 # up there, remember?)
1271 # up there, remember?)
1272 if (p[0] in descendants) or (p[1] in descendants):
1272 if (p[0] in descendants) or (p[1] in descendants):
1273 descendants.add(n)
1273 descendants.add(n)
1274 isdescendant = True
1274 isdescendant = True
1275 if isdescendant and ((ancestors is None) or (n in ancestors)):
1275 if isdescendant and ((ancestors is None) or (n in ancestors)):
1276 # Only include nodes that are both descendants and ancestors.
1276 # Only include nodes that are both descendants and ancestors.
1277 orderedout.append(n)
1277 orderedout.append(n)
1278 if (ancestors is not None) and (n in heads):
1278 if (ancestors is not None) and (n in heads):
1279 # We're trying to figure out which heads are reachable
1279 # We're trying to figure out which heads are reachable
1280 # from roots.
1280 # from roots.
1281 # Mark this head as having been reached
1281 # Mark this head as having been reached
1282 heads[n] = True
1282 heads[n] = True
1283 elif ancestors is None:
1283 elif ancestors is None:
1284 # Otherwise, we're trying to discover the heads.
1284 # Otherwise, we're trying to discover the heads.
1285 # Assume this is a head because if it isn't, the next step
1285 # Assume this is a head because if it isn't, the next step
1286 # will eventually remove it.
1286 # will eventually remove it.
1287 heads[n] = True
1287 heads[n] = True
1288 # But, obviously its parents aren't.
1288 # But, obviously its parents aren't.
1289 for p in self.parents(n):
1289 for p in self.parents(n):
1290 heads.pop(p, None)
1290 heads.pop(p, None)
1291 heads = [head for head, flag in heads.iteritems() if flag]
1291 heads = [head for head, flag in heads.iteritems() if flag]
1292 roots = list(roots)
1292 roots = list(roots)
1293 assert orderedout
1293 assert orderedout
1294 assert roots
1294 assert roots
1295 assert heads
1295 assert heads
1296 return (orderedout, roots, heads)
1296 return (orderedout, roots, heads)
1297
1297
1298 def headrevs(self):
1298 def headrevs(self):
1299 try:
1299 try:
1300 return self.index.headrevs()
1300 return self.index.headrevs()
1301 except AttributeError:
1301 except AttributeError:
1302 return self._headrevs()
1302 return self._headrevs()
1303
1303
1304 def computephases(self, roots):
1304 def computephases(self, roots):
1305 return self.index.computephasesmapsets(roots)
1305 return self.index.computephasesmapsets(roots)
1306
1306
1307 def _headrevs(self):
1307 def _headrevs(self):
1308 count = len(self)
1308 count = len(self)
1309 if not count:
1309 if not count:
1310 return [nullrev]
1310 return [nullrev]
1311 # we won't iter over filtered rev so nobody is a head at start
1311 # we won't iter over filtered rev so nobody is a head at start
1312 ishead = [0] * (count + 1)
1312 ishead = [0] * (count + 1)
1313 index = self.index
1313 index = self.index
1314 for r in self:
1314 for r in self:
1315 ishead[r] = 1 # I may be an head
1315 ishead[r] = 1 # I may be an head
1316 e = index[r]
1316 e = index[r]
1317 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1317 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1318 return [r for r, val in enumerate(ishead) if val]
1318 return [r for r, val in enumerate(ishead) if val]
1319
1319
1320 def heads(self, start=None, stop=None):
1320 def heads(self, start=None, stop=None):
1321 """return the list of all nodes that have no children
1321 """return the list of all nodes that have no children
1322
1322
1323 if start is specified, only heads that are descendants of
1323 if start is specified, only heads that are descendants of
1324 start will be returned
1324 start will be returned
1325 if stop is specified, it will consider all the revs from stop
1325 if stop is specified, it will consider all the revs from stop
1326 as if they had no children
1326 as if they had no children
1327 """
1327 """
1328 if start is None and stop is None:
1328 if start is None and stop is None:
1329 if not len(self):
1329 if not len(self):
1330 return [nullid]
1330 return [nullid]
1331 return [self.node(r) for r in self.headrevs()]
1331 return [self.node(r) for r in self.headrevs()]
1332
1332
1333 if start is None:
1333 if start is None:
1334 start = nullid
1334 start = nullid
1335 if stop is None:
1335 if stop is None:
1336 stop = []
1336 stop = []
1337 stoprevs = set([self.rev(n) for n in stop])
1337 stoprevs = set([self.rev(n) for n in stop])
1338 startrev = self.rev(start)
1338 startrev = self.rev(start)
1339 reachable = {startrev}
1339 reachable = {startrev}
1340 heads = {startrev}
1340 heads = {startrev}
1341
1341
1342 parentrevs = self.parentrevs
1342 parentrevs = self.parentrevs
1343 for r in self.revs(start=startrev + 1):
1343 for r in self.revs(start=startrev + 1):
1344 for p in parentrevs(r):
1344 for p in parentrevs(r):
1345 if p in reachable:
1345 if p in reachable:
1346 if r not in stoprevs:
1346 if r not in stoprevs:
1347 reachable.add(r)
1347 reachable.add(r)
1348 heads.add(r)
1348 heads.add(r)
1349 if p in heads and p not in stoprevs:
1349 if p in heads and p not in stoprevs:
1350 heads.remove(p)
1350 heads.remove(p)
1351
1351
1352 return [self.node(r) for r in heads]
1352 return [self.node(r) for r in heads]
1353
1353
1354 def children(self, node):
1354 def children(self, node):
1355 """find the children of a given node"""
1355 """find the children of a given node"""
1356 c = []
1356 c = []
1357 p = self.rev(node)
1357 p = self.rev(node)
1358 for r in self.revs(start=p + 1):
1358 for r in self.revs(start=p + 1):
1359 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1359 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1360 if prevs:
1360 if prevs:
1361 for pr in prevs:
1361 for pr in prevs:
1362 if pr == p:
1362 if pr == p:
1363 c.append(self.node(r))
1363 c.append(self.node(r))
1364 elif p == nullrev:
1364 elif p == nullrev:
1365 c.append(self.node(r))
1365 c.append(self.node(r))
1366 return c
1366 return c
1367
1367
1368 def descendant(self, start, end):
1368 def descendant(self, start, end):
1369 if start == nullrev:
1369 if start == nullrev:
1370 return True
1370 return True
1371 for i in self.descendants([start]):
1371 for i in self.descendants([start]):
1372 if i == end:
1372 if i == end:
1373 return True
1373 return True
1374 elif i > end:
1374 elif i > end:
1375 break
1375 break
1376 return False
1376 return False
1377
1377
1378 def commonancestorsheads(self, a, b):
1378 def commonancestorsheads(self, a, b):
1379 """calculate all the heads of the common ancestors of nodes a and b"""
1379 """calculate all the heads of the common ancestors of nodes a and b"""
1380 a, b = self.rev(a), self.rev(b)
1380 a, b = self.rev(a), self.rev(b)
1381 try:
1381 try:
1382 ancs = self.index.commonancestorsheads(a, b)
1382 ancs = self.index.commonancestorsheads(a, b)
1383 except (AttributeError, OverflowError): # C implementation failed
1383 except (AttributeError, OverflowError): # C implementation failed
1384 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
1384 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
1385 return pycompat.maplist(self.node, ancs)
1385 return pycompat.maplist(self.node, ancs)
1386
1386
1387 def isancestor(self, a, b):
1387 def isancestor(self, a, b):
1388 """return True if node a is an ancestor of node b
1388 """return True if node a is an ancestor of node b
1389
1389
1390 The implementation of this is trivial but the use of
1390 The implementation of this is trivial but the use of
1391 commonancestorsheads is not."""
1391 commonancestorsheads is not."""
1392 return a in self.commonancestorsheads(a, b)
1392 return a in self.commonancestorsheads(a, b)
1393
1393
1394 def ancestor(self, a, b):
1394 def ancestor(self, a, b):
1395 """calculate the "best" common ancestor of nodes a and b"""
1395 """calculate the "best" common ancestor of nodes a and b"""
1396
1396
1397 a, b = self.rev(a), self.rev(b)
1397 a, b = self.rev(a), self.rev(b)
1398 try:
1398 try:
1399 ancs = self.index.ancestors(a, b)
1399 ancs = self.index.ancestors(a, b)
1400 except (AttributeError, OverflowError):
1400 except (AttributeError, OverflowError):
1401 ancs = ancestor.ancestors(self.parentrevs, a, b)
1401 ancs = ancestor.ancestors(self.parentrevs, a, b)
1402 if ancs:
1402 if ancs:
1403 # choose a consistent winner when there's a tie
1403 # choose a consistent winner when there's a tie
1404 return min(map(self.node, ancs))
1404 return min(map(self.node, ancs))
1405 return nullid
1405 return nullid
1406
1406
1407 def _match(self, id):
1407 def _match(self, id):
1408 if isinstance(id, int):
1408 if isinstance(id, int):
1409 # rev
1409 # rev
1410 return self.node(id)
1410 return self.node(id)
1411 if len(id) == 20:
1411 if len(id) == 20:
1412 # possibly a binary node
1412 # possibly a binary node
1413 # odds of a binary node being all hex in ASCII are 1 in 10**25
1413 # odds of a binary node being all hex in ASCII are 1 in 10**25
1414 try:
1414 try:
1415 node = id
1415 node = id
1416 self.rev(node) # quick search the index
1416 self.rev(node) # quick search the index
1417 return node
1417 return node
1418 except LookupError:
1418 except LookupError:
1419 pass # may be partial hex id
1419 pass # may be partial hex id
1420 try:
1420 try:
1421 # str(rev)
1421 # str(rev)
1422 rev = int(id)
1422 rev = int(id)
1423 if "%d" % rev != id:
1423 if "%d" % rev != id:
1424 raise ValueError
1424 raise ValueError
1425 if rev < 0:
1425 if rev < 0:
1426 rev = len(self) + rev
1426 rev = len(self) + rev
1427 if rev < 0 or rev >= len(self):
1427 if rev < 0 or rev >= len(self):
1428 raise ValueError
1428 raise ValueError
1429 return self.node(rev)
1429 return self.node(rev)
1430 except (ValueError, OverflowError):
1430 except (ValueError, OverflowError):
1431 pass
1431 pass
1432 if len(id) == 40:
1432 if len(id) == 40:
1433 try:
1433 try:
1434 # a full hex nodeid?
1434 # a full hex nodeid?
1435 node = bin(id)
1435 node = bin(id)
1436 self.rev(node)
1436 self.rev(node)
1437 return node
1437 return node
1438 except (TypeError, LookupError):
1438 except (TypeError, LookupError):
1439 pass
1439 pass
1440
1440
1441 def _partialmatch(self, id):
1441 def _partialmatch(self, id):
1442 # we don't care wdirfilenodeids as they should be always full hash
1442 # we don't care wdirfilenodeids as they should be always full hash
1443 maybewdir = wdirhex.startswith(id)
1443 maybewdir = wdirhex.startswith(id)
1444 try:
1444 try:
1445 partial = self.index.partialmatch(id)
1445 partial = self.index.partialmatch(id)
1446 if partial and self.hasnode(partial):
1446 if partial and self.hasnode(partial):
1447 if maybewdir:
1447 if maybewdir:
1448 # single 'ff...' match in radix tree, ambiguous with wdir
1448 # single 'ff...' match in radix tree, ambiguous with wdir
1449 raise RevlogError
1449 raise RevlogError
1450 return partial
1450 return partial
1451 if maybewdir:
1451 if maybewdir:
1452 # no 'ff...' match in radix tree, wdir identified
1452 # no 'ff...' match in radix tree, wdir identified
1453 raise error.WdirUnsupported
1453 raise error.WdirUnsupported
1454 return None
1454 return None
1455 except RevlogError:
1455 except RevlogError:
1456 # parsers.c radix tree lookup gave multiple matches
1456 # parsers.c radix tree lookup gave multiple matches
1457 # fast path: for unfiltered changelog, radix tree is accurate
1457 # fast path: for unfiltered changelog, radix tree is accurate
1458 if not getattr(self, 'filteredrevs', None):
1458 if not getattr(self, 'filteredrevs', None):
1459 raise LookupError(id, self.indexfile,
1459 raise LookupError(id, self.indexfile,
1460 _('ambiguous identifier'))
1460 _('ambiguous identifier'))
1461 # fall through to slow path that filters hidden revisions
1461 # fall through to slow path that filters hidden revisions
1462 except (AttributeError, ValueError):
1462 except (AttributeError, ValueError):
1463 # we are pure python, or key was too short to search radix tree
1463 # we are pure python, or key was too short to search radix tree
1464 pass
1464 pass
1465
1465
1466 if id in self._pcache:
1466 if id in self._pcache:
1467 return self._pcache[id]
1467 return self._pcache[id]
1468
1468
1469 if len(id) <= 40:
1469 if len(id) <= 40:
1470 try:
1470 try:
1471 # hex(node)[:...]
1471 # hex(node)[:...]
1472 l = len(id) // 2 # grab an even number of digits
1472 l = len(id) // 2 # grab an even number of digits
1473 prefix = bin(id[:l * 2])
1473 prefix = bin(id[:l * 2])
1474 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1474 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1475 nl = [n for n in nl if hex(n).startswith(id) and
1475 nl = [n for n in nl if hex(n).startswith(id) and
1476 self.hasnode(n)]
1476 self.hasnode(n)]
1477 if len(nl) > 0:
1477 if len(nl) > 0:
1478 if len(nl) == 1 and not maybewdir:
1478 if len(nl) == 1 and not maybewdir:
1479 self._pcache[id] = nl[0]
1479 self._pcache[id] = nl[0]
1480 return nl[0]
1480 return nl[0]
1481 raise LookupError(id, self.indexfile,
1481 raise LookupError(id, self.indexfile,
1482 _('ambiguous identifier'))
1482 _('ambiguous identifier'))
1483 if maybewdir:
1483 if maybewdir:
1484 raise error.WdirUnsupported
1484 raise error.WdirUnsupported
1485 return None
1485 return None
1486 except TypeError:
1486 except TypeError:
1487 pass
1487 pass
1488
1488
1489 def lookup(self, id):
1489 def lookup(self, id):
1490 """locate a node based on:
1490 """locate a node based on:
1491 - revision number or str(revision number)
1491 - revision number or str(revision number)
1492 - nodeid or subset of hex nodeid
1492 - nodeid or subset of hex nodeid
1493 """
1493 """
1494 n = self._match(id)
1494 n = self._match(id)
1495 if n is not None:
1495 if n is not None:
1496 return n
1496 return n
1497 n = self._partialmatch(id)
1497 n = self._partialmatch(id)
1498 if n:
1498 if n:
1499 return n
1499 return n
1500
1500
1501 raise LookupError(id, self.indexfile, _('no match found'))
1501 raise LookupError(id, self.indexfile, _('no match found'))
1502
1502
1503 def shortest(self, node, minlength=1):
1503 def shortest(self, node, minlength=1):
1504 """Find the shortest unambiguous prefix that matches node."""
1504 """Find the shortest unambiguous prefix that matches node."""
1505 def isrev(prefix):
1505 def isrev(prefix):
1506 try:
1506 try:
1507 i = int(prefix)
1507 i = int(prefix)
1508 # if we are a pure int, then starting with zero will not be
1508 # if we are a pure int, then starting with zero will not be
1509 # confused as a rev; or, obviously, if the int is larger
1509 # confused as a rev; or, obviously, if the int is larger
1510 # than the value of the tip rev
1510 # than the value of the tip rev
1511 if prefix[0] == '0' or i > len(self):
1511 if prefix[0] == '0' or i > len(self):
1512 return False
1512 return False
1513 return True
1513 return True
1514 except ValueError:
1514 except ValueError:
1515 return False
1515 return False
1516
1516
1517 def isvalid(prefix):
1517 def isvalid(prefix):
1518 try:
1518 try:
1519 node = self._partialmatch(prefix)
1519 node = self._partialmatch(prefix)
1520 except error.RevlogError:
1520 except error.RevlogError:
1521 return False
1521 return False
1522 except error.WdirUnsupported:
1522 except error.WdirUnsupported:
1523 # single 'ff...' match
1523 # single 'ff...' match
1524 return True
1524 return True
1525 if node is None:
1525 if node is None:
1526 raise LookupError(node, self.indexfile, _('no node'))
1526 raise LookupError(node, self.indexfile, _('no node'))
1527 return not isrev(prefix)
1527 return not isrev(prefix)
1528
1528
1529 def maybewdir(prefix):
1530 return all(c == 'f' for c in prefix)
1531
1529 hexnode = hex(node)
1532 hexnode = hex(node)
1533
1534 def disambiguate(hexnode, minlength):
1535 for length in range(minlength, 41):
1536 prefix = hexnode[:length]
1537 if not isrev(prefix) and not maybewdir(prefix):
1538 return prefix
1539
1540 if not getattr(self, 'filteredrevs', None):
1541 try:
1542 length = max(self.index.shortest(node), minlength)
1543 return disambiguate(hexnode, length)
1544 except RevlogError:
1545 if node == wdirid:
1546 for length in range(minlength, 41):
1547 prefix = hexnode[:length]
1548 if isvalid(prefix):
1549 return prefix
1550 else:
1551 raise LookupError(node, self.indexfile, _('no node'))
1552 except AttributeError:
1553 # Fall through to pure code
1554 pass
1555
1530 shortest = hexnode
1556 shortest = hexnode
1531 startlength = max(6, minlength)
1557 startlength = max(6, minlength)
1532 length = startlength
1558 length = startlength
1533 while True:
1559 while True:
1534 prefix = hexnode[:length]
1560 prefix = hexnode[:length]
1535 if isvalid(prefix):
1561 if isvalid(prefix):
1536 shortest = prefix
1562 shortest = prefix
1537 if length == minlength or length > startlength:
1563 if length == minlength or length > startlength:
1538 return shortest
1564 return shortest
1539 length -= 1
1565 length -= 1
1540 else:
1566 else:
1541 length += 1
1567 length += 1
1542 if len(shortest) <= length:
1568 if len(shortest) <= length:
1543 return shortest
1569 return shortest
1544
1570
1545 def cmp(self, node, text):
1571 def cmp(self, node, text):
1546 """compare text with a given file revision
1572 """compare text with a given file revision
1547
1573
1548 returns True if text is different than what is stored.
1574 returns True if text is different than what is stored.
1549 """
1575 """
1550 p1, p2 = self.parents(node)
1576 p1, p2 = self.parents(node)
1551 return hash(text, p1, p2) != node
1577 return hash(text, p1, p2) != node
1552
1578
1553 def _cachesegment(self, offset, data):
1579 def _cachesegment(self, offset, data):
1554 """Add a segment to the revlog cache.
1580 """Add a segment to the revlog cache.
1555
1581
1556 Accepts an absolute offset and the data that is at that location.
1582 Accepts an absolute offset and the data that is at that location.
1557 """
1583 """
1558 o, d = self._chunkcache
1584 o, d = self._chunkcache
1559 # try to add to existing cache
1585 # try to add to existing cache
1560 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1586 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1561 self._chunkcache = o, d + data
1587 self._chunkcache = o, d + data
1562 else:
1588 else:
1563 self._chunkcache = offset, data
1589 self._chunkcache = offset, data
1564
1590
1565 def _readsegment(self, offset, length, df=None):
1591 def _readsegment(self, offset, length, df=None):
1566 """Load a segment of raw data from the revlog.
1592 """Load a segment of raw data from the revlog.
1567
1593
1568 Accepts an absolute offset, length to read, and an optional existing
1594 Accepts an absolute offset, length to read, and an optional existing
1569 file handle to read from.
1595 file handle to read from.
1570
1596
1571 If an existing file handle is passed, it will be seeked and the
1597 If an existing file handle is passed, it will be seeked and the
1572 original seek position will NOT be restored.
1598 original seek position will NOT be restored.
1573
1599
1574 Returns a str or buffer of raw byte data.
1600 Returns a str or buffer of raw byte data.
1575 """
1601 """
1576 # Cache data both forward and backward around the requested
1602 # Cache data both forward and backward around the requested
1577 # data, in a fixed size window. This helps speed up operations
1603 # data, in a fixed size window. This helps speed up operations
1578 # involving reading the revlog backwards.
1604 # involving reading the revlog backwards.
1579 cachesize = self._chunkcachesize
1605 cachesize = self._chunkcachesize
1580 realoffset = offset & ~(cachesize - 1)
1606 realoffset = offset & ~(cachesize - 1)
1581 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1607 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1582 - realoffset)
1608 - realoffset)
1583 with self._datareadfp(df) as df:
1609 with self._datareadfp(df) as df:
1584 df.seek(realoffset)
1610 df.seek(realoffset)
1585 d = df.read(reallength)
1611 d = df.read(reallength)
1586 self._cachesegment(realoffset, d)
1612 self._cachesegment(realoffset, d)
1587 if offset != realoffset or reallength != length:
1613 if offset != realoffset or reallength != length:
1588 return util.buffer(d, offset - realoffset, length)
1614 return util.buffer(d, offset - realoffset, length)
1589 return d
1615 return d
1590
1616
1591 def _getsegment(self, offset, length, df=None):
1617 def _getsegment(self, offset, length, df=None):
1592 """Obtain a segment of raw data from the revlog.
1618 """Obtain a segment of raw data from the revlog.
1593
1619
1594 Accepts an absolute offset, length of bytes to obtain, and an
1620 Accepts an absolute offset, length of bytes to obtain, and an
1595 optional file handle to the already-opened revlog. If the file
1621 optional file handle to the already-opened revlog. If the file
1596 handle is used, it's original seek position will not be preserved.
1622 handle is used, it's original seek position will not be preserved.
1597
1623
1598 Requests for data may be returned from a cache.
1624 Requests for data may be returned from a cache.
1599
1625
1600 Returns a str or a buffer instance of raw byte data.
1626 Returns a str or a buffer instance of raw byte data.
1601 """
1627 """
1602 o, d = self._chunkcache
1628 o, d = self._chunkcache
1603 l = len(d)
1629 l = len(d)
1604
1630
1605 # is it in the cache?
1631 # is it in the cache?
1606 cachestart = offset - o
1632 cachestart = offset - o
1607 cacheend = cachestart + length
1633 cacheend = cachestart + length
1608 if cachestart >= 0 and cacheend <= l:
1634 if cachestart >= 0 and cacheend <= l:
1609 if cachestart == 0 and cacheend == l:
1635 if cachestart == 0 and cacheend == l:
1610 return d # avoid a copy
1636 return d # avoid a copy
1611 return util.buffer(d, cachestart, cacheend - cachestart)
1637 return util.buffer(d, cachestart, cacheend - cachestart)
1612
1638
1613 return self._readsegment(offset, length, df=df)
1639 return self._readsegment(offset, length, df=df)
1614
1640
1615 def _getsegmentforrevs(self, startrev, endrev, df=None):
1641 def _getsegmentforrevs(self, startrev, endrev, df=None):
1616 """Obtain a segment of raw data corresponding to a range of revisions.
1642 """Obtain a segment of raw data corresponding to a range of revisions.
1617
1643
1618 Accepts the start and end revisions and an optional already-open
1644 Accepts the start and end revisions and an optional already-open
1619 file handle to be used for reading. If the file handle is read, its
1645 file handle to be used for reading. If the file handle is read, its
1620 seek position will not be preserved.
1646 seek position will not be preserved.
1621
1647
1622 Requests for data may be satisfied by a cache.
1648 Requests for data may be satisfied by a cache.
1623
1649
1624 Returns a 2-tuple of (offset, data) for the requested range of
1650 Returns a 2-tuple of (offset, data) for the requested range of
1625 revisions. Offset is the integer offset from the beginning of the
1651 revisions. Offset is the integer offset from the beginning of the
1626 revlog and data is a str or buffer of the raw byte data.
1652 revlog and data is a str or buffer of the raw byte data.
1627
1653
1628 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1654 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1629 to determine where each revision's data begins and ends.
1655 to determine where each revision's data begins and ends.
1630 """
1656 """
1631 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1657 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1632 # (functions are expensive).
1658 # (functions are expensive).
1633 index = self.index
1659 index = self.index
1634 istart = index[startrev]
1660 istart = index[startrev]
1635 start = int(istart[0] >> 16)
1661 start = int(istart[0] >> 16)
1636 if startrev == endrev:
1662 if startrev == endrev:
1637 end = start + istart[1]
1663 end = start + istart[1]
1638 else:
1664 else:
1639 iend = index[endrev]
1665 iend = index[endrev]
1640 end = int(iend[0] >> 16) + iend[1]
1666 end = int(iend[0] >> 16) + iend[1]
1641
1667
1642 if self._inline:
1668 if self._inline:
1643 start += (startrev + 1) * self._io.size
1669 start += (startrev + 1) * self._io.size
1644 end += (endrev + 1) * self._io.size
1670 end += (endrev + 1) * self._io.size
1645 length = end - start
1671 length = end - start
1646
1672
1647 return start, self._getsegment(start, length, df=df)
1673 return start, self._getsegment(start, length, df=df)
1648
1674
1649 def _chunk(self, rev, df=None):
1675 def _chunk(self, rev, df=None):
1650 """Obtain a single decompressed chunk for a revision.
1676 """Obtain a single decompressed chunk for a revision.
1651
1677
1652 Accepts an integer revision and an optional already-open file handle
1678 Accepts an integer revision and an optional already-open file handle
1653 to be used for reading. If used, the seek position of the file will not
1679 to be used for reading. If used, the seek position of the file will not
1654 be preserved.
1680 be preserved.
1655
1681
1656 Returns a str holding uncompressed data for the requested revision.
1682 Returns a str holding uncompressed data for the requested revision.
1657 """
1683 """
1658 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1684 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1659
1685
1660 def _chunks(self, revs, df=None):
1686 def _chunks(self, revs, df=None):
1661 """Obtain decompressed chunks for the specified revisions.
1687 """Obtain decompressed chunks for the specified revisions.
1662
1688
1663 Accepts an iterable of numeric revisions that are assumed to be in
1689 Accepts an iterable of numeric revisions that are assumed to be in
1664 ascending order. Also accepts an optional already-open file handle
1690 ascending order. Also accepts an optional already-open file handle
1665 to be used for reading. If used, the seek position of the file will
1691 to be used for reading. If used, the seek position of the file will
1666 not be preserved.
1692 not be preserved.
1667
1693
1668 This function is similar to calling ``self._chunk()`` multiple times,
1694 This function is similar to calling ``self._chunk()`` multiple times,
1669 but is faster.
1695 but is faster.
1670
1696
1671 Returns a list with decompressed data for each requested revision.
1697 Returns a list with decompressed data for each requested revision.
1672 """
1698 """
1673 if not revs:
1699 if not revs:
1674 return []
1700 return []
1675 start = self.start
1701 start = self.start
1676 length = self.length
1702 length = self.length
1677 inline = self._inline
1703 inline = self._inline
1678 iosize = self._io.size
1704 iosize = self._io.size
1679 buffer = util.buffer
1705 buffer = util.buffer
1680
1706
1681 l = []
1707 l = []
1682 ladd = l.append
1708 ladd = l.append
1683
1709
1684 if not self._withsparseread:
1710 if not self._withsparseread:
1685 slicedchunks = (revs,)
1711 slicedchunks = (revs,)
1686 else:
1712 else:
1687 slicedchunks = _slicechunk(self, revs)
1713 slicedchunks = _slicechunk(self, revs)
1688
1714
1689 for revschunk in slicedchunks:
1715 for revschunk in slicedchunks:
1690 firstrev = revschunk[0]
1716 firstrev = revschunk[0]
1691 # Skip trailing revisions with empty diff
1717 # Skip trailing revisions with empty diff
1692 for lastrev in revschunk[::-1]:
1718 for lastrev in revschunk[::-1]:
1693 if length(lastrev) != 0:
1719 if length(lastrev) != 0:
1694 break
1720 break
1695
1721
1696 try:
1722 try:
1697 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1723 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1698 except OverflowError:
1724 except OverflowError:
1699 # issue4215 - we can't cache a run of chunks greater than
1725 # issue4215 - we can't cache a run of chunks greater than
1700 # 2G on Windows
1726 # 2G on Windows
1701 return [self._chunk(rev, df=df) for rev in revschunk]
1727 return [self._chunk(rev, df=df) for rev in revschunk]
1702
1728
1703 decomp = self.decompress
1729 decomp = self.decompress
1704 for rev in revschunk:
1730 for rev in revschunk:
1705 chunkstart = start(rev)
1731 chunkstart = start(rev)
1706 if inline:
1732 if inline:
1707 chunkstart += (rev + 1) * iosize
1733 chunkstart += (rev + 1) * iosize
1708 chunklength = length(rev)
1734 chunklength = length(rev)
1709 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1735 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1710
1736
1711 return l
1737 return l
1712
1738
1713 def _chunkclear(self):
1739 def _chunkclear(self):
1714 """Clear the raw chunk cache."""
1740 """Clear the raw chunk cache."""
1715 self._chunkcache = (0, '')
1741 self._chunkcache = (0, '')
1716
1742
1717 def deltaparent(self, rev):
1743 def deltaparent(self, rev):
1718 """return deltaparent of the given revision"""
1744 """return deltaparent of the given revision"""
1719 base = self.index[rev][3]
1745 base = self.index[rev][3]
1720 if base == rev:
1746 if base == rev:
1721 return nullrev
1747 return nullrev
1722 elif self._generaldelta:
1748 elif self._generaldelta:
1723 return base
1749 return base
1724 else:
1750 else:
1725 return rev - 1
1751 return rev - 1
1726
1752
1727 def revdiff(self, rev1, rev2):
1753 def revdiff(self, rev1, rev2):
1728 """return or calculate a delta between two revisions
1754 """return or calculate a delta between two revisions
1729
1755
1730 The delta calculated is in binary form and is intended to be written to
1756 The delta calculated is in binary form and is intended to be written to
1731 revlog data directly. So this function needs raw revision data.
1757 revlog data directly. So this function needs raw revision data.
1732 """
1758 """
1733 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1759 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1734 return bytes(self._chunk(rev2))
1760 return bytes(self._chunk(rev2))
1735
1761
1736 return mdiff.textdiff(self.revision(rev1, raw=True),
1762 return mdiff.textdiff(self.revision(rev1, raw=True),
1737 self.revision(rev2, raw=True))
1763 self.revision(rev2, raw=True))
1738
1764
1739 def revision(self, nodeorrev, _df=None, raw=False):
1765 def revision(self, nodeorrev, _df=None, raw=False):
1740 """return an uncompressed revision of a given node or revision
1766 """return an uncompressed revision of a given node or revision
1741 number.
1767 number.
1742
1768
1743 _df - an existing file handle to read from. (internal-only)
1769 _df - an existing file handle to read from. (internal-only)
1744 raw - an optional argument specifying if the revision data is to be
1770 raw - an optional argument specifying if the revision data is to be
1745 treated as raw data when applying flag transforms. 'raw' should be set
1771 treated as raw data when applying flag transforms. 'raw' should be set
1746 to True when generating changegroups or in debug commands.
1772 to True when generating changegroups or in debug commands.
1747 """
1773 """
1748 if isinstance(nodeorrev, int):
1774 if isinstance(nodeorrev, int):
1749 rev = nodeorrev
1775 rev = nodeorrev
1750 node = self.node(rev)
1776 node = self.node(rev)
1751 else:
1777 else:
1752 node = nodeorrev
1778 node = nodeorrev
1753 rev = None
1779 rev = None
1754
1780
1755 cachedrev = None
1781 cachedrev = None
1756 flags = None
1782 flags = None
1757 rawtext = None
1783 rawtext = None
1758 if node == nullid:
1784 if node == nullid:
1759 return ""
1785 return ""
1760 if self._cache:
1786 if self._cache:
1761 if self._cache[0] == node:
1787 if self._cache[0] == node:
1762 # _cache only stores rawtext
1788 # _cache only stores rawtext
1763 if raw:
1789 if raw:
1764 return self._cache[2]
1790 return self._cache[2]
1765 # duplicated, but good for perf
1791 # duplicated, but good for perf
1766 if rev is None:
1792 if rev is None:
1767 rev = self.rev(node)
1793 rev = self.rev(node)
1768 if flags is None:
1794 if flags is None:
1769 flags = self.flags(rev)
1795 flags = self.flags(rev)
1770 # no extra flags set, no flag processor runs, text = rawtext
1796 # no extra flags set, no flag processor runs, text = rawtext
1771 if flags == REVIDX_DEFAULT_FLAGS:
1797 if flags == REVIDX_DEFAULT_FLAGS:
1772 return self._cache[2]
1798 return self._cache[2]
1773 # rawtext is reusable. need to run flag processor
1799 # rawtext is reusable. need to run flag processor
1774 rawtext = self._cache[2]
1800 rawtext = self._cache[2]
1775
1801
1776 cachedrev = self._cache[1]
1802 cachedrev = self._cache[1]
1777
1803
1778 # look up what we need to read
1804 # look up what we need to read
1779 if rawtext is None:
1805 if rawtext is None:
1780 if rev is None:
1806 if rev is None:
1781 rev = self.rev(node)
1807 rev = self.rev(node)
1782
1808
1783 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1809 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1784 if stopped:
1810 if stopped:
1785 rawtext = self._cache[2]
1811 rawtext = self._cache[2]
1786
1812
1787 # drop cache to save memory
1813 # drop cache to save memory
1788 self._cache = None
1814 self._cache = None
1789
1815
1790 bins = self._chunks(chain, df=_df)
1816 bins = self._chunks(chain, df=_df)
1791 if rawtext is None:
1817 if rawtext is None:
1792 rawtext = bytes(bins[0])
1818 rawtext = bytes(bins[0])
1793 bins = bins[1:]
1819 bins = bins[1:]
1794
1820
1795 rawtext = mdiff.patches(rawtext, bins)
1821 rawtext = mdiff.patches(rawtext, bins)
1796 self._cache = (node, rev, rawtext)
1822 self._cache = (node, rev, rawtext)
1797
1823
1798 if flags is None:
1824 if flags is None:
1799 if rev is None:
1825 if rev is None:
1800 rev = self.rev(node)
1826 rev = self.rev(node)
1801 flags = self.flags(rev)
1827 flags = self.flags(rev)
1802
1828
1803 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1829 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1804 if validatehash:
1830 if validatehash:
1805 self.checkhash(text, node, rev=rev)
1831 self.checkhash(text, node, rev=rev)
1806
1832
1807 return text
1833 return text
1808
1834
1809 def hash(self, text, p1, p2):
1835 def hash(self, text, p1, p2):
1810 """Compute a node hash.
1836 """Compute a node hash.
1811
1837
1812 Available as a function so that subclasses can replace the hash
1838 Available as a function so that subclasses can replace the hash
1813 as needed.
1839 as needed.
1814 """
1840 """
1815 return hash(text, p1, p2)
1841 return hash(text, p1, p2)
1816
1842
1817 def _processflags(self, text, flags, operation, raw=False):
1843 def _processflags(self, text, flags, operation, raw=False):
1818 """Inspect revision data flags and applies transforms defined by
1844 """Inspect revision data flags and applies transforms defined by
1819 registered flag processors.
1845 registered flag processors.
1820
1846
1821 ``text`` - the revision data to process
1847 ``text`` - the revision data to process
1822 ``flags`` - the revision flags
1848 ``flags`` - the revision flags
1823 ``operation`` - the operation being performed (read or write)
1849 ``operation`` - the operation being performed (read or write)
1824 ``raw`` - an optional argument describing if the raw transform should be
1850 ``raw`` - an optional argument describing if the raw transform should be
1825 applied.
1851 applied.
1826
1852
1827 This method processes the flags in the order (or reverse order if
1853 This method processes the flags in the order (or reverse order if
1828 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1854 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1829 flag processors registered for present flags. The order of flags defined
1855 flag processors registered for present flags. The order of flags defined
1830 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1856 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1831
1857
1832 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1858 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1833 processed text and ``validatehash`` is a bool indicating whether the
1859 processed text and ``validatehash`` is a bool indicating whether the
1834 returned text should be checked for hash integrity.
1860 returned text should be checked for hash integrity.
1835
1861
1836 Note: If the ``raw`` argument is set, it has precedence over the
1862 Note: If the ``raw`` argument is set, it has precedence over the
1837 operation and will only update the value of ``validatehash``.
1863 operation and will only update the value of ``validatehash``.
1838 """
1864 """
1839 # fast path: no flag processors will run
1865 # fast path: no flag processors will run
1840 if flags == 0:
1866 if flags == 0:
1841 return text, True
1867 return text, True
1842 if not operation in ('read', 'write'):
1868 if not operation in ('read', 'write'):
1843 raise ProgrammingError(_("invalid '%s' operation ") % (operation))
1869 raise ProgrammingError(_("invalid '%s' operation ") % (operation))
1844 # Check all flags are known.
1870 # Check all flags are known.
1845 if flags & ~REVIDX_KNOWN_FLAGS:
1871 if flags & ~REVIDX_KNOWN_FLAGS:
1846 raise RevlogError(_("incompatible revision flag '%#x'") %
1872 raise RevlogError(_("incompatible revision flag '%#x'") %
1847 (flags & ~REVIDX_KNOWN_FLAGS))
1873 (flags & ~REVIDX_KNOWN_FLAGS))
1848 validatehash = True
1874 validatehash = True
1849 # Depending on the operation (read or write), the order might be
1875 # Depending on the operation (read or write), the order might be
1850 # reversed due to non-commutative transforms.
1876 # reversed due to non-commutative transforms.
1851 orderedflags = REVIDX_FLAGS_ORDER
1877 orderedflags = REVIDX_FLAGS_ORDER
1852 if operation == 'write':
1878 if operation == 'write':
1853 orderedflags = reversed(orderedflags)
1879 orderedflags = reversed(orderedflags)
1854
1880
1855 for flag in orderedflags:
1881 for flag in orderedflags:
1856 # If a flagprocessor has been registered for a known flag, apply the
1882 # If a flagprocessor has been registered for a known flag, apply the
1857 # related operation transform and update result tuple.
1883 # related operation transform and update result tuple.
1858 if flag & flags:
1884 if flag & flags:
1859 vhash = True
1885 vhash = True
1860
1886
1861 if flag not in _flagprocessors:
1887 if flag not in _flagprocessors:
1862 message = _("missing processor for flag '%#x'") % (flag)
1888 message = _("missing processor for flag '%#x'") % (flag)
1863 raise RevlogError(message)
1889 raise RevlogError(message)
1864
1890
1865 processor = _flagprocessors[flag]
1891 processor = _flagprocessors[flag]
1866 if processor is not None:
1892 if processor is not None:
1867 readtransform, writetransform, rawtransform = processor
1893 readtransform, writetransform, rawtransform = processor
1868
1894
1869 if raw:
1895 if raw:
1870 vhash = rawtransform(self, text)
1896 vhash = rawtransform(self, text)
1871 elif operation == 'read':
1897 elif operation == 'read':
1872 text, vhash = readtransform(self, text)
1898 text, vhash = readtransform(self, text)
1873 else: # write operation
1899 else: # write operation
1874 text, vhash = writetransform(self, text)
1900 text, vhash = writetransform(self, text)
1875 validatehash = validatehash and vhash
1901 validatehash = validatehash and vhash
1876
1902
1877 return text, validatehash
1903 return text, validatehash
1878
1904
1879 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1905 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1880 """Check node hash integrity.
1906 """Check node hash integrity.
1881
1907
1882 Available as a function so that subclasses can extend hash mismatch
1908 Available as a function so that subclasses can extend hash mismatch
1883 behaviors as needed.
1909 behaviors as needed.
1884 """
1910 """
1885 try:
1911 try:
1886 if p1 is None and p2 is None:
1912 if p1 is None and p2 is None:
1887 p1, p2 = self.parents(node)
1913 p1, p2 = self.parents(node)
1888 if node != self.hash(text, p1, p2):
1914 if node != self.hash(text, p1, p2):
1889 revornode = rev
1915 revornode = rev
1890 if revornode is None:
1916 if revornode is None:
1891 revornode = templatefilters.short(hex(node))
1917 revornode = templatefilters.short(hex(node))
1892 raise RevlogError(_("integrity check failed on %s:%s")
1918 raise RevlogError(_("integrity check failed on %s:%s")
1893 % (self.indexfile, pycompat.bytestr(revornode)))
1919 % (self.indexfile, pycompat.bytestr(revornode)))
1894 except RevlogError:
1920 except RevlogError:
1895 if self._censorable and _censoredtext(text):
1921 if self._censorable and _censoredtext(text):
1896 raise error.CensoredNodeError(self.indexfile, node, text)
1922 raise error.CensoredNodeError(self.indexfile, node, text)
1897 raise
1923 raise
1898
1924
1899 def _enforceinlinesize(self, tr, fp=None):
1925 def _enforceinlinesize(self, tr, fp=None):
1900 """Check if the revlog is too big for inline and convert if so.
1926 """Check if the revlog is too big for inline and convert if so.
1901
1927
1902 This should be called after revisions are added to the revlog. If the
1928 This should be called after revisions are added to the revlog. If the
1903 revlog has grown too large to be an inline revlog, it will convert it
1929 revlog has grown too large to be an inline revlog, it will convert it
1904 to use multiple index and data files.
1930 to use multiple index and data files.
1905 """
1931 """
1906 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1932 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1907 return
1933 return
1908
1934
1909 trinfo = tr.find(self.indexfile)
1935 trinfo = tr.find(self.indexfile)
1910 if trinfo is None:
1936 if trinfo is None:
1911 raise RevlogError(_("%s not found in the transaction")
1937 raise RevlogError(_("%s not found in the transaction")
1912 % self.indexfile)
1938 % self.indexfile)
1913
1939
1914 trindex = trinfo[2]
1940 trindex = trinfo[2]
1915 if trindex is not None:
1941 if trindex is not None:
1916 dataoff = self.start(trindex)
1942 dataoff = self.start(trindex)
1917 else:
1943 else:
1918 # revlog was stripped at start of transaction, use all leftover data
1944 # revlog was stripped at start of transaction, use all leftover data
1919 trindex = len(self) - 1
1945 trindex = len(self) - 1
1920 dataoff = self.end(-2)
1946 dataoff = self.end(-2)
1921
1947
1922 tr.add(self.datafile, dataoff)
1948 tr.add(self.datafile, dataoff)
1923
1949
1924 if fp:
1950 if fp:
1925 fp.flush()
1951 fp.flush()
1926 fp.close()
1952 fp.close()
1927
1953
1928 with self._datafp('w') as df:
1954 with self._datafp('w') as df:
1929 for r in self:
1955 for r in self:
1930 df.write(self._getsegmentforrevs(r, r)[1])
1956 df.write(self._getsegmentforrevs(r, r)[1])
1931
1957
1932 with self._indexfp('w') as fp:
1958 with self._indexfp('w') as fp:
1933 self.version &= ~FLAG_INLINE_DATA
1959 self.version &= ~FLAG_INLINE_DATA
1934 self._inline = False
1960 self._inline = False
1935 io = self._io
1961 io = self._io
1936 for i in self:
1962 for i in self:
1937 e = io.packentry(self.index[i], self.node, self.version, i)
1963 e = io.packentry(self.index[i], self.node, self.version, i)
1938 fp.write(e)
1964 fp.write(e)
1939
1965
1940 # the temp file replace the real index when we exit the context
1966 # the temp file replace the real index when we exit the context
1941 # manager
1967 # manager
1942
1968
1943 tr.replace(self.indexfile, trindex * self._io.size)
1969 tr.replace(self.indexfile, trindex * self._io.size)
1944 self._chunkclear()
1970 self._chunkclear()
1945
1971
1946 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1972 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1947 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1973 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1948 """add a revision to the log
1974 """add a revision to the log
1949
1975
1950 text - the revision data to add
1976 text - the revision data to add
1951 transaction - the transaction object used for rollback
1977 transaction - the transaction object used for rollback
1952 link - the linkrev data to add
1978 link - the linkrev data to add
1953 p1, p2 - the parent nodeids of the revision
1979 p1, p2 - the parent nodeids of the revision
1954 cachedelta - an optional precomputed delta
1980 cachedelta - an optional precomputed delta
1955 node - nodeid of revision; typically node is not specified, and it is
1981 node - nodeid of revision; typically node is not specified, and it is
1956 computed by default as hash(text, p1, p2), however subclasses might
1982 computed by default as hash(text, p1, p2), however subclasses might
1957 use different hashing method (and override checkhash() in such case)
1983 use different hashing method (and override checkhash() in such case)
1958 flags - the known flags to set on the revision
1984 flags - the known flags to set on the revision
1959 deltacomputer - an optional _deltacomputer instance shared between
1985 deltacomputer - an optional _deltacomputer instance shared between
1960 multiple calls
1986 multiple calls
1961 """
1987 """
1962 if link == nullrev:
1988 if link == nullrev:
1963 raise RevlogError(_("attempted to add linkrev -1 to %s")
1989 raise RevlogError(_("attempted to add linkrev -1 to %s")
1964 % self.indexfile)
1990 % self.indexfile)
1965
1991
1966 if flags:
1992 if flags:
1967 node = node or self.hash(text, p1, p2)
1993 node = node or self.hash(text, p1, p2)
1968
1994
1969 rawtext, validatehash = self._processflags(text, flags, 'write')
1995 rawtext, validatehash = self._processflags(text, flags, 'write')
1970
1996
1971 # If the flag processor modifies the revision data, ignore any provided
1997 # If the flag processor modifies the revision data, ignore any provided
1972 # cachedelta.
1998 # cachedelta.
1973 if rawtext != text:
1999 if rawtext != text:
1974 cachedelta = None
2000 cachedelta = None
1975
2001
1976 if len(rawtext) > _maxentrysize:
2002 if len(rawtext) > _maxentrysize:
1977 raise RevlogError(
2003 raise RevlogError(
1978 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
2004 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1979 % (self.indexfile, len(rawtext)))
2005 % (self.indexfile, len(rawtext)))
1980
2006
1981 node = node or self.hash(rawtext, p1, p2)
2007 node = node or self.hash(rawtext, p1, p2)
1982 if node in self.nodemap:
2008 if node in self.nodemap:
1983 return node
2009 return node
1984
2010
1985 if validatehash:
2011 if validatehash:
1986 self.checkhash(rawtext, node, p1=p1, p2=p2)
2012 self.checkhash(rawtext, node, p1=p1, p2=p2)
1987
2013
1988 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
2014 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1989 flags, cachedelta=cachedelta,
2015 flags, cachedelta=cachedelta,
1990 deltacomputer=deltacomputer)
2016 deltacomputer=deltacomputer)
1991
2017
1992 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2018 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1993 cachedelta=None, deltacomputer=None):
2019 cachedelta=None, deltacomputer=None):
1994 """add a raw revision with known flags, node and parents
2020 """add a raw revision with known flags, node and parents
1995 useful when reusing a revision not stored in this revlog (ex: received
2021 useful when reusing a revision not stored in this revlog (ex: received
1996 over wire, or read from an external bundle).
2022 over wire, or read from an external bundle).
1997 """
2023 """
1998 dfh = None
2024 dfh = None
1999 if not self._inline:
2025 if not self._inline:
2000 dfh = self._datafp("a+")
2026 dfh = self._datafp("a+")
2001 ifh = self._indexfp("a+")
2027 ifh = self._indexfp("a+")
2002 try:
2028 try:
2003 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2029 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2004 flags, cachedelta, ifh, dfh,
2030 flags, cachedelta, ifh, dfh,
2005 deltacomputer=deltacomputer)
2031 deltacomputer=deltacomputer)
2006 finally:
2032 finally:
2007 if dfh:
2033 if dfh:
2008 dfh.close()
2034 dfh.close()
2009 ifh.close()
2035 ifh.close()
2010
2036
2011 def compress(self, data):
2037 def compress(self, data):
2012 """Generate a possibly-compressed representation of data."""
2038 """Generate a possibly-compressed representation of data."""
2013 if not data:
2039 if not data:
2014 return '', data
2040 return '', data
2015
2041
2016 compressed = self._compressor.compress(data)
2042 compressed = self._compressor.compress(data)
2017
2043
2018 if compressed:
2044 if compressed:
2019 # The revlog compressor added the header in the returned data.
2045 # The revlog compressor added the header in the returned data.
2020 return '', compressed
2046 return '', compressed
2021
2047
2022 if data[0:1] == '\0':
2048 if data[0:1] == '\0':
2023 return '', data
2049 return '', data
2024 return 'u', data
2050 return 'u', data
2025
2051
2026 def decompress(self, data):
2052 def decompress(self, data):
2027 """Decompress a revlog chunk.
2053 """Decompress a revlog chunk.
2028
2054
2029 The chunk is expected to begin with a header identifying the
2055 The chunk is expected to begin with a header identifying the
2030 format type so it can be routed to an appropriate decompressor.
2056 format type so it can be routed to an appropriate decompressor.
2031 """
2057 """
2032 if not data:
2058 if not data:
2033 return data
2059 return data
2034
2060
2035 # Revlogs are read much more frequently than they are written and many
2061 # Revlogs are read much more frequently than they are written and many
2036 # chunks only take microseconds to decompress, so performance is
2062 # chunks only take microseconds to decompress, so performance is
2037 # important here.
2063 # important here.
2038 #
2064 #
2039 # We can make a few assumptions about revlogs:
2065 # We can make a few assumptions about revlogs:
2040 #
2066 #
2041 # 1) the majority of chunks will be compressed (as opposed to inline
2067 # 1) the majority of chunks will be compressed (as opposed to inline
2042 # raw data).
2068 # raw data).
2043 # 2) decompressing *any* data will likely by at least 10x slower than
2069 # 2) decompressing *any* data will likely by at least 10x slower than
2044 # returning raw inline data.
2070 # returning raw inline data.
2045 # 3) we want to prioritize common and officially supported compression
2071 # 3) we want to prioritize common and officially supported compression
2046 # engines
2072 # engines
2047 #
2073 #
2048 # It follows that we want to optimize for "decompress compressed data
2074 # It follows that we want to optimize for "decompress compressed data
2049 # when encoded with common and officially supported compression engines"
2075 # when encoded with common and officially supported compression engines"
2050 # case over "raw data" and "data encoded by less common or non-official
2076 # case over "raw data" and "data encoded by less common or non-official
2051 # compression engines." That is why we have the inline lookup first
2077 # compression engines." That is why we have the inline lookup first
2052 # followed by the compengines lookup.
2078 # followed by the compengines lookup.
2053 #
2079 #
2054 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2080 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2055 # compressed chunks. And this matters for changelog and manifest reads.
2081 # compressed chunks. And this matters for changelog and manifest reads.
2056 t = data[0:1]
2082 t = data[0:1]
2057
2083
2058 if t == 'x':
2084 if t == 'x':
2059 try:
2085 try:
2060 return _zlibdecompress(data)
2086 return _zlibdecompress(data)
2061 except zlib.error as e:
2087 except zlib.error as e:
2062 raise RevlogError(_('revlog decompress error: %s') %
2088 raise RevlogError(_('revlog decompress error: %s') %
2063 stringutil.forcebytestr(e))
2089 stringutil.forcebytestr(e))
2064 # '\0' is more common than 'u' so it goes first.
2090 # '\0' is more common than 'u' so it goes first.
2065 elif t == '\0':
2091 elif t == '\0':
2066 return data
2092 return data
2067 elif t == 'u':
2093 elif t == 'u':
2068 return util.buffer(data, 1)
2094 return util.buffer(data, 1)
2069
2095
2070 try:
2096 try:
2071 compressor = self._decompressors[t]
2097 compressor = self._decompressors[t]
2072 except KeyError:
2098 except KeyError:
2073 try:
2099 try:
2074 engine = util.compengines.forrevlogheader(t)
2100 engine = util.compengines.forrevlogheader(t)
2075 compressor = engine.revlogcompressor()
2101 compressor = engine.revlogcompressor()
2076 self._decompressors[t] = compressor
2102 self._decompressors[t] = compressor
2077 except KeyError:
2103 except KeyError:
2078 raise RevlogError(_('unknown compression type %r') % t)
2104 raise RevlogError(_('unknown compression type %r') % t)
2079
2105
2080 return compressor.decompress(data)
2106 return compressor.decompress(data)
2081
2107
2082 def _isgooddeltainfo(self, d, textlen):
2108 def _isgooddeltainfo(self, d, textlen):
2083 """Returns True if the given delta is good. Good means that it is within
2109 """Returns True if the given delta is good. Good means that it is within
2084 the disk span, disk size, and chain length bounds that we know to be
2110 the disk span, disk size, and chain length bounds that we know to be
2085 performant."""
2111 performant."""
2086 if d is None:
2112 if d is None:
2087 return False
2113 return False
2088
2114
2089 # - 'd.distance' is the distance from the base revision -- bounding it
2115 # - 'd.distance' is the distance from the base revision -- bounding it
2090 # limits the amount of I/O we need to do.
2116 # limits the amount of I/O we need to do.
2091 # - 'd.compresseddeltalen' is the sum of the total size of deltas we
2117 # - 'd.compresseddeltalen' is the sum of the total size of deltas we
2092 # need to apply -- bounding it limits the amount of CPU we consume.
2118 # need to apply -- bounding it limits the amount of CPU we consume.
2093
2119
2094 defaultmax = textlen * 4
2120 defaultmax = textlen * 4
2095 maxdist = self._maxdeltachainspan
2121 maxdist = self._maxdeltachainspan
2096 if not maxdist:
2122 if not maxdist:
2097 maxdist = d.distance # ensure the conditional pass
2123 maxdist = d.distance # ensure the conditional pass
2098 maxdist = max(maxdist, defaultmax)
2124 maxdist = max(maxdist, defaultmax)
2099 if (d.distance > maxdist or d.deltalen > textlen or
2125 if (d.distance > maxdist or d.deltalen > textlen or
2100 d.compresseddeltalen > textlen * 2 or
2126 d.compresseddeltalen > textlen * 2 or
2101 (self._maxchainlen and d.chainlen > self._maxchainlen)):
2127 (self._maxchainlen and d.chainlen > self._maxchainlen)):
2102 return False
2128 return False
2103
2129
2104 return True
2130 return True
2105
2131
2106 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2132 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2107 cachedelta, ifh, dfh, alwayscache=False,
2133 cachedelta, ifh, dfh, alwayscache=False,
2108 deltacomputer=None):
2134 deltacomputer=None):
2109 """internal function to add revisions to the log
2135 """internal function to add revisions to the log
2110
2136
2111 see addrevision for argument descriptions.
2137 see addrevision for argument descriptions.
2112
2138
2113 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2139 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2114
2140
2115 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2141 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2116 be used.
2142 be used.
2117
2143
2118 invariants:
2144 invariants:
2119 - rawtext is optional (can be None); if not set, cachedelta must be set.
2145 - rawtext is optional (can be None); if not set, cachedelta must be set.
2120 if both are set, they must correspond to each other.
2146 if both are set, they must correspond to each other.
2121 """
2147 """
2122 if node == nullid:
2148 if node == nullid:
2123 raise RevlogError(_("%s: attempt to add null revision") %
2149 raise RevlogError(_("%s: attempt to add null revision") %
2124 (self.indexfile))
2150 (self.indexfile))
2125 if node == wdirid or node in wdirfilenodeids:
2151 if node == wdirid or node in wdirfilenodeids:
2126 raise RevlogError(_("%s: attempt to add wdir revision") %
2152 raise RevlogError(_("%s: attempt to add wdir revision") %
2127 (self.indexfile))
2153 (self.indexfile))
2128
2154
2129 if self._inline:
2155 if self._inline:
2130 fh = ifh
2156 fh = ifh
2131 else:
2157 else:
2132 fh = dfh
2158 fh = dfh
2133
2159
2134 btext = [rawtext]
2160 btext = [rawtext]
2135
2161
2136 curr = len(self)
2162 curr = len(self)
2137 prev = curr - 1
2163 prev = curr - 1
2138 offset = self.end(prev)
2164 offset = self.end(prev)
2139 p1r, p2r = self.rev(p1), self.rev(p2)
2165 p1r, p2r = self.rev(p1), self.rev(p2)
2140
2166
2141 # full versions are inserted when the needed deltas
2167 # full versions are inserted when the needed deltas
2142 # become comparable to the uncompressed text
2168 # become comparable to the uncompressed text
2143 if rawtext is None:
2169 if rawtext is None:
2144 # need rawtext size, before changed by flag processors, which is
2170 # need rawtext size, before changed by flag processors, which is
2145 # the non-raw size. use revlog explicitly to avoid filelog's extra
2171 # the non-raw size. use revlog explicitly to avoid filelog's extra
2146 # logic that might remove metadata size.
2172 # logic that might remove metadata size.
2147 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2173 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2148 cachedelta[1])
2174 cachedelta[1])
2149 else:
2175 else:
2150 textlen = len(rawtext)
2176 textlen = len(rawtext)
2151
2177
2152 if deltacomputer is None:
2178 if deltacomputer is None:
2153 deltacomputer = _deltacomputer(self)
2179 deltacomputer = _deltacomputer(self)
2154
2180
2155 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2181 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2156
2182
2157 # no delta for flag processor revision (see "candelta" for why)
2183 # no delta for flag processor revision (see "candelta" for why)
2158 # not calling candelta since only one revision needs test, also to
2184 # not calling candelta since only one revision needs test, also to
2159 # avoid overhead fetching flags again.
2185 # avoid overhead fetching flags again.
2160 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2186 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2161 deltainfo = None
2187 deltainfo = None
2162 else:
2188 else:
2163 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2189 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2164
2190
2165 if deltainfo is not None:
2191 if deltainfo is not None:
2166 base = deltainfo.base
2192 base = deltainfo.base
2167 chainbase = deltainfo.chainbase
2193 chainbase = deltainfo.chainbase
2168 data = deltainfo.data
2194 data = deltainfo.data
2169 l = deltainfo.deltalen
2195 l = deltainfo.deltalen
2170 else:
2196 else:
2171 rawtext = deltacomputer.buildtext(revinfo, fh)
2197 rawtext = deltacomputer.buildtext(revinfo, fh)
2172 data = self.compress(rawtext)
2198 data = self.compress(rawtext)
2173 l = len(data[1]) + len(data[0])
2199 l = len(data[1]) + len(data[0])
2174 base = chainbase = curr
2200 base = chainbase = curr
2175
2201
2176 e = (offset_type(offset, flags), l, textlen,
2202 e = (offset_type(offset, flags), l, textlen,
2177 base, link, p1r, p2r, node)
2203 base, link, p1r, p2r, node)
2178 self.index.insert(-1, e)
2204 self.index.insert(-1, e)
2179 self.nodemap[node] = curr
2205 self.nodemap[node] = curr
2180
2206
2181 entry = self._io.packentry(e, self.node, self.version, curr)
2207 entry = self._io.packentry(e, self.node, self.version, curr)
2182 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2208 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2183
2209
2184 if alwayscache and rawtext is None:
2210 if alwayscache and rawtext is None:
2185 rawtext = deltacomputer._buildtext(revinfo, fh)
2211 rawtext = deltacomputer._buildtext(revinfo, fh)
2186
2212
2187 if type(rawtext) == bytes: # only accept immutable objects
2213 if type(rawtext) == bytes: # only accept immutable objects
2188 self._cache = (node, curr, rawtext)
2214 self._cache = (node, curr, rawtext)
2189 self._chainbasecache[curr] = chainbase
2215 self._chainbasecache[curr] = chainbase
2190 return node
2216 return node
2191
2217
2192 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2218 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2193 # Files opened in a+ mode have inconsistent behavior on various
2219 # Files opened in a+ mode have inconsistent behavior on various
2194 # platforms. Windows requires that a file positioning call be made
2220 # platforms. Windows requires that a file positioning call be made
2195 # when the file handle transitions between reads and writes. See
2221 # when the file handle transitions between reads and writes. See
2196 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2222 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2197 # platforms, Python or the platform itself can be buggy. Some versions
2223 # platforms, Python or the platform itself can be buggy. Some versions
2198 # of Solaris have been observed to not append at the end of the file
2224 # of Solaris have been observed to not append at the end of the file
2199 # if the file was seeked to before the end. See issue4943 for more.
2225 # if the file was seeked to before the end. See issue4943 for more.
2200 #
2226 #
2201 # We work around this issue by inserting a seek() before writing.
2227 # We work around this issue by inserting a seek() before writing.
2202 # Note: This is likely not necessary on Python 3.
2228 # Note: This is likely not necessary on Python 3.
2203 ifh.seek(0, os.SEEK_END)
2229 ifh.seek(0, os.SEEK_END)
2204 if dfh:
2230 if dfh:
2205 dfh.seek(0, os.SEEK_END)
2231 dfh.seek(0, os.SEEK_END)
2206
2232
2207 curr = len(self) - 1
2233 curr = len(self) - 1
2208 if not self._inline:
2234 if not self._inline:
2209 transaction.add(self.datafile, offset)
2235 transaction.add(self.datafile, offset)
2210 transaction.add(self.indexfile, curr * len(entry))
2236 transaction.add(self.indexfile, curr * len(entry))
2211 if data[0]:
2237 if data[0]:
2212 dfh.write(data[0])
2238 dfh.write(data[0])
2213 dfh.write(data[1])
2239 dfh.write(data[1])
2214 ifh.write(entry)
2240 ifh.write(entry)
2215 else:
2241 else:
2216 offset += curr * self._io.size
2242 offset += curr * self._io.size
2217 transaction.add(self.indexfile, offset, curr)
2243 transaction.add(self.indexfile, offset, curr)
2218 ifh.write(entry)
2244 ifh.write(entry)
2219 ifh.write(data[0])
2245 ifh.write(data[0])
2220 ifh.write(data[1])
2246 ifh.write(data[1])
2221 self._enforceinlinesize(transaction, ifh)
2247 self._enforceinlinesize(transaction, ifh)
2222
2248
2223 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2249 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2224 """
2250 """
2225 add a delta group
2251 add a delta group
2226
2252
2227 given a set of deltas, add them to the revision log. the
2253 given a set of deltas, add them to the revision log. the
2228 first delta is against its parent, which should be in our
2254 first delta is against its parent, which should be in our
2229 log, the rest are against the previous delta.
2255 log, the rest are against the previous delta.
2230
2256
2231 If ``addrevisioncb`` is defined, it will be called with arguments of
2257 If ``addrevisioncb`` is defined, it will be called with arguments of
2232 this revlog and the node that was added.
2258 this revlog and the node that was added.
2233 """
2259 """
2234
2260
2235 nodes = []
2261 nodes = []
2236
2262
2237 r = len(self)
2263 r = len(self)
2238 end = 0
2264 end = 0
2239 if r:
2265 if r:
2240 end = self.end(r - 1)
2266 end = self.end(r - 1)
2241 ifh = self._indexfp("a+")
2267 ifh = self._indexfp("a+")
2242 isize = r * self._io.size
2268 isize = r * self._io.size
2243 if self._inline:
2269 if self._inline:
2244 transaction.add(self.indexfile, end + isize, r)
2270 transaction.add(self.indexfile, end + isize, r)
2245 dfh = None
2271 dfh = None
2246 else:
2272 else:
2247 transaction.add(self.indexfile, isize, r)
2273 transaction.add(self.indexfile, isize, r)
2248 transaction.add(self.datafile, end)
2274 transaction.add(self.datafile, end)
2249 dfh = self._datafp("a+")
2275 dfh = self._datafp("a+")
2250 def flush():
2276 def flush():
2251 if dfh:
2277 if dfh:
2252 dfh.flush()
2278 dfh.flush()
2253 ifh.flush()
2279 ifh.flush()
2254 try:
2280 try:
2255 deltacomputer = _deltacomputer(self)
2281 deltacomputer = _deltacomputer(self)
2256 # loop through our set of deltas
2282 # loop through our set of deltas
2257 for data in deltas:
2283 for data in deltas:
2258 node, p1, p2, linknode, deltabase, delta, flags = data
2284 node, p1, p2, linknode, deltabase, delta, flags = data
2259 link = linkmapper(linknode)
2285 link = linkmapper(linknode)
2260 flags = flags or REVIDX_DEFAULT_FLAGS
2286 flags = flags or REVIDX_DEFAULT_FLAGS
2261
2287
2262 nodes.append(node)
2288 nodes.append(node)
2263
2289
2264 if node in self.nodemap:
2290 if node in self.nodemap:
2265 # this can happen if two branches make the same change
2291 # this can happen if two branches make the same change
2266 continue
2292 continue
2267
2293
2268 for p in (p1, p2):
2294 for p in (p1, p2):
2269 if p not in self.nodemap:
2295 if p not in self.nodemap:
2270 raise LookupError(p, self.indexfile,
2296 raise LookupError(p, self.indexfile,
2271 _('unknown parent'))
2297 _('unknown parent'))
2272
2298
2273 if deltabase not in self.nodemap:
2299 if deltabase not in self.nodemap:
2274 raise LookupError(deltabase, self.indexfile,
2300 raise LookupError(deltabase, self.indexfile,
2275 _('unknown delta base'))
2301 _('unknown delta base'))
2276
2302
2277 baserev = self.rev(deltabase)
2303 baserev = self.rev(deltabase)
2278
2304
2279 if baserev != nullrev and self.iscensored(baserev):
2305 if baserev != nullrev and self.iscensored(baserev):
2280 # if base is censored, delta must be full replacement in a
2306 # if base is censored, delta must be full replacement in a
2281 # single patch operation
2307 # single patch operation
2282 hlen = struct.calcsize(">lll")
2308 hlen = struct.calcsize(">lll")
2283 oldlen = self.rawsize(baserev)
2309 oldlen = self.rawsize(baserev)
2284 newlen = len(delta) - hlen
2310 newlen = len(delta) - hlen
2285 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2311 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2286 raise error.CensoredBaseError(self.indexfile,
2312 raise error.CensoredBaseError(self.indexfile,
2287 self.node(baserev))
2313 self.node(baserev))
2288
2314
2289 if not flags and self._peek_iscensored(baserev, delta, flush):
2315 if not flags and self._peek_iscensored(baserev, delta, flush):
2290 flags |= REVIDX_ISCENSORED
2316 flags |= REVIDX_ISCENSORED
2291
2317
2292 # We assume consumers of addrevisioncb will want to retrieve
2318 # We assume consumers of addrevisioncb will want to retrieve
2293 # the added revision, which will require a call to
2319 # the added revision, which will require a call to
2294 # revision(). revision() will fast path if there is a cache
2320 # revision(). revision() will fast path if there is a cache
2295 # hit. So, we tell _addrevision() to always cache in this case.
2321 # hit. So, we tell _addrevision() to always cache in this case.
2296 # We're only using addgroup() in the context of changegroup
2322 # We're only using addgroup() in the context of changegroup
2297 # generation so the revision data can always be handled as raw
2323 # generation so the revision data can always be handled as raw
2298 # by the flagprocessor.
2324 # by the flagprocessor.
2299 self._addrevision(node, None, transaction, link,
2325 self._addrevision(node, None, transaction, link,
2300 p1, p2, flags, (baserev, delta),
2326 p1, p2, flags, (baserev, delta),
2301 ifh, dfh,
2327 ifh, dfh,
2302 alwayscache=bool(addrevisioncb),
2328 alwayscache=bool(addrevisioncb),
2303 deltacomputer=deltacomputer)
2329 deltacomputer=deltacomputer)
2304
2330
2305 if addrevisioncb:
2331 if addrevisioncb:
2306 addrevisioncb(self, node)
2332 addrevisioncb(self, node)
2307
2333
2308 if not dfh and not self._inline:
2334 if not dfh and not self._inline:
2309 # addrevision switched from inline to conventional
2335 # addrevision switched from inline to conventional
2310 # reopen the index
2336 # reopen the index
2311 ifh.close()
2337 ifh.close()
2312 dfh = self._datafp("a+")
2338 dfh = self._datafp("a+")
2313 ifh = self._indexfp("a+")
2339 ifh = self._indexfp("a+")
2314 finally:
2340 finally:
2315 if dfh:
2341 if dfh:
2316 dfh.close()
2342 dfh.close()
2317 ifh.close()
2343 ifh.close()
2318
2344
2319 return nodes
2345 return nodes
2320
2346
2321 def iscensored(self, rev):
2347 def iscensored(self, rev):
2322 """Check if a file revision is censored."""
2348 """Check if a file revision is censored."""
2323 if not self._censorable:
2349 if not self._censorable:
2324 return False
2350 return False
2325
2351
2326 return self.flags(rev) & REVIDX_ISCENSORED
2352 return self.flags(rev) & REVIDX_ISCENSORED
2327
2353
2328 def _peek_iscensored(self, baserev, delta, flush):
2354 def _peek_iscensored(self, baserev, delta, flush):
2329 """Quickly check if a delta produces a censored revision."""
2355 """Quickly check if a delta produces a censored revision."""
2330 if not self._censorable:
2356 if not self._censorable:
2331 return False
2357 return False
2332
2358
2333 # Fragile heuristic: unless new file meta keys are added alphabetically
2359 # Fragile heuristic: unless new file meta keys are added alphabetically
2334 # preceding "censored", all censored revisions are prefixed by
2360 # preceding "censored", all censored revisions are prefixed by
2335 # "\1\ncensored:". A delta producing such a censored revision must be a
2361 # "\1\ncensored:". A delta producing such a censored revision must be a
2336 # full-replacement delta, so we inspect the first and only patch in the
2362 # full-replacement delta, so we inspect the first and only patch in the
2337 # delta for this prefix.
2363 # delta for this prefix.
2338 hlen = struct.calcsize(">lll")
2364 hlen = struct.calcsize(">lll")
2339 if len(delta) <= hlen:
2365 if len(delta) <= hlen:
2340 return False
2366 return False
2341
2367
2342 oldlen = self.rawsize(baserev)
2368 oldlen = self.rawsize(baserev)
2343 newlen = len(delta) - hlen
2369 newlen = len(delta) - hlen
2344 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2370 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2345 return False
2371 return False
2346
2372
2347 add = "\1\ncensored:"
2373 add = "\1\ncensored:"
2348 addlen = len(add)
2374 addlen = len(add)
2349 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2375 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2350
2376
2351 def getstrippoint(self, minlink):
2377 def getstrippoint(self, minlink):
2352 """find the minimum rev that must be stripped to strip the linkrev
2378 """find the minimum rev that must be stripped to strip the linkrev
2353
2379
2354 Returns a tuple containing the minimum rev and a set of all revs that
2380 Returns a tuple containing the minimum rev and a set of all revs that
2355 have linkrevs that will be broken by this strip.
2381 have linkrevs that will be broken by this strip.
2356 """
2382 """
2357 brokenrevs = set()
2383 brokenrevs = set()
2358 strippoint = len(self)
2384 strippoint = len(self)
2359
2385
2360 heads = {}
2386 heads = {}
2361 futurelargelinkrevs = set()
2387 futurelargelinkrevs = set()
2362 for head in self.headrevs():
2388 for head in self.headrevs():
2363 headlinkrev = self.linkrev(head)
2389 headlinkrev = self.linkrev(head)
2364 heads[head] = headlinkrev
2390 heads[head] = headlinkrev
2365 if headlinkrev >= minlink:
2391 if headlinkrev >= minlink:
2366 futurelargelinkrevs.add(headlinkrev)
2392 futurelargelinkrevs.add(headlinkrev)
2367
2393
2368 # This algorithm involves walking down the rev graph, starting at the
2394 # This algorithm involves walking down the rev graph, starting at the
2369 # heads. Since the revs are topologically sorted according to linkrev,
2395 # heads. Since the revs are topologically sorted according to linkrev,
2370 # once all head linkrevs are below the minlink, we know there are
2396 # once all head linkrevs are below the minlink, we know there are
2371 # no more revs that could have a linkrev greater than minlink.
2397 # no more revs that could have a linkrev greater than minlink.
2372 # So we can stop walking.
2398 # So we can stop walking.
2373 while futurelargelinkrevs:
2399 while futurelargelinkrevs:
2374 strippoint -= 1
2400 strippoint -= 1
2375 linkrev = heads.pop(strippoint)
2401 linkrev = heads.pop(strippoint)
2376
2402
2377 if linkrev < minlink:
2403 if linkrev < minlink:
2378 brokenrevs.add(strippoint)
2404 brokenrevs.add(strippoint)
2379 else:
2405 else:
2380 futurelargelinkrevs.remove(linkrev)
2406 futurelargelinkrevs.remove(linkrev)
2381
2407
2382 for p in self.parentrevs(strippoint):
2408 for p in self.parentrevs(strippoint):
2383 if p != nullrev:
2409 if p != nullrev:
2384 plinkrev = self.linkrev(p)
2410 plinkrev = self.linkrev(p)
2385 heads[p] = plinkrev
2411 heads[p] = plinkrev
2386 if plinkrev >= minlink:
2412 if plinkrev >= minlink:
2387 futurelargelinkrevs.add(plinkrev)
2413 futurelargelinkrevs.add(plinkrev)
2388
2414
2389 return strippoint, brokenrevs
2415 return strippoint, brokenrevs
2390
2416
2391 def strip(self, minlink, transaction):
2417 def strip(self, minlink, transaction):
2392 """truncate the revlog on the first revision with a linkrev >= minlink
2418 """truncate the revlog on the first revision with a linkrev >= minlink
2393
2419
2394 This function is called when we're stripping revision minlink and
2420 This function is called when we're stripping revision minlink and
2395 its descendants from the repository.
2421 its descendants from the repository.
2396
2422
2397 We have to remove all revisions with linkrev >= minlink, because
2423 We have to remove all revisions with linkrev >= minlink, because
2398 the equivalent changelog revisions will be renumbered after the
2424 the equivalent changelog revisions will be renumbered after the
2399 strip.
2425 strip.
2400
2426
2401 So we truncate the revlog on the first of these revisions, and
2427 So we truncate the revlog on the first of these revisions, and
2402 trust that the caller has saved the revisions that shouldn't be
2428 trust that the caller has saved the revisions that shouldn't be
2403 removed and that it'll re-add them after this truncation.
2429 removed and that it'll re-add them after this truncation.
2404 """
2430 """
2405 if len(self) == 0:
2431 if len(self) == 0:
2406 return
2432 return
2407
2433
2408 rev, _ = self.getstrippoint(minlink)
2434 rev, _ = self.getstrippoint(minlink)
2409 if rev == len(self):
2435 if rev == len(self):
2410 return
2436 return
2411
2437
2412 # first truncate the files on disk
2438 # first truncate the files on disk
2413 end = self.start(rev)
2439 end = self.start(rev)
2414 if not self._inline:
2440 if not self._inline:
2415 transaction.add(self.datafile, end)
2441 transaction.add(self.datafile, end)
2416 end = rev * self._io.size
2442 end = rev * self._io.size
2417 else:
2443 else:
2418 end += rev * self._io.size
2444 end += rev * self._io.size
2419
2445
2420 transaction.add(self.indexfile, end)
2446 transaction.add(self.indexfile, end)
2421
2447
2422 # then reset internal state in memory to forget those revisions
2448 # then reset internal state in memory to forget those revisions
2423 self._cache = None
2449 self._cache = None
2424 self._chaininfocache = {}
2450 self._chaininfocache = {}
2425 self._chunkclear()
2451 self._chunkclear()
2426 for x in xrange(rev, len(self)):
2452 for x in xrange(rev, len(self)):
2427 del self.nodemap[self.node(x)]
2453 del self.nodemap[self.node(x)]
2428
2454
2429 del self.index[rev:-1]
2455 del self.index[rev:-1]
2430 self._nodepos = None
2456 self._nodepos = None
2431
2457
2432 def checksize(self):
2458 def checksize(self):
2433 expected = 0
2459 expected = 0
2434 if len(self):
2460 if len(self):
2435 expected = max(0, self.end(len(self) - 1))
2461 expected = max(0, self.end(len(self) - 1))
2436
2462
2437 try:
2463 try:
2438 with self._datafp() as f:
2464 with self._datafp() as f:
2439 f.seek(0, 2)
2465 f.seek(0, 2)
2440 actual = f.tell()
2466 actual = f.tell()
2441 dd = actual - expected
2467 dd = actual - expected
2442 except IOError as inst:
2468 except IOError as inst:
2443 if inst.errno != errno.ENOENT:
2469 if inst.errno != errno.ENOENT:
2444 raise
2470 raise
2445 dd = 0
2471 dd = 0
2446
2472
2447 try:
2473 try:
2448 f = self.opener(self.indexfile)
2474 f = self.opener(self.indexfile)
2449 f.seek(0, 2)
2475 f.seek(0, 2)
2450 actual = f.tell()
2476 actual = f.tell()
2451 f.close()
2477 f.close()
2452 s = self._io.size
2478 s = self._io.size
2453 i = max(0, actual // s)
2479 i = max(0, actual // s)
2454 di = actual - (i * s)
2480 di = actual - (i * s)
2455 if self._inline:
2481 if self._inline:
2456 databytes = 0
2482 databytes = 0
2457 for r in self:
2483 for r in self:
2458 databytes += max(0, self.length(r))
2484 databytes += max(0, self.length(r))
2459 dd = 0
2485 dd = 0
2460 di = actual - len(self) * s - databytes
2486 di = actual - len(self) * s - databytes
2461 except IOError as inst:
2487 except IOError as inst:
2462 if inst.errno != errno.ENOENT:
2488 if inst.errno != errno.ENOENT:
2463 raise
2489 raise
2464 di = 0
2490 di = 0
2465
2491
2466 return (dd, di)
2492 return (dd, di)
2467
2493
2468 def files(self):
2494 def files(self):
2469 res = [self.indexfile]
2495 res = [self.indexfile]
2470 if not self._inline:
2496 if not self._inline:
2471 res.append(self.datafile)
2497 res.append(self.datafile)
2472 return res
2498 return res
2473
2499
2474 DELTAREUSEALWAYS = 'always'
2500 DELTAREUSEALWAYS = 'always'
2475 DELTAREUSESAMEREVS = 'samerevs'
2501 DELTAREUSESAMEREVS = 'samerevs'
2476 DELTAREUSENEVER = 'never'
2502 DELTAREUSENEVER = 'never'
2477
2503
2478 DELTAREUSEFULLADD = 'fulladd'
2504 DELTAREUSEFULLADD = 'fulladd'
2479
2505
2480 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2506 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2481
2507
2482 def clone(self, tr, destrevlog, addrevisioncb=None,
2508 def clone(self, tr, destrevlog, addrevisioncb=None,
2483 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2509 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2484 """Copy this revlog to another, possibly with format changes.
2510 """Copy this revlog to another, possibly with format changes.
2485
2511
2486 The destination revlog will contain the same revisions and nodes.
2512 The destination revlog will contain the same revisions and nodes.
2487 However, it may not be bit-for-bit identical due to e.g. delta encoding
2513 However, it may not be bit-for-bit identical due to e.g. delta encoding
2488 differences.
2514 differences.
2489
2515
2490 The ``deltareuse`` argument control how deltas from the existing revlog
2516 The ``deltareuse`` argument control how deltas from the existing revlog
2491 are preserved in the destination revlog. The argument can have the
2517 are preserved in the destination revlog. The argument can have the
2492 following values:
2518 following values:
2493
2519
2494 DELTAREUSEALWAYS
2520 DELTAREUSEALWAYS
2495 Deltas will always be reused (if possible), even if the destination
2521 Deltas will always be reused (if possible), even if the destination
2496 revlog would not select the same revisions for the delta. This is the
2522 revlog would not select the same revisions for the delta. This is the
2497 fastest mode of operation.
2523 fastest mode of operation.
2498 DELTAREUSESAMEREVS
2524 DELTAREUSESAMEREVS
2499 Deltas will be reused if the destination revlog would pick the same
2525 Deltas will be reused if the destination revlog would pick the same
2500 revisions for the delta. This mode strikes a balance between speed
2526 revisions for the delta. This mode strikes a balance between speed
2501 and optimization.
2527 and optimization.
2502 DELTAREUSENEVER
2528 DELTAREUSENEVER
2503 Deltas will never be reused. This is the slowest mode of execution.
2529 Deltas will never be reused. This is the slowest mode of execution.
2504 This mode can be used to recompute deltas (e.g. if the diff/delta
2530 This mode can be used to recompute deltas (e.g. if the diff/delta
2505 algorithm changes).
2531 algorithm changes).
2506
2532
2507 Delta computation can be slow, so the choice of delta reuse policy can
2533 Delta computation can be slow, so the choice of delta reuse policy can
2508 significantly affect run time.
2534 significantly affect run time.
2509
2535
2510 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2536 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2511 two extremes. Deltas will be reused if they are appropriate. But if the
2537 two extremes. Deltas will be reused if they are appropriate. But if the
2512 delta could choose a better revision, it will do so. This means if you
2538 delta could choose a better revision, it will do so. This means if you
2513 are converting a non-generaldelta revlog to a generaldelta revlog,
2539 are converting a non-generaldelta revlog to a generaldelta revlog,
2514 deltas will be recomputed if the delta's parent isn't a parent of the
2540 deltas will be recomputed if the delta's parent isn't a parent of the
2515 revision.
2541 revision.
2516
2542
2517 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2543 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2518 controls whether to compute deltas against both parents for merges.
2544 controls whether to compute deltas against both parents for merges.
2519 By default, the current default is used.
2545 By default, the current default is used.
2520 """
2546 """
2521 if deltareuse not in self.DELTAREUSEALL:
2547 if deltareuse not in self.DELTAREUSEALL:
2522 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2548 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2523
2549
2524 if len(destrevlog):
2550 if len(destrevlog):
2525 raise ValueError(_('destination revlog is not empty'))
2551 raise ValueError(_('destination revlog is not empty'))
2526
2552
2527 if getattr(self, 'filteredrevs', None):
2553 if getattr(self, 'filteredrevs', None):
2528 raise ValueError(_('source revlog has filtered revisions'))
2554 raise ValueError(_('source revlog has filtered revisions'))
2529 if getattr(destrevlog, 'filteredrevs', None):
2555 if getattr(destrevlog, 'filteredrevs', None):
2530 raise ValueError(_('destination revlog has filtered revisions'))
2556 raise ValueError(_('destination revlog has filtered revisions'))
2531
2557
2532 # lazydeltabase controls whether to reuse a cached delta, if possible.
2558 # lazydeltabase controls whether to reuse a cached delta, if possible.
2533 oldlazydeltabase = destrevlog._lazydeltabase
2559 oldlazydeltabase = destrevlog._lazydeltabase
2534 oldamd = destrevlog._aggressivemergedeltas
2560 oldamd = destrevlog._aggressivemergedeltas
2535
2561
2536 try:
2562 try:
2537 if deltareuse == self.DELTAREUSEALWAYS:
2563 if deltareuse == self.DELTAREUSEALWAYS:
2538 destrevlog._lazydeltabase = True
2564 destrevlog._lazydeltabase = True
2539 elif deltareuse == self.DELTAREUSESAMEREVS:
2565 elif deltareuse == self.DELTAREUSESAMEREVS:
2540 destrevlog._lazydeltabase = False
2566 destrevlog._lazydeltabase = False
2541
2567
2542 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2568 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2543
2569
2544 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2570 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2545 self.DELTAREUSESAMEREVS)
2571 self.DELTAREUSESAMEREVS)
2546
2572
2547 deltacomputer = _deltacomputer(destrevlog)
2573 deltacomputer = _deltacomputer(destrevlog)
2548 index = self.index
2574 index = self.index
2549 for rev in self:
2575 for rev in self:
2550 entry = index[rev]
2576 entry = index[rev]
2551
2577
2552 # Some classes override linkrev to take filtered revs into
2578 # Some classes override linkrev to take filtered revs into
2553 # account. Use raw entry from index.
2579 # account. Use raw entry from index.
2554 flags = entry[0] & 0xffff
2580 flags = entry[0] & 0xffff
2555 linkrev = entry[4]
2581 linkrev = entry[4]
2556 p1 = index[entry[5]][7]
2582 p1 = index[entry[5]][7]
2557 p2 = index[entry[6]][7]
2583 p2 = index[entry[6]][7]
2558 node = entry[7]
2584 node = entry[7]
2559
2585
2560 # (Possibly) reuse the delta from the revlog if allowed and
2586 # (Possibly) reuse the delta from the revlog if allowed and
2561 # the revlog chunk is a delta.
2587 # the revlog chunk is a delta.
2562 cachedelta = None
2588 cachedelta = None
2563 rawtext = None
2589 rawtext = None
2564 if populatecachedelta:
2590 if populatecachedelta:
2565 dp = self.deltaparent(rev)
2591 dp = self.deltaparent(rev)
2566 if dp != nullrev:
2592 if dp != nullrev:
2567 cachedelta = (dp, bytes(self._chunk(rev)))
2593 cachedelta = (dp, bytes(self._chunk(rev)))
2568
2594
2569 if not cachedelta:
2595 if not cachedelta:
2570 rawtext = self.revision(rev, raw=True)
2596 rawtext = self.revision(rev, raw=True)
2571
2597
2572
2598
2573 if deltareuse == self.DELTAREUSEFULLADD:
2599 if deltareuse == self.DELTAREUSEFULLADD:
2574 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2600 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2575 cachedelta=cachedelta,
2601 cachedelta=cachedelta,
2576 node=node, flags=flags,
2602 node=node, flags=flags,
2577 deltacomputer=deltacomputer)
2603 deltacomputer=deltacomputer)
2578 else:
2604 else:
2579 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2605 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2580 checkambig=False)
2606 checkambig=False)
2581 dfh = None
2607 dfh = None
2582 if not destrevlog._inline:
2608 if not destrevlog._inline:
2583 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2609 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2584 try:
2610 try:
2585 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2611 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2586 p2, flags, cachedelta, ifh, dfh,
2612 p2, flags, cachedelta, ifh, dfh,
2587 deltacomputer=deltacomputer)
2613 deltacomputer=deltacomputer)
2588 finally:
2614 finally:
2589 if dfh:
2615 if dfh:
2590 dfh.close()
2616 dfh.close()
2591 ifh.close()
2617 ifh.close()
2592
2618
2593 if addrevisioncb:
2619 if addrevisioncb:
2594 addrevisioncb(self, rev, node)
2620 addrevisioncb(self, rev, node)
2595 finally:
2621 finally:
2596 destrevlog._lazydeltabase = oldlazydeltabase
2622 destrevlog._lazydeltabase = oldlazydeltabase
2597 destrevlog._aggressivemergedeltas = oldamd
2623 destrevlog._aggressivemergedeltas = oldamd
General Comments 0
You need to be logged in to leave comments. Login now