index: don't include nullid in len()...
Martin von Zweigbergk
r39124:781b2720 default
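Summary of the change: index_length(), the C implementation of len() for the revlog index, used to count the trailing nullid sentinel entry that terminates the index; after this change it reports real revisions only. Call sites in the C code that still need the sentinel-inclusive count (index_get, index_node, index_append) now add the sentinel back with an explicit "+ 1", and the module-internal API version constant in the first file is bumped from 6 to 7. The changeset spans two C source files, both of which open with the historical "parsers.c" header comment.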
@@ -1,797 +1,797 @@
/*
 parsers.c - efficient content parsing

 Copyright 2008 Matt Mackall <mpm@selenic.com> and others

 This software may be used and distributed according to the terms of
 the GNU General Public License, incorporated herein by reference.
*/

#include <Python.h>
#include <ctype.h>
#include <stddef.h>
#include <string.h>

#include "bitmanipulation.h"
#include "charencode.h"
#include "util.h"

#ifdef IS_PY3K
/* The mapping of Python types is meant to be temporary to get Python
 * 3 to compile. We should remove this once Python 3 support is fully
 * supported and proper types are used in the extensions themselves. */
#define PyInt_Check PyLong_Check
#define PyInt_FromLong PyLong_FromLong
#define PyInt_FromSsize_t PyLong_FromSsize_t
#define PyInt_AsLong PyLong_AsLong
#endif

static const char *const versionerrortext = "Python minor version mismatch";

static PyObject *dict_new_presized(PyObject *self, PyObject *args)
{
	Py_ssize_t expected_size;

	if (!PyArg_ParseTuple(args, "n:make_presized_dict", &expected_size))
		return NULL;

	return _dict_new_presized(expected_size);
}

/*
 * This code assumes that a manifest is stitched together with newline
 * ('\n') characters.
 */
static PyObject *parse_manifest(PyObject *self, PyObject *args)
{
	PyObject *mfdict, *fdict;
	char *str, *start, *end;
	int len;

	if (!PyArg_ParseTuple(
	        args, PY23("O!O!s#:parse_manifest", "O!O!y#:parse_manifest"),
	        &PyDict_Type, &mfdict, &PyDict_Type, &fdict, &str, &len))
		goto quit;

	start = str;
	end = str + len;
	while (start < end) {
		PyObject *file = NULL, *node = NULL;
		PyObject *flags = NULL;
		char *zero = NULL, *newline = NULL;
		ptrdiff_t nlen;

		zero = memchr(start, '\0', end - start);
		if (!zero) {
			PyErr_SetString(PyExc_ValueError,
			                "manifest entry has no separator");
			goto quit;
		}

		newline = memchr(zero + 1, '\n', end - (zero + 1));
		if (!newline) {
			PyErr_SetString(PyExc_ValueError,
			                "manifest contains trailing garbage");
			goto quit;
		}

		file = PyBytes_FromStringAndSize(start, zero - start);

		if (!file)
			goto bail;

		nlen = newline - zero - 1;

		node = unhexlify(zero + 1, nlen > 40 ? 40 : (Py_ssize_t)nlen);
		if (!node)
			goto bail;

		if (nlen > 40) {
			flags = PyBytes_FromStringAndSize(zero + 41, nlen - 40);
			if (!flags)
				goto bail;

			if (PyDict_SetItem(fdict, file, flags) == -1)
				goto bail;
		}

		if (PyDict_SetItem(mfdict, file, node) == -1)
			goto bail;

		start = newline + 1;

		Py_XDECREF(flags);
		Py_XDECREF(node);
		Py_XDECREF(file);
		continue;
	bail:
		Py_XDECREF(flags);
		Py_XDECREF(node);
		Py_XDECREF(file);
		goto quit;
	}

	Py_INCREF(Py_None);
	return Py_None;
quit:
	return NULL;
}

static inline dirstateTupleObject *make_dirstate_tuple(char state, int mode,
                                                       int size, int mtime)
{
	dirstateTupleObject *t =
	    PyObject_New(dirstateTupleObject, &dirstateTupleType);
	if (!t)
		return NULL;
	t->state = state;
	t->mode = mode;
	t->size = size;
	t->mtime = mtime;
	return t;
}

static PyObject *dirstate_tuple_new(PyTypeObject *subtype, PyObject *args,
                                    PyObject *kwds)
{
	/* We do all the initialization here and not a tp_init function because
	 * dirstate_tuple is immutable. */
	dirstateTupleObject *t;
	char state;
	int size, mode, mtime;
	if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime))
		return NULL;

	t = (dirstateTupleObject *)subtype->tp_alloc(subtype, 1);
	if (!t)
		return NULL;
	t->state = state;
	t->mode = mode;
	t->size = size;
	t->mtime = mtime;

	return (PyObject *)t;
}

static void dirstate_tuple_dealloc(PyObject *o)
{
	PyObject_Del(o);
}

static Py_ssize_t dirstate_tuple_length(PyObject *o)
{
	return 4;
}

static PyObject *dirstate_tuple_item(PyObject *o, Py_ssize_t i)
{
	dirstateTupleObject *t = (dirstateTupleObject *)o;
	switch (i) {
	case 0:
		return PyBytes_FromStringAndSize(&t->state, 1);
	case 1:
		return PyInt_FromLong(t->mode);
	case 2:
		return PyInt_FromLong(t->size);
	case 3:
		return PyInt_FromLong(t->mtime);
	default:
		PyErr_SetString(PyExc_IndexError, "index out of range");
		return NULL;
	}
}

static PySequenceMethods dirstate_tuple_sq = {
	dirstate_tuple_length, /* sq_length */
	0,                     /* sq_concat */
	0,                     /* sq_repeat */
	dirstate_tuple_item,   /* sq_item */
	0,                     /* sq_ass_item */
	0,                     /* sq_contains */
	0,                     /* sq_inplace_concat */
	0                      /* sq_inplace_repeat */
};

PyTypeObject dirstateTupleType = {
	PyVarObject_HEAD_INIT(NULL, 0)      /* header */
	"dirstate_tuple",                   /* tp_name */
	sizeof(dirstateTupleObject),        /* tp_basicsize */
	0,                                  /* tp_itemsize */
	(destructor)dirstate_tuple_dealloc, /* tp_dealloc */
	0,                                  /* tp_print */
	0,                                  /* tp_getattr */
	0,                                  /* tp_setattr */
	0,                                  /* tp_compare */
	0,                                  /* tp_repr */
	0,                                  /* tp_as_number */
	&dirstate_tuple_sq,                 /* tp_as_sequence */
	0,                                  /* tp_as_mapping */
	0,                                  /* tp_hash */
	0,                                  /* tp_call */
	0,                                  /* tp_str */
	0,                                  /* tp_getattro */
	0,                                  /* tp_setattro */
	0,                                  /* tp_as_buffer */
	Py_TPFLAGS_DEFAULT,                 /* tp_flags */
	"dirstate tuple",                   /* tp_doc */
	0,                                  /* tp_traverse */
	0,                                  /* tp_clear */
	0,                                  /* tp_richcompare */
	0,                                  /* tp_weaklistoffset */
	0,                                  /* tp_iter */
	0,                                  /* tp_iternext */
	0,                                  /* tp_methods */
	0,                                  /* tp_members */
	0,                                  /* tp_getset */
	0,                                  /* tp_base */
	0,                                  /* tp_dict */
	0,                                  /* tp_descr_get */
	0,                                  /* tp_descr_set */
	0,                                  /* tp_dictoffset */
	0,                                  /* tp_init */
	0,                                  /* tp_alloc */
	dirstate_tuple_new,                 /* tp_new */
};

static PyObject *parse_dirstate(PyObject *self, PyObject *args)
{
	PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
	PyObject *fname = NULL, *cname = NULL, *entry = NULL;
	char state, *cur, *str, *cpos;
	int mode, size, mtime;
	unsigned int flen, len, pos = 40;
	int readlen;

	if (!PyArg_ParseTuple(
	        args, PY23("O!O!s#:parse_dirstate", "O!O!y#:parse_dirstate"),
	        &PyDict_Type, &dmap, &PyDict_Type, &cmap, &str, &readlen))
		goto quit;

	len = readlen;

	/* read parents */
	if (len < 40) {
		PyErr_SetString(PyExc_ValueError,
		                "too little data for parents");
		goto quit;
	}

	parents = Py_BuildValue(PY23("s#s#", "y#y#"), str, 20, str + 20, 20);
	if (!parents)
		goto quit;

	/* read filenames */
	while (pos >= 40 && pos < len) {
		if (pos + 17 > len) {
			PyErr_SetString(PyExc_ValueError,
			                "overflow in dirstate");
			goto quit;
		}
		cur = str + pos;
		/* unpack header */
		state = *cur;
		mode = getbe32(cur + 1);
		size = getbe32(cur + 5);
		mtime = getbe32(cur + 9);
		flen = getbe32(cur + 13);
		pos += 17;
		cur += 17;
		if (flen > len - pos) {
			PyErr_SetString(PyExc_ValueError,
			                "overflow in dirstate");
			goto quit;
		}

		entry =
		    (PyObject *)make_dirstate_tuple(state, mode, size, mtime);
		cpos = memchr(cur, 0, flen);
		if (cpos) {
			fname = PyBytes_FromStringAndSize(cur, cpos - cur);
			cname = PyBytes_FromStringAndSize(
			    cpos + 1, flen - (cpos - cur) - 1);
			if (!fname || !cname ||
			    PyDict_SetItem(cmap, fname, cname) == -1 ||
			    PyDict_SetItem(dmap, fname, entry) == -1)
				goto quit;
			Py_DECREF(cname);
		} else {
			fname = PyBytes_FromStringAndSize(cur, flen);
			if (!fname || PyDict_SetItem(dmap, fname, entry) == -1)
				goto quit;
		}
		Py_DECREF(fname);
		Py_DECREF(entry);
		fname = cname = entry = NULL;
		pos += flen;
	}

	ret = parents;
	Py_INCREF(ret);
quit:
	Py_XDECREF(fname);
	Py_XDECREF(cname);
	Py_XDECREF(entry);
	Py_XDECREF(parents);
	return ret;
}

/*
 * Build a set of non-normal and other parent entries from the dirstate dmap
 */
static PyObject *nonnormalotherparententries(PyObject *self, PyObject *args)
{
	PyObject *dmap, *fname, *v;
	PyObject *nonnset = NULL, *otherpset = NULL, *result = NULL;
	Py_ssize_t pos;

	if (!PyArg_ParseTuple(args, "O!:nonnormalentries", &PyDict_Type, &dmap))
		goto bail;

	nonnset = PySet_New(NULL);
	if (nonnset == NULL)
		goto bail;

	otherpset = PySet_New(NULL);
	if (otherpset == NULL)
		goto bail;

	pos = 0;
	while (PyDict_Next(dmap, &pos, &fname, &v)) {
		dirstateTupleObject *t;
		if (!dirstate_tuple_check(v)) {
			PyErr_SetString(PyExc_TypeError,
			                "expected a dirstate tuple");
			goto bail;
		}
		t = (dirstateTupleObject *)v;

		if (t->state == 'n' && t->size == -2) {
			if (PySet_Add(otherpset, fname) == -1) {
				goto bail;
			}
		}

		if (t->state == 'n' && t->mtime != -1)
			continue;
		if (PySet_Add(nonnset, fname) == -1)
			goto bail;
	}

	result = Py_BuildValue("(OO)", nonnset, otherpset);
	if (result == NULL)
		goto bail;
	Py_DECREF(nonnset);
	Py_DECREF(otherpset);
	return result;
bail:
	Py_XDECREF(nonnset);
	Py_XDECREF(otherpset);
	Py_XDECREF(result);
	return NULL;
}

/*
 * Efficiently pack a dirstate object into its on-disk format.
 */
static PyObject *pack_dirstate(PyObject *self, PyObject *args)
{
	PyObject *packobj = NULL;
	PyObject *map, *copymap, *pl, *mtime_unset = NULL;
	Py_ssize_t nbytes, pos, l;
	PyObject *k, *v = NULL, *pn;
	char *p, *s;
	int now;

	if (!PyArg_ParseTuple(args, "O!O!Oi:pack_dirstate", &PyDict_Type, &map,
	                      &PyDict_Type, &copymap, &pl, &now))
		return NULL;

	if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
		PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
		return NULL;
	}

	/* Figure out how much we need to allocate. */
	for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
		PyObject *c;
		if (!PyBytes_Check(k)) {
			PyErr_SetString(PyExc_TypeError, "expected string key");
			goto bail;
		}
		nbytes += PyBytes_GET_SIZE(k) + 17;
		c = PyDict_GetItem(copymap, k);
		if (c) {
			if (!PyBytes_Check(c)) {
				PyErr_SetString(PyExc_TypeError,
				                "expected string key");
				goto bail;
			}
			nbytes += PyBytes_GET_SIZE(c) + 1;
		}
	}

	packobj = PyBytes_FromStringAndSize(NULL, nbytes);
	if (packobj == NULL)
		goto bail;

	p = PyBytes_AS_STRING(packobj);

	pn = PySequence_ITEM(pl, 0);
	if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
		PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
		goto bail;
	}
	memcpy(p, s, l);
	p += 20;
	pn = PySequence_ITEM(pl, 1);
	if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
		PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
		goto bail;
	}
	memcpy(p, s, l);
	p += 20;

	for (pos = 0; PyDict_Next(map, &pos, &k, &v);) {
		dirstateTupleObject *tuple;
		char state;
		int mode, size, mtime;
		Py_ssize_t len, l;
		PyObject *o;
		char *t;

		if (!dirstate_tuple_check(v)) {
			PyErr_SetString(PyExc_TypeError,
			                "expected a dirstate tuple");
			goto bail;
		}
		tuple = (dirstateTupleObject *)v;

		state = tuple->state;
		mode = tuple->mode;
		size = tuple->size;
		mtime = tuple->mtime;
		if (state == 'n' && mtime == now) {
			/* See pure/parsers.py:pack_dirstate for why we do
			 * this. */
			mtime = -1;
			mtime_unset = (PyObject *)make_dirstate_tuple(
			    state, mode, size, mtime);
			if (!mtime_unset)
				goto bail;
			if (PyDict_SetItem(map, k, mtime_unset) == -1)
				goto bail;
			Py_DECREF(mtime_unset);
			mtime_unset = NULL;
		}
		*p++ = state;
		putbe32((uint32_t)mode, p);
		putbe32((uint32_t)size, p + 4);
		putbe32((uint32_t)mtime, p + 8);
		t = p + 12;
		p += 16;
		len = PyBytes_GET_SIZE(k);
		memcpy(p, PyBytes_AS_STRING(k), len);
		p += len;
		o = PyDict_GetItem(copymap, k);
		if (o) {
			*p++ = '\0';
			l = PyBytes_GET_SIZE(o);
			memcpy(p, PyBytes_AS_STRING(o), l);
			p += l;
			len += l + 1;
		}
		putbe32((uint32_t)len, t);
	}

	pos = p - PyBytes_AS_STRING(packobj);
	if (pos != nbytes) {
		PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
		             (long)pos, (long)nbytes);
		goto bail;
	}

	return packobj;
bail:
	Py_XDECREF(mtime_unset);
	Py_XDECREF(packobj);
	Py_XDECREF(v);
	return NULL;
}

#define BUMPED_FIX 1
#define USING_SHA_256 2
#define FM1_HEADER_SIZE (4 + 8 + 2 + 2 + 1 + 1 + 1)

static PyObject *readshas(const char *source, unsigned char num,
                          Py_ssize_t hashwidth)
{
	int i;
	PyObject *list = PyTuple_New(num);
	if (list == NULL) {
		return NULL;
	}
	for (i = 0; i < num; i++) {
		PyObject *hash = PyBytes_FromStringAndSize(source, hashwidth);
		if (hash == NULL) {
			Py_DECREF(list);
			return NULL;
		}
		PyTuple_SET_ITEM(list, i, hash);
		source += hashwidth;
	}
	return list;
}

static PyObject *fm1readmarker(const char *databegin, const char *dataend,
                               uint32_t *msize)
{
	const char *data = databegin;
	const char *meta;

	double mtime;
	int16_t tz;
	uint16_t flags;
	unsigned char nsuccs, nparents, nmetadata;
	Py_ssize_t hashwidth = 20;

	PyObject *prec = NULL, *parents = NULL, *succs = NULL;
	PyObject *metadata = NULL, *ret = NULL;
	int i;

	if (data + FM1_HEADER_SIZE > dataend) {
		goto overflow;
	}

	*msize = getbe32(data);
	data += 4;
	mtime = getbefloat64(data);
	data += 8;
	tz = getbeint16(data);
	data += 2;
	flags = getbeuint16(data);
	data += 2;

	if (flags & USING_SHA_256) {
		hashwidth = 32;
	}

	nsuccs = (unsigned char)(*data++);
	nparents = (unsigned char)(*data++);
	nmetadata = (unsigned char)(*data++);

	if (databegin + *msize > dataend) {
		goto overflow;
	}
	dataend = databegin + *msize; /* narrow down to marker size */

	if (data + hashwidth > dataend) {
		goto overflow;
	}
	prec = PyBytes_FromStringAndSize(data, hashwidth);
	data += hashwidth;
	if (prec == NULL) {
		goto bail;
	}

	if (data + nsuccs * hashwidth > dataend) {
		goto overflow;
	}
	succs = readshas(data, nsuccs, hashwidth);
	if (succs == NULL) {
		goto bail;
	}
	data += nsuccs * hashwidth;

	if (nparents == 1 || nparents == 2) {
		if (data + nparents * hashwidth > dataend) {
			goto overflow;
		}
		parents = readshas(data, nparents, hashwidth);
		if (parents == NULL) {
			goto bail;
		}
		data += nparents * hashwidth;
	} else {
		parents = Py_None;
		Py_INCREF(parents);
	}

	if (data + 2 * nmetadata > dataend) {
		goto overflow;
	}
	meta = data + (2 * nmetadata);
	metadata = PyTuple_New(nmetadata);
	if (metadata == NULL) {
		goto bail;
	}
	for (i = 0; i < nmetadata; i++) {
		PyObject *tmp, *left = NULL, *right = NULL;
		Py_ssize_t leftsize = (unsigned char)(*data++);
		Py_ssize_t rightsize = (unsigned char)(*data++);
		if (meta + leftsize + rightsize > dataend) {
			goto overflow;
		}
		left = PyBytes_FromStringAndSize(meta, leftsize);
		meta += leftsize;
		right = PyBytes_FromStringAndSize(meta, rightsize);
		meta += rightsize;
		tmp = PyTuple_New(2);
		if (!left || !right || !tmp) {
			Py_XDECREF(left);
			Py_XDECREF(right);
			Py_XDECREF(tmp);
			goto bail;
		}
		PyTuple_SET_ITEM(tmp, 0, left);
		PyTuple_SET_ITEM(tmp, 1, right);
		PyTuple_SET_ITEM(metadata, i, tmp);
	}
	ret = Py_BuildValue("(OOHO(di)O)", prec, succs, flags, metadata, mtime,
	                    (int)tz * 60, parents);
	goto bail; /* return successfully */

overflow:
	PyErr_SetString(PyExc_ValueError, "overflow in obsstore");
bail:
	Py_XDECREF(prec);
	Py_XDECREF(succs);
	Py_XDECREF(metadata);
	Py_XDECREF(parents);
	return ret;
}

static PyObject *fm1readmarkers(PyObject *self, PyObject *args)
{
	const char *data, *dataend;
	int datalen;
	Py_ssize_t offset, stop;
	PyObject *markers = NULL;

	if (!PyArg_ParseTuple(args, PY23("s#nn", "y#nn"), &data, &datalen,
	                      &offset, &stop)) {
		return NULL;
	}
	dataend = data + datalen;
	data += offset;
	markers = PyList_New(0);
	if (!markers) {
		return NULL;
	}
	while (offset < stop) {
		uint32_t msize;
		int error;
		PyObject *record = fm1readmarker(data, dataend, &msize);
		if (!record) {
			goto bail;
		}
		error = PyList_Append(markers, record);
		Py_DECREF(record);
		if (error) {
			goto bail;
		}
		data += msize;
		offset += msize;
	}
	return markers;
bail:
	Py_DECREF(markers);
	return NULL;
}

static char parsers_doc[] = "Efficient content parsing.";

PyObject *encodedir(PyObject *self, PyObject *args);
PyObject *pathencode(PyObject *self, PyObject *args);
PyObject *lowerencode(PyObject *self, PyObject *args);
PyObject *parse_index2(PyObject *self, PyObject *args);

static PyMethodDef methods[] = {
    {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
    {"nonnormalotherparententries", nonnormalotherparententries, METH_VARARGS,
     "create a set containing non-normal and other parent entries of given "
     "dirstate\n"},
    {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
    {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
    {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
    {"isasciistr", isasciistr, METH_VARARGS, "check if an ASCII string\n"},
    {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
    {"asciiupper", asciiupper, METH_VARARGS, "uppercase an ASCII string\n"},
    {"dict_new_presized", dict_new_presized, METH_VARARGS,
     "construct a dict with an expected size\n"},
    {"make_file_foldmap", make_file_foldmap, METH_VARARGS,
     "make file foldmap\n"},
    {"jsonescapeu8fast", jsonescapeu8fast, METH_VARARGS,
     "escape a UTF-8 byte string to JSON (fast path)\n"},
    {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
    {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
    {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
    {"fm1readmarkers", fm1readmarkers, METH_VARARGS,
     "parse v1 obsolete markers\n"},
    {NULL, NULL}};

void dirs_module_init(PyObject *mod);
void manifest_module_init(PyObject *mod);
void revlog_module_init(PyObject *mod);

-static const int version = 6;
+static const int version = 7;
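/* Note (added editorially, not in the source): the bump to 7 lands in
 * the same changeset as the index_length() change in the next file,
 * presumably so that Python-side code checking this module-level
 * "version" constant can detect a binary built with the old
 * sentinel-inclusive len() semantics. */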

static void module_init(PyObject *mod)
{
	PyModule_AddIntConstant(mod, "version", version);

	/* This module constant has two purposes. First, it lets us unit test
	 * the ImportError raised without hard-coding any error text. This
	 * means we can change the text in the future without breaking tests,
	 * even across changesets without a recompile. Second, its presence
	 * can be used to determine whether the version-checking logic is
	 * present, which also helps in testing across changesets without a
	 * recompile. Note that this means the pure-Python version of parsers
	 * should not have this module constant. */
	PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);

	dirs_module_init(mod);
	manifest_module_init(mod);
	revlog_module_init(mod);

	if (PyType_Ready(&dirstateTupleType) < 0)
		return;
	Py_INCREF(&dirstateTupleType);
	PyModule_AddObject(mod, "dirstatetuple",
	                   (PyObject *)&dirstateTupleType);
}

static int check_python_version(void)
{
	PyObject *sys = PyImport_ImportModule("sys"), *ver;
	long hexversion;
	if (!sys)
		return -1;
	ver = PyObject_GetAttrString(sys, "hexversion");
	Py_DECREF(sys);
	if (!ver)
		return -1;
	hexversion = PyInt_AsLong(ver);
	Py_DECREF(ver);
	/* sys.hexversion is a 32-bit number by default, so the -1 case
	 * should only occur in unusual circumstances (e.g. if sys.hexversion
	 * is manually set to an invalid value). */
	if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
		PyErr_Format(PyExc_ImportError,
		             "%s: The Mercurial extension "
		             "modules were compiled with Python " PY_VERSION
		             ", but "
		             "Mercurial is currently using Python with "
		             "sys.hexversion=%ld: "
		             "Python %s\n at: %s",
		             versionerrortext, hexversion, Py_GetVersion(),
		             Py_GetProgramFullPath());
		return -1;
	}
	return 0;
}

#ifdef IS_PY3K
static struct PyModuleDef parsers_module = {PyModuleDef_HEAD_INIT, "parsers",
                                            parsers_doc, -1, methods};

PyMODINIT_FUNC PyInit_parsers(void)
{
	PyObject *mod;

	if (check_python_version() == -1)
		return NULL;
	mod = PyModule_Create(&parsers_module);
	module_init(mod);
	return mod;
}
#else
PyMODINIT_FUNC initparsers(void)
{
	PyObject *mod;

	if (check_python_version() == -1)
		return;
	mod = Py_InitModule3("parsers", methods, parsers_doc);
	module_init(mod);
}
#endif
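The second file in the changeset holds the revlog index implementation (indexObject and friends), where the new len() semantics and their compensating call sites live: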
@@ -1,2167 +1,2167 @@
/*
 parsers.c - efficient content parsing

 Copyright 2008 Matt Mackall <mpm@selenic.com> and others

 This software may be used and distributed according to the terms of
 the GNU General Public License, incorporated herein by reference.
*/

#include <Python.h>
#include <assert.h>
#include <ctype.h>
#include <stddef.h>
#include <string.h>

#include "bitmanipulation.h"
#include "charencode.h"
#include "util.h"

#ifdef IS_PY3K
/* The mapping of Python types is meant to be temporary to get Python
 * 3 to compile. We should remove this once Python 3 support is fully
 * supported and proper types are used in the extensions themselves. */
#define PyInt_Check PyLong_Check
#define PyInt_FromLong PyLong_FromLong
#define PyInt_FromSsize_t PyLong_FromSsize_t
#define PyInt_AS_LONG PyLong_AS_LONG
#define PyInt_AsLong PyLong_AsLong
#endif

/*
 * A base-16 trie for fast node->rev mapping.
 *
 * Positive value is index of the next node in the trie
 * Negative value is a leaf: -(rev + 2)
 * Zero is empty
 */
typedef struct {
	int children[16];
} nodetree;

/*
 * This class has two behaviors.
 *
 * When used in a list-like way (with integer keys), we decode an
 * entry in a RevlogNG index file on demand. Our last entry is a
 * sentinel, always a nullid. We have limited support for
 * integer-keyed insert and delete, only at elements right before the
 * sentinel.
 *
 * With string keys, we lazily perform a reverse mapping from node to
 * rev, using a base-16 trie.
 */
typedef struct {
	PyObject_HEAD
	/* Type-specific fields go here. */
	PyObject *data;         /* raw bytes of index */
	Py_buffer buf;          /* buffer of data */
	PyObject **cache;       /* cached tuples */
	const char **offsets;   /* populated on demand */
	Py_ssize_t raw_length;  /* original number of elements */
	Py_ssize_t length;      /* current number of elements */
	PyObject *added;        /* populated on demand */
	PyObject *headrevs;     /* cache, invalidated on changes */
	PyObject *filteredrevs; /* filtered revs set */
	nodetree *nt;           /* base-16 trie */
	unsigned ntlength;      /* # nodes in use */
	unsigned ntcapacity;    /* # nodes allocated */
	int ntdepth;            /* maximum depth of tree */
	int ntsplits;           /* # splits performed */
	int ntrev;              /* last rev scanned */
	int ntlookups;          /* # lookups */
	int ntmisses;           /* # lookups that miss the cache */
	int inlined;
} indexObject;

static Py_ssize_t index_length(const indexObject *self)
{
	if (self->added == NULL)
-		return self->length;
-	return self->length + PyList_GET_SIZE(self->added);
+		return self->length - 1;
+	return self->length + PyList_GET_SIZE(self->added) - 1;
}
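
/*
 * Illustrative sketch, not part of this changeset: raw storage still
 * ends with the nullid sentinel, but index_length() now reports real
 * revisions only. A hypothetical helper like the one below would
 * recover the old sentinel-inclusive count that call sites such as
 * index_get, index_node, and index_append now spell inline as
 * "index_length(self) + 1".
 */
static Py_ssize_t index_length_with_sentinel(const indexObject *self)
{
	return index_length(self) + 1;
}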

static PyObject *nullentry;
static const char nullid[20];

static Py_ssize_t inline_scan(indexObject *self, const char **offsets);

#if LONG_MAX == 0x7fffffffL
static const char *const tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#");
#else
static const char *const tuple_format = PY23("kiiiiiis#", "kiiiiiiy#");
#endif

/* A RevlogNG v1 index entry is 64 bytes long. */
static const long v1_hdrsize = 64;

/*
 * Return a pointer to the beginning of a RevlogNG record.
 */
static const char *index_deref(indexObject *self, Py_ssize_t pos)
{
	if (self->inlined && pos > 0) {
		if (self->offsets == NULL) {
			self->offsets = PyMem_Malloc(self->raw_length *
			                             sizeof(*self->offsets));
			if (self->offsets == NULL)
				return (const char *)PyErr_NoMemory();
			inline_scan(self, self->offsets);
		}
		return self->offsets[pos];
	}

	return (const char *)(self->buf.buf) + pos * v1_hdrsize;
}

static inline int index_get_parents(indexObject *self, Py_ssize_t rev,
                                    int *ps, int maxrev)
{
	if (rev >= self->length - 1) {
		PyObject *tuple = PyList_GET_ITEM(self->added,
		                                  rev - self->length + 1);
		ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5));
		ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6));
	} else {
		const char *data = index_deref(self, rev);
		ps[0] = getbe32(data + 24);
		ps[1] = getbe32(data + 28);
	}
	/* If index file is corrupted, ps[] may point to invalid revisions. So
	 * there is a risk of buffer overflow to trust them unconditionally. */
	if (ps[0] > maxrev || ps[1] > maxrev) {
		PyErr_SetString(PyExc_ValueError, "parent out of range");
		return -1;
	}
	return 0;
}


/*
 * RevlogNG format (all in big endian, data may be inlined):
 * 6 bytes: offset
 * 2 bytes: flags
 * 4 bytes: compressed length
 * 4 bytes: uncompressed length
 * 4 bytes: base revision
 * 4 bytes: link revision
 * 4 bytes: parent 1 revision
 * 4 bytes: parent 2 revision
 * 32 bytes: nodeid (only 20 bytes used)
 */
static PyObject *index_get(indexObject *self, Py_ssize_t pos)
{
	uint64_t offset_flags;
	int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
	const char *c_node_id;
	const char *data;
-	Py_ssize_t length = index_length(self);
+	Py_ssize_t length = index_length(self) + 1;
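	/* index_length() now excludes the nullid sentinel; "+ 1" adds it
	 * back so the bounds checks below, where pos == length - 1 denotes
	 * the sentinel entry, keep working unchanged. */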
	PyObject *entry;

	if (pos == -1 || pos == length - 1) {
		Py_INCREF(nullentry);
		return nullentry;
	}

	if (pos < 0 || pos >= length) {
		PyErr_SetString(PyExc_IndexError, "revlog index out of range");
		return NULL;
	}

	if (pos >= self->length - 1) {
		PyObject *obj;
		obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
		Py_INCREF(obj);
		return obj;
	}

	if (self->cache) {
		if (self->cache[pos]) {
			Py_INCREF(self->cache[pos]);
			return self->cache[pos];
		}
	} else {
		self->cache = calloc(self->raw_length, sizeof(PyObject *));
		if (self->cache == NULL)
			return PyErr_NoMemory();
	}

	data = index_deref(self, pos);
	if (data == NULL)
		return NULL;

	offset_flags = getbe32(data + 4);
	if (pos == 0) /* mask out version number for the first entry */
		offset_flags &= 0xFFFF;
	else {
		uint32_t offset_high = getbe32(data);
		offset_flags |= ((uint64_t)offset_high) << 32;
	}

	comp_len = getbe32(data + 8);
	uncomp_len = getbe32(data + 12);
	base_rev = getbe32(data + 16);
	link_rev = getbe32(data + 20);
	parent_1 = getbe32(data + 24);
	parent_2 = getbe32(data + 28);
	c_node_id = data + 32;

	entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
	                      uncomp_len, base_rev, link_rev,
	                      parent_1, parent_2, c_node_id, 20);

	if (entry) {
		PyObject_GC_UnTrack(entry);
		Py_INCREF(entry);
	}

	self->cache[pos] = entry;

	return entry;
}

/*
 * Return the 20-byte SHA of the node corresponding to the given rev.
 */
static const char *index_node(indexObject *self, Py_ssize_t pos)
{
-	Py_ssize_t length = index_length(self);
+	Py_ssize_t length = index_length(self) + 1;
229 const char *data;
229 const char *data;
230
230
231 if (pos == length - 1 || pos == -1)
231 if (pos == length - 1 || pos == -1)
232 return nullid;
232 return nullid;
233
233
234 if (pos >= length)
234 if (pos >= length)
235 return NULL;
235 return NULL;
236
236
237 if (pos >= self->length - 1) {
237 if (pos >= self->length - 1) {
238 PyObject *tuple, *str;
238 PyObject *tuple, *str;
239 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
239 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
240 str = PyTuple_GetItem(tuple, 7);
240 str = PyTuple_GetItem(tuple, 7);
241 return str ? PyBytes_AS_STRING(str) : NULL;
241 return str ? PyBytes_AS_STRING(str) : NULL;
242 }
242 }
243
243
244 data = index_deref(self, pos);
244 data = index_deref(self, pos);
245 return data ? data + 32 : NULL;
245 return data ? data + 32 : NULL;
246 }
246 }
247
247
248 /*
248 /*
249 * Return the 20-byte SHA of the node corresponding to the given rev. The
249 * Return the 20-byte SHA of the node corresponding to the given rev. The
250 * rev is assumed to exist. If not, an exception is set.
250 * rev is assumed to exist. If not, an exception is set.
251 */
251 */
252 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
252 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
253 {
253 {
254 const char *node = index_node(self, pos);
254 const char *node = index_node(self, pos);
255 if (node == NULL) {
255 if (node == NULL) {
256 PyErr_Format(PyExc_IndexError, "could not access rev %d",
256 PyErr_Format(PyExc_IndexError, "could not access rev %d",
257 (int)pos);
257 (int)pos);
258 }
258 }
259 return node;
259 return node;
260 }
260 }
261
261
262 static int nt_insert(indexObject *self, const char *node, int rev);
262 static int nt_insert(indexObject *self, const char *node, int rev);
263
263
264 static int node_check(PyObject *obj, char **node)
264 static int node_check(PyObject *obj, char **node)
265 {
265 {
266 Py_ssize_t nodelen;
266 Py_ssize_t nodelen;
267 if (PyBytes_AsStringAndSize(obj, node, &nodelen) == -1)
267 if (PyBytes_AsStringAndSize(obj, node, &nodelen) == -1)
268 return -1;
268 return -1;
269 if (nodelen == 20)
269 if (nodelen == 20)
270 return 0;
270 return 0;
271 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
271 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
272 return -1;
272 return -1;
273 }
273 }
274
274
275 static PyObject *index_append(indexObject *self, PyObject *obj)
275 static PyObject *index_append(indexObject *self, PyObject *obj)
276 {
276 {
277 char *node;
277 char *node;
278 Py_ssize_t len;
278 Py_ssize_t len;
279
279
280 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
280 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
281 PyErr_SetString(PyExc_TypeError, "8-tuple required");
281 PyErr_SetString(PyExc_TypeError, "8-tuple required");
282 return NULL;
282 return NULL;
283 }
283 }
284
284
285 if (node_check(PyTuple_GET_ITEM(obj, 7), &node) == -1)
285 if (node_check(PyTuple_GET_ITEM(obj, 7), &node) == -1)
286 return NULL;
286 return NULL;
287
287
288 len = index_length(self);
288 len = index_length(self) + 1;
289
289
290 if (self->added == NULL) {
290 if (self->added == NULL) {
291 self->added = PyList_New(0);
291 self->added = PyList_New(0);
292 if (self->added == NULL)
292 if (self->added == NULL)
293 return NULL;
293 return NULL;
294 }
294 }
295
295
296 if (PyList_Append(self->added, obj) == -1)
296 if (PyList_Append(self->added, obj) == -1)
297 return NULL;
297 return NULL;
298
298
299 if (self->nt)
299 if (self->nt)
300 nt_insert(self, node, len - 1);
300 nt_insert(self, node, len - 1);
301
301
302 Py_CLEAR(self->headrevs);
302 Py_CLEAR(self->headrevs);
303 Py_RETURN_NONE;
303 Py_RETURN_NONE;
304 }
304 }
305
305
306 static void _index_clearcaches(indexObject *self)
306 static void _index_clearcaches(indexObject *self)
307 {
307 {
308 if (self->cache) {
308 if (self->cache) {
309 Py_ssize_t i;
309 Py_ssize_t i;
310
310
311 for (i = 0; i < self->raw_length; i++)
311 for (i = 0; i < self->raw_length; i++)
312 Py_CLEAR(self->cache[i]);
312 Py_CLEAR(self->cache[i]);
313 free(self->cache);
313 free(self->cache);
314 self->cache = NULL;
314 self->cache = NULL;
315 }
315 }
316 if (self->offsets) {
316 if (self->offsets) {
317 PyMem_Free(self->offsets);
317 PyMem_Free(self->offsets);
318 self->offsets = NULL;
318 self->offsets = NULL;
319 }
319 }
320 free(self->nt);
320 free(self->nt);
321 self->nt = NULL;
321 self->nt = NULL;
322 Py_CLEAR(self->headrevs);
322 Py_CLEAR(self->headrevs);
323 }
323 }
324
324
325 static PyObject *index_clearcaches(indexObject *self)
325 static PyObject *index_clearcaches(indexObject *self)
326 {
326 {
327 _index_clearcaches(self);
327 _index_clearcaches(self);
328 self->ntlength = self->ntcapacity = 0;
328 self->ntlength = self->ntcapacity = 0;
329 self->ntdepth = self->ntsplits = 0;
329 self->ntdepth = self->ntsplits = 0;
330 self->ntrev = -1;
330 self->ntrev = -1;
331 self->ntlookups = self->ntmisses = 0;
331 self->ntlookups = self->ntmisses = 0;
332 Py_RETURN_NONE;
332 Py_RETURN_NONE;
333 }
333 }
334
334
335 static PyObject *index_stats(indexObject *self)
335 static PyObject *index_stats(indexObject *self)
336 {
336 {
337 PyObject *obj = PyDict_New();
337 PyObject *obj = PyDict_New();
338 PyObject *t = NULL;
338 PyObject *t = NULL;
339
339
340 if (obj == NULL)
340 if (obj == NULL)
341 return NULL;
341 return NULL;
342
342
343 #define istat(__n, __d) \
343 #define istat(__n, __d) \
344 do { \
344 do { \
345 t = PyInt_FromSsize_t(self->__n); \
345 t = PyInt_FromSsize_t(self->__n); \
346 if (!t) \
346 if (!t) \
347 goto bail; \
347 goto bail; \
348 if (PyDict_SetItemString(obj, __d, t) == -1) \
348 if (PyDict_SetItemString(obj, __d, t) == -1) \
349 goto bail; \
349 goto bail; \
350 Py_DECREF(t); \
350 Py_DECREF(t); \
351 } while (0)
351 } while (0)
352
352
353 if (self->added) {
353 if (self->added) {
354 Py_ssize_t len = PyList_GET_SIZE(self->added);
354 Py_ssize_t len = PyList_GET_SIZE(self->added);
355 t = PyInt_FromSsize_t(len);
355 t = PyInt_FromSsize_t(len);
356 if (!t)
356 if (!t)
357 goto bail;
357 goto bail;
358 if (PyDict_SetItemString(obj, "index entries added", t) == -1)
358 if (PyDict_SetItemString(obj, "index entries added", t) == -1)
359 goto bail;
359 goto bail;
360 Py_DECREF(t);
360 Py_DECREF(t);
361 }
361 }
362
362
363 if (self->raw_length != self->length - 1)
363 if (self->raw_length != self->length - 1)
364 istat(raw_length, "revs on disk");
364 istat(raw_length, "revs on disk");
365 istat(length, "revs in memory");
365 istat(length, "revs in memory");
366 istat(ntcapacity, "node trie capacity");
366 istat(ntcapacity, "node trie capacity");
367 istat(ntdepth, "node trie depth");
367 istat(ntdepth, "node trie depth");
368 istat(ntlength, "node trie count");
368 istat(ntlength, "node trie count");
369 istat(ntlookups, "node trie lookups");
369 istat(ntlookups, "node trie lookups");
370 istat(ntmisses, "node trie misses");
370 istat(ntmisses, "node trie misses");
371 istat(ntrev, "node trie last rev scanned");
371 istat(ntrev, "node trie last rev scanned");
372 istat(ntsplits, "node trie splits");
372 istat(ntsplits, "node trie splits");
373
373
374 #undef istat
374 #undef istat
375
375
376 return obj;
376 return obj;
377
377
378 bail:
378 bail:
379 Py_XDECREF(obj);
379 Py_XDECREF(obj);
380 Py_XDECREF(t);
380 Py_XDECREF(t);
381 return NULL;
381 return NULL;
382 }
382 }
383
383
384 /*
384 /*
385 * When we cache a list, we want to be sure the caller can't mutate
385 * When we cache a list, we want to be sure the caller can't mutate
386 * the cached copy.
386 * the cached copy.
387 */
387 */
388 static PyObject *list_copy(PyObject *list)
388 static PyObject *list_copy(PyObject *list)
389 {
389 {
390 Py_ssize_t len = PyList_GET_SIZE(list);
390 Py_ssize_t len = PyList_GET_SIZE(list);
391 PyObject *newlist = PyList_New(len);
391 PyObject *newlist = PyList_New(len);
392 Py_ssize_t i;
392 Py_ssize_t i;
393
393
394 if (newlist == NULL)
394 if (newlist == NULL)
395 return NULL;
395 return NULL;
396
396
397 for (i = 0; i < len; i++) {
397 for (i = 0; i < len; i++) {
398 PyObject *obj = PyList_GET_ITEM(list, i);
398 PyObject *obj = PyList_GET_ITEM(list, i);
399 Py_INCREF(obj);
399 Py_INCREF(obj);
400 PyList_SET_ITEM(newlist, i, obj);
400 PyList_SET_ITEM(newlist, i, obj);
401 }
401 }
402
402
403 return newlist;
403 return newlist;
404 }
404 }
405
405
406 static int check_filter(PyObject *filter, Py_ssize_t arg)
406 static int check_filter(PyObject *filter, Py_ssize_t arg)
407 {
407 {
408 if (filter) {
408 if (filter) {
409 PyObject *arglist, *result;
409 PyObject *arglist, *result;
410 int isfiltered;
410 int isfiltered;
411
411
412 arglist = Py_BuildValue("(n)", arg);
412 arglist = Py_BuildValue("(n)", arg);
413 if (!arglist) {
413 if (!arglist) {
414 return -1;
414 return -1;
415 }
415 }
416
416
417 result = PyEval_CallObject(filter, arglist);
417 result = PyEval_CallObject(filter, arglist);
418 Py_DECREF(arglist);
418 Py_DECREF(arglist);
419 if (!result) {
419 if (!result) {
420 return -1;
420 return -1;
421 }
421 }
422
422
423 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
423 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
424 * same as this function, so we can just return it directly. */
424 * same as this function, so we can just return it directly. */
425 isfiltered = PyObject_IsTrue(result);
425 isfiltered = PyObject_IsTrue(result);
426 Py_DECREF(result);
426 Py_DECREF(result);
427 return isfiltered;
427 return isfiltered;
428 } else {
428 } else {
429 return 0;
429 return 0;
430 }
430 }
431 }
431 }
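
check_filter above is the standard shape for invoking a Python predicate from C: build the argument, call, convert truthiness, and report errors as -1. A minimal sketch of the same pattern against any callable (call_predicate is a hypothetical name, not part of this file):

#include <Python.h>

/* returns 1/0 for the predicate's truth value, or -1 with a Python
 * exception set on failure -- the same convention check_filter uses */
static int call_predicate(PyObject *predicate, Py_ssize_t arg)
{
	PyObject *result = PyObject_CallFunction(predicate, "n", arg);
	int istrue;

	if (result == NULL)
		return -1;
	istrue = PyObject_IsTrue(result); /* 1, 0, or -1 on error */
	Py_DECREF(result);
	return istrue;
}
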
432
432
433 static Py_ssize_t add_roots_get_min(indexObject *self, PyObject *list,
433 static Py_ssize_t add_roots_get_min(indexObject *self, PyObject *list,
434 Py_ssize_t marker, char *phases)
434 Py_ssize_t marker, char *phases)
435 {
435 {
436 PyObject *iter = NULL;
436 PyObject *iter = NULL;
437 PyObject *iter_item = NULL;
437 PyObject *iter_item = NULL;
438 Py_ssize_t min_idx = index_length(self) + 1;
438 Py_ssize_t min_idx = index_length(self) + 2;
439 long iter_item_long;
439 long iter_item_long;
440
440
441 if (PyList_GET_SIZE(list) != 0) {
441 if (PyList_GET_SIZE(list) != 0) {
442 iter = PyObject_GetIter(list);
442 iter = PyObject_GetIter(list);
443 if (iter == NULL)
443 if (iter == NULL)
444 return -2;
444 return -2;
445 while ((iter_item = PyIter_Next(iter))) {
445 while ((iter_item = PyIter_Next(iter))) {
446 iter_item_long = PyInt_AS_LONG(iter_item);
446 iter_item_long = PyInt_AS_LONG(iter_item);
447 Py_DECREF(iter_item);
447 Py_DECREF(iter_item);
448 if (iter_item_long < min_idx)
448 if (iter_item_long < min_idx)
449 min_idx = iter_item_long;
449 min_idx = iter_item_long;
450 phases[iter_item_long] = marker;
450 phases[iter_item_long] = marker;
451 }
451 }
452 Py_DECREF(iter);
452 Py_DECREF(iter);
453 }
453 }
454
454
455 return min_idx;
455 return min_idx;
456 }
456 }
457
457
458 static inline void set_phase_from_parents(char *phases, int parent_1,
458 static inline void set_phase_from_parents(char *phases, int parent_1,
459 int parent_2, Py_ssize_t i)
459 int parent_2, Py_ssize_t i)
460 {
460 {
461 if (parent_1 >= 0 && phases[parent_1] > phases[i])
461 if (parent_1 >= 0 && phases[parent_1] > phases[i])
462 phases[i] = phases[parent_1];
462 phases[i] = phases[parent_1];
463 if (parent_2 >= 0 && phases[parent_2] > phases[i])
463 if (parent_2 >= 0 && phases[parent_2] > phases[i])
464 phases[i] = phases[parent_2];
464 phases[i] = phases[parent_2];
465 }
465 }
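
set_phase_from_parents encodes the rule that a revision is at least as secret as either of its parents. A self-contained toy run of that propagation over a five-rev history (the parent table and phase values are invented for illustration; 0=public, 1=draft, 2=secret):

#include <stdio.h>

int main(void)
{
	/* parents[i][0..1]: parent revs of rev i, -1 for nullrev */
	int parents[5][2] = {{-1, -1}, {0, -1}, {1, -1}, {1, -1}, {2, 3}};
	char phases[5] = {0, 0, 1, 0, 0}; /* rev 2 is a draft root */
	int i, j;

	/* parents always have smaller rev numbers, so one forward
	 * sweep reaches a fixed point */
	for (i = 0; i < 5; i++)
		for (j = 0; j < 2; j++) {
			int p = parents[i][j];
			if (p >= 0 && phases[p] > phases[i])
				phases[i] = phases[p];
		}

	for (i = 0; i < 5; i++)
		printf("rev %d: phase %d\n", i, phases[i]);
	/* rev 4 merges revs 2 and 3, so it inherits draft from rev 2 */
	return 0;
}
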
466
466
467 static PyObject *reachableroots2(indexObject *self, PyObject *args)
467 static PyObject *reachableroots2(indexObject *self, PyObject *args)
468 {
468 {
469
469
470 /* Input */
470 /* Input */
471 long minroot;
471 long minroot;
472 PyObject *includepatharg = NULL;
472 PyObject *includepatharg = NULL;
473 int includepath = 0;
473 int includepath = 0;
474 /* heads and roots are lists */
474 /* heads and roots are lists */
475 PyObject *heads = NULL;
475 PyObject *heads = NULL;
476 PyObject *roots = NULL;
476 PyObject *roots = NULL;
477 PyObject *reachable = NULL;
477 PyObject *reachable = NULL;
478
478
479 PyObject *val;
479 PyObject *val;
480 Py_ssize_t len = index_length(self) - 1;
480 Py_ssize_t len = index_length(self);
481 long revnum;
481 long revnum;
482 Py_ssize_t k;
482 Py_ssize_t k;
483 Py_ssize_t i;
483 Py_ssize_t i;
484 Py_ssize_t l;
484 Py_ssize_t l;
485 int r;
485 int r;
486 int parents[2];
486 int parents[2];
487
487
488 /* Internal data structure:
488 /* Internal data structure:
489 * tovisit: array of length len+1 (all revs + nullrev), filled up to lentovisit
489 * tovisit: array of length len+1 (all revs + nullrev), filled up to lentovisit
490 * revstates: array of length len+1 (all revs + nullrev) */
490 * revstates: array of length len+1 (all revs + nullrev) */
491 int *tovisit = NULL;
491 int *tovisit = NULL;
492 long lentovisit = 0;
492 long lentovisit = 0;
493 enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
493 enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
494 char *revstates = NULL;
494 char *revstates = NULL;
495
495
496 /* Get arguments */
496 /* Get arguments */
497 if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
497 if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
498 &PyList_Type, &roots,
498 &PyList_Type, &roots,
499 &PyBool_Type, &includepatharg))
499 &PyBool_Type, &includepatharg))
500 goto bail;
500 goto bail;
501
501
502 if (includepatharg == Py_True)
502 if (includepatharg == Py_True)
503 includepath = 1;
503 includepath = 1;
504
504
505 /* Initialize return set */
505 /* Initialize return set */
506 reachable = PyList_New(0);
506 reachable = PyList_New(0);
507 if (reachable == NULL)
507 if (reachable == NULL)
508 goto bail;
508 goto bail;
509
509
510 /* Initialize internal datastructures */
510 /* Initialize internal datastructures */
511 tovisit = (int *)malloc((len + 1) * sizeof(int));
511 tovisit = (int *)malloc((len + 1) * sizeof(int));
512 if (tovisit == NULL) {
512 if (tovisit == NULL) {
513 PyErr_NoMemory();
513 PyErr_NoMemory();
514 goto bail;
514 goto bail;
515 }
515 }
516
516
517 revstates = (char *)calloc(len + 1, 1);
517 revstates = (char *)calloc(len + 1, 1);
518 if (revstates == NULL) {
518 if (revstates == NULL) {
519 PyErr_NoMemory();
519 PyErr_NoMemory();
520 goto bail;
520 goto bail;
521 }
521 }
522
522
523 l = PyList_GET_SIZE(roots);
523 l = PyList_GET_SIZE(roots);
524 for (i = 0; i < l; i++) {
524 for (i = 0; i < l; i++) {
525 revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
525 revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
526 if (revnum == -1 && PyErr_Occurred())
526 if (revnum == -1 && PyErr_Occurred())
527 goto bail;
527 goto bail;
528 /* If root is out of range, e.g. wdir(), it must be unreachable
528 /* If root is out of range, e.g. wdir(), it must be unreachable
529 * from heads. So we can just ignore it. */
529 * from heads. So we can just ignore it. */
530 if (revnum + 1 < 0 || revnum + 1 >= len + 1)
530 if (revnum + 1 < 0 || revnum + 1 >= len + 1)
531 continue;
531 continue;
532 revstates[revnum + 1] |= RS_ROOT;
532 revstates[revnum + 1] |= RS_ROOT;
533 }
533 }
534
534
535 /* Populate tovisit with all the heads */
535 /* Populate tovisit with all the heads */
536 l = PyList_GET_SIZE(heads);
536 l = PyList_GET_SIZE(heads);
537 for (i = 0; i < l; i++) {
537 for (i = 0; i < l; i++) {
538 revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
538 revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
539 if (revnum == -1 && PyErr_Occurred())
539 if (revnum == -1 && PyErr_Occurred())
540 goto bail;
540 goto bail;
541 if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
541 if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
542 PyErr_SetString(PyExc_IndexError, "head out of range");
542 PyErr_SetString(PyExc_IndexError, "head out of range");
543 goto bail;
543 goto bail;
544 }
544 }
545 if (!(revstates[revnum + 1] & RS_SEEN)) {
545 if (!(revstates[revnum + 1] & RS_SEEN)) {
546 tovisit[lentovisit++] = (int)revnum;
546 tovisit[lentovisit++] = (int)revnum;
547 revstates[revnum + 1] |= RS_SEEN;
547 revstates[revnum + 1] |= RS_SEEN;
548 }
548 }
549 }
549 }
550
550
551 /* Visit the tovisit list and find the reachable roots */
551 /* Visit the tovisit list and find the reachable roots */
552 k = 0;
552 k = 0;
553 while (k < lentovisit) {
553 while (k < lentovisit) {
554 /* Add the node to reachable if it is a root */
554 /* Add the node to reachable if it is a root */
555 revnum = tovisit[k++];
555 revnum = tovisit[k++];
556 if (revstates[revnum + 1] & RS_ROOT) {
556 if (revstates[revnum + 1] & RS_ROOT) {
557 revstates[revnum + 1] |= RS_REACHABLE;
557 revstates[revnum + 1] |= RS_REACHABLE;
558 val = PyInt_FromLong(revnum);
558 val = PyInt_FromLong(revnum);
559 if (val == NULL)
559 if (val == NULL)
560 goto bail;
560 goto bail;
561 r = PyList_Append(reachable, val);
561 r = PyList_Append(reachable, val);
562 Py_DECREF(val);
562 Py_DECREF(val);
563 if (r < 0)
563 if (r < 0)
564 goto bail;
564 goto bail;
565 if (includepath == 0)
565 if (includepath == 0)
566 continue;
566 continue;
567 }
567 }
568
568
569 /* Add its parents to the list of nodes to visit */
569 /* Add its parents to the list of nodes to visit */
570 if (revnum == -1)
570 if (revnum == -1)
571 continue;
571 continue;
572 r = index_get_parents(self, revnum, parents, (int)len - 1);
572 r = index_get_parents(self, revnum, parents, (int)len - 1);
573 if (r < 0)
573 if (r < 0)
574 goto bail;
574 goto bail;
575 for (i = 0; i < 2; i++) {
575 for (i = 0; i < 2; i++) {
576 if (!(revstates[parents[i] + 1] & RS_SEEN)
576 if (!(revstates[parents[i] + 1] & RS_SEEN)
577 && parents[i] >= minroot) {
577 && parents[i] >= minroot) {
578 tovisit[lentovisit++] = parents[i];
578 tovisit[lentovisit++] = parents[i];
579 revstates[parents[i] + 1] |= RS_SEEN;
579 revstates[parents[i] + 1] |= RS_SEEN;
580 }
580 }
581 }
581 }
582 }
582 }
583
583
584 /* Find all the nodes in between the roots we found and the heads
584 /* Find all the nodes in between the roots we found and the heads
585 * and add them to the reachable set */
585 * and add them to the reachable set */
586 if (includepath == 1) {
586 if (includepath == 1) {
587 long minidx = minroot;
587 long minidx = minroot;
588 if (minidx < 0)
588 if (minidx < 0)
589 minidx = 0;
589 minidx = 0;
590 for (i = minidx; i < len; i++) {
590 for (i = minidx; i < len; i++) {
591 if (!(revstates[i + 1] & RS_SEEN))
591 if (!(revstates[i + 1] & RS_SEEN))
592 continue;
592 continue;
593 r = index_get_parents(self, i, parents, (int)len - 1);
593 r = index_get_parents(self, i, parents, (int)len - 1);
594 /* Corrupted index file, error is set from
594 /* Corrupted index file, error is set from
595 * index_get_parents */
595 * index_get_parents */
596 if (r < 0)
596 if (r < 0)
597 goto bail;
597 goto bail;
598 if (((revstates[parents[0] + 1] |
598 if (((revstates[parents[0] + 1] |
599 revstates[parents[1] + 1]) & RS_REACHABLE)
599 revstates[parents[1] + 1]) & RS_REACHABLE)
600 && !(revstates[i + 1] & RS_REACHABLE)) {
600 && !(revstates[i + 1] & RS_REACHABLE)) {
601 revstates[i + 1] |= RS_REACHABLE;
601 revstates[i + 1] |= RS_REACHABLE;
602 val = PyInt_FromLong(i);
602 val = PyInt_FromLong(i);
603 if (val == NULL)
603 if (val == NULL)
604 goto bail;
604 goto bail;
605 r = PyList_Append(reachable, val);
605 r = PyList_Append(reachable, val);
606 Py_DECREF(val);
606 Py_DECREF(val);
607 if (r < 0)
607 if (r < 0)
608 goto bail;
608 goto bail;
609 }
609 }
610 }
610 }
611 }
611 }
612
612
613 free(revstates);
613 free(revstates);
614 free(tovisit);
614 free(tovisit);
615 return reachable;
615 return reachable;
616 bail:
616 bail:
617 Py_XDECREF(reachable);
617 Py_XDECREF(reachable);
618 free(revstates);
618 free(revstates);
619 free(tovisit);
619 free(tovisit);
620 return NULL;
620 return NULL;
621 }
621 }
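
reachableroots2 indexes its flag arrays by rev + 1 so that nullrev (-1) gets slot 0 and needs no special-casing. The convention in isolation, as a hedged sketch (the helper names are made up):

#include <stdio.h>
#include <stdlib.h>

enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };

/* one flag byte per rev, plus one extra slot for nullrev (-1) */
static char *alloc_revstates(long len)
{
	return calloc(len + 1, 1);
}

static void mark(char *revstates, long rev, char flag)
{
	revstates[rev + 1] |= flag; /* rev == -1 maps to index 0 */
}

static int has(const char *revstates, long rev, char flag)
{
	return (revstates[rev + 1] & flag) != 0;
}

int main(void)
{
	char *revstates = alloc_revstates(3); /* revs 0..2 plus nullrev */

	mark(revstates, -1, RS_SEEN);
	mark(revstates, 2, RS_ROOT);
	printf("%d %d\n", has(revstates, -1, RS_SEEN),
	       has(revstates, 2, RS_ROOT)); /* prints: 1 1 */
	free(revstates);
	return 0;
}
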
622
622
623 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
623 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
624 {
624 {
625 PyObject *roots = Py_None;
625 PyObject *roots = Py_None;
626 PyObject *ret = NULL;
626 PyObject *ret = NULL;
627 PyObject *phasessize = NULL;
627 PyObject *phasessize = NULL;
628 PyObject *phaseroots = NULL;
628 PyObject *phaseroots = NULL;
629 PyObject *phaseset = NULL;
629 PyObject *phaseset = NULL;
630 PyObject *phasessetlist = NULL;
630 PyObject *phasessetlist = NULL;
631 PyObject *rev = NULL;
631 PyObject *rev = NULL;
632 Py_ssize_t len = index_length(self) - 1;
632 Py_ssize_t len = index_length(self);
633 Py_ssize_t numphase = 0;
633 Py_ssize_t numphase = 0;
634 Py_ssize_t minrevallphases = 0;
634 Py_ssize_t minrevallphases = 0;
635 Py_ssize_t minrevphase = 0;
635 Py_ssize_t minrevphase = 0;
636 Py_ssize_t i = 0;
636 Py_ssize_t i = 0;
637 char *phases = NULL;
637 char *phases = NULL;
638 long phase;
638 long phase;
639
639
640 if (!PyArg_ParseTuple(args, "O", &roots))
640 if (!PyArg_ParseTuple(args, "O", &roots))
641 goto done;
641 goto done;
642 if (roots == NULL || !PyList_Check(roots)) {
642 if (roots == NULL || !PyList_Check(roots)) {
643 PyErr_SetString(PyExc_TypeError, "roots must be a list");
643 PyErr_SetString(PyExc_TypeError, "roots must be a list");
644 goto done;
644 goto done;
645 }
645 }
646
646
647 phases = calloc(len, 1); /* phase per rev: {0: public, 1: draft, 2: secret} */
647 phases = calloc(len, 1); /* phase per rev: {0: public, 1: draft, 2: secret} */
648 if (phases == NULL) {
648 if (phases == NULL) {
649 PyErr_NoMemory();
649 PyErr_NoMemory();
650 goto done;
650 goto done;
651 }
651 }
652 /* Put the phase information of all the roots in phases */
652 /* Put the phase information of all the roots in phases */
653 numphase = PyList_GET_SIZE(roots)+1;
653 numphase = PyList_GET_SIZE(roots)+1;
654 minrevallphases = len + 1;
654 minrevallphases = len + 1;
655 phasessetlist = PyList_New(numphase);
655 phasessetlist = PyList_New(numphase);
656 if (phasessetlist == NULL)
656 if (phasessetlist == NULL)
657 goto done;
657 goto done;
658
658
659 PyList_SET_ITEM(phasessetlist, 0, Py_None);
659 PyList_SET_ITEM(phasessetlist, 0, Py_None);
660 Py_INCREF(Py_None);
660 Py_INCREF(Py_None);
661
661
662 for (i = 0; i < numphase-1; i++) {
662 for (i = 0; i < numphase-1; i++) {
663 phaseroots = PyList_GET_ITEM(roots, i);
663 phaseroots = PyList_GET_ITEM(roots, i);
664 phaseset = PySet_New(NULL);
664 phaseset = PySet_New(NULL);
665 if (phaseset == NULL)
665 if (phaseset == NULL)
666 goto release;
666 goto release;
667 PyList_SET_ITEM(phasessetlist, i+1, phaseset);
667 PyList_SET_ITEM(phasessetlist, i+1, phaseset);
668 if (!PyList_Check(phaseroots)) {
668 if (!PyList_Check(phaseroots)) {
669 PyErr_SetString(PyExc_TypeError,
669 PyErr_SetString(PyExc_TypeError,
670 "roots item must be a list");
670 "roots item must be a list");
671 goto release;
671 goto release;
672 }
672 }
673 minrevphase = add_roots_get_min(self, phaseroots, i+1, phases);
673 minrevphase = add_roots_get_min(self, phaseroots, i+1, phases);
674 if (minrevphase == -2) /* Error from add_roots_get_min */
674 if (minrevphase == -2) /* Error from add_roots_get_min */
675 goto release;
675 goto release;
676 minrevallphases = MIN(minrevallphases, minrevphase);
676 minrevallphases = MIN(minrevallphases, minrevphase);
677 }
677 }
678 /* Propagate the phase information from the roots to the revs */
678 /* Propagate the phase information from the roots to the revs */
679 if (minrevallphases != -1) {
679 if (minrevallphases != -1) {
680 int parents[2];
680 int parents[2];
681 for (i = minrevallphases; i < len; i++) {
681 for (i = minrevallphases; i < len; i++) {
682 if (index_get_parents(self, i, parents,
682 if (index_get_parents(self, i, parents,
683 (int)len - 1) < 0)
683 (int)len - 1) < 0)
684 goto release;
684 goto release;
685 set_phase_from_parents(phases, parents[0], parents[1], i);
685 set_phase_from_parents(phases, parents[0], parents[1], i);
686 }
686 }
687 }
687 }
688 /* Transform phase list to a python list */
688 /* Transform phase list to a python list */
689 phasessize = PyInt_FromLong(len);
689 phasessize = PyInt_FromLong(len);
690 if (phasessize == NULL)
690 if (phasessize == NULL)
691 goto release;
691 goto release;
692 for (i = 0; i < len; i++) {
692 for (i = 0; i < len; i++) {
693 phase = phases[i];
693 phase = phases[i];
694 /* We only store the sets of revisions for non-public phases; the
694 /* We only store the sets of revisions for non-public phases; the
695 * public phase is computed as a difference */
695 * public phase is computed as a difference */
696 if (phase != 0) {
696 if (phase != 0) {
697 phaseset = PyList_GET_ITEM(phasessetlist, phase);
697 phaseset = PyList_GET_ITEM(phasessetlist, phase);
698 rev = PyInt_FromLong(i);
698 rev = PyInt_FromLong(i);
699 if (rev == NULL)
699 if (rev == NULL)
700 goto release;
700 goto release;
701 PySet_Add(phaseset, rev);
701 PySet_Add(phaseset, rev);
702 Py_XDECREF(rev);
702 Py_XDECREF(rev);
703 }
703 }
704 }
704 }
705 ret = PyTuple_Pack(2, phasessize, phasessetlist);
705 ret = PyTuple_Pack(2, phasessize, phasessetlist);
706
706
707 release:
707 release:
708 Py_XDECREF(phasessize);
708 Py_XDECREF(phasessize);
709 Py_XDECREF(phasessetlist);
709 Py_XDECREF(phasessetlist);
710 done:
710 done:
711 free(phases);
711 free(phases);
712 return ret;
712 return ret;
713 }
713 }
714
714
715 static PyObject *index_headrevs(indexObject *self, PyObject *args)
715 static PyObject *index_headrevs(indexObject *self, PyObject *args)
716 {
716 {
717 Py_ssize_t i, j, len;
717 Py_ssize_t i, j, len;
718 char *nothead = NULL;
718 char *nothead = NULL;
719 PyObject *heads = NULL;
719 PyObject *heads = NULL;
720 PyObject *filter = NULL;
720 PyObject *filter = NULL;
721 PyObject *filteredrevs = Py_None;
721 PyObject *filteredrevs = Py_None;
722
722
723 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
723 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
724 return NULL;
724 return NULL;
725 }
725 }
726
726
727 if (self->headrevs && filteredrevs == self->filteredrevs)
727 if (self->headrevs && filteredrevs == self->filteredrevs)
728 return list_copy(self->headrevs);
728 return list_copy(self->headrevs);
729
729
730 Py_DECREF(self->filteredrevs);
730 Py_DECREF(self->filteredrevs);
731 self->filteredrevs = filteredrevs;
731 self->filteredrevs = filteredrevs;
732 Py_INCREF(filteredrevs);
732 Py_INCREF(filteredrevs);
733
733
734 if (filteredrevs != Py_None) {
734 if (filteredrevs != Py_None) {
735 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
735 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
736 if (!filter) {
736 if (!filter) {
737 PyErr_SetString(PyExc_TypeError,
737 PyErr_SetString(PyExc_TypeError,
738 "filteredrevs has no attribute __contains__");
738 "filteredrevs has no attribute __contains__");
739 goto bail;
739 goto bail;
740 }
740 }
741 }
741 }
742
742
743 len = index_length(self) - 1;
743 len = index_length(self);
744 heads = PyList_New(0);
744 heads = PyList_New(0);
745 if (heads == NULL)
745 if (heads == NULL)
746 goto bail;
746 goto bail;
747 if (len == 0) {
747 if (len == 0) {
748 PyObject *nullid = PyInt_FromLong(-1);
748 PyObject *nullid = PyInt_FromLong(-1);
749 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
749 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
750 Py_XDECREF(nullid);
750 Py_XDECREF(nullid);
751 goto bail;
751 goto bail;
752 }
752 }
753 goto done;
753 goto done;
754 }
754 }
755
755
756 nothead = calloc(len, 1);
756 nothead = calloc(len, 1);
757 if (nothead == NULL) {
757 if (nothead == NULL) {
758 PyErr_NoMemory();
758 PyErr_NoMemory();
759 goto bail;
759 goto bail;
760 }
760 }
761
761
762 for (i = len - 1; i >= 0; i--) {
762 for (i = len - 1; i >= 0; i--) {
763 int isfiltered;
763 int isfiltered;
764 int parents[2];
764 int parents[2];
765
765
766 /* If nothead[i] == 1, it means we've seen an unfiltered child of this
766 /* If nothead[i] == 1, it means we've seen an unfiltered child of this
767 * node already, and therefore this node is not filtered. So we can skip
767 * node already, and therefore this node is not filtered. So we can skip
768 * the expensive check_filter step.
768 * the expensive check_filter step.
769 */
769 */
770 if (nothead[i] != 1) {
770 if (nothead[i] != 1) {
771 isfiltered = check_filter(filter, i);
771 isfiltered = check_filter(filter, i);
772 if (isfiltered == -1) {
772 if (isfiltered == -1) {
773 PyErr_SetString(PyExc_TypeError,
773 PyErr_SetString(PyExc_TypeError,
774 "unable to check filter");
774 "unable to check filter");
775 goto bail;
775 goto bail;
776 }
776 }
777
777
778 if (isfiltered) {
778 if (isfiltered) {
779 nothead[i] = 1;
779 nothead[i] = 1;
780 continue;
780 continue;
781 }
781 }
782 }
782 }
783
783
784 if (index_get_parents(self, i, parents, (int)len - 1) < 0)
784 if (index_get_parents(self, i, parents, (int)len - 1) < 0)
785 goto bail;
785 goto bail;
786 for (j = 0; j < 2; j++) {
786 for (j = 0; j < 2; j++) {
787 if (parents[j] >= 0)
787 if (parents[j] >= 0)
788 nothead[parents[j]] = 1;
788 nothead[parents[j]] = 1;
789 }
789 }
790 }
790 }
791
791
792 for (i = 0; i < len; i++) {
792 for (i = 0; i < len; i++) {
793 PyObject *head;
793 PyObject *head;
794
794
795 if (nothead[i])
795 if (nothead[i])
796 continue;
796 continue;
797 head = PyInt_FromSsize_t(i);
797 head = PyInt_FromSsize_t(i);
798 if (head == NULL || PyList_Append(heads, head) == -1) {
798 if (head == NULL || PyList_Append(heads, head) == -1) {
799 Py_XDECREF(head);
799 Py_XDECREF(head);
800 goto bail;
800 goto bail;
801 }
801 }
802 }
802 }
803
803
804 done:
804 done:
805 self->headrevs = heads;
805 self->headrevs = heads;
806 Py_XDECREF(filter);
806 Py_XDECREF(filter);
807 free(nothead);
807 free(nothead);
808 return list_copy(self->headrevs);
808 return list_copy(self->headrevs);
809 bail:
809 bail:
810 Py_XDECREF(filter);
810 Py_XDECREF(filter);
811 Py_XDECREF(heads);
811 Py_XDECREF(heads);
812 free(nothead);
812 free(nothead);
813 return NULL;
813 return NULL;
814 }
814 }
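
index_headrevs reduces head-finding to a single reverse sweep: every rev named as a parent is marked not-a-head, and whatever stays unmarked is a head. A standalone toy version over a hard-coded parent table, with the filtering logic omitted:

#include <stdio.h>
#include <string.h>

int main(void)
{
	/* revs 3 and 4 both branch off the 0-1-2 spine */
	int parents[5][2] = {{-1, -1}, {0, -1}, {1, -1}, {2, -1}, {2, -1}};
	char nothead[5];
	int i, j;

	memset(nothead, 0, sizeof(nothead));
	for (i = 4; i >= 0; i--)
		for (j = 0; j < 2; j++)
			if (parents[i][j] >= 0)
				nothead[parents[i][j]] = 1;

	for (i = 0; i < 5; i++)
		if (!nothead[i])
			printf("head: %d\n", i); /* prints 3 and 4 */
	return 0;
}
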
815
815
816 /**
816 /**
817 * Obtain the base revision index entry.
817 * Obtain the base revision index entry.
818 *
818 *
819 * Callers must ensure that rev >= 0 or illegal memory access may occur.
819 * Callers must ensure that rev >= 0 or illegal memory access may occur.
820 */
820 */
821 static inline int index_baserev(indexObject *self, int rev)
821 static inline int index_baserev(indexObject *self, int rev)
822 {
822 {
823 const char *data;
823 const char *data;
824
824
825 if (rev >= self->length - 1) {
825 if (rev >= self->length - 1) {
826 PyObject *tuple = PyList_GET_ITEM(self->added,
826 PyObject *tuple = PyList_GET_ITEM(self->added,
827 rev - self->length + 1);
827 rev - self->length + 1);
828 return (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 3));
828 return (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 3));
829 }
829 }
830 else {
830 else {
831 data = index_deref(self, rev);
831 data = index_deref(self, rev);
832 if (data == NULL) {
832 if (data == NULL) {
833 return -2;
833 return -2;
834 }
834 }
835
835
836 return getbe32(data + 16);
836 return getbe32(data + 16);
837 }
837 }
838 }
838 }
839
839
840 static PyObject *index_deltachain(indexObject *self, PyObject *args)
840 static PyObject *index_deltachain(indexObject *self, PyObject *args)
841 {
841 {
842 int rev, generaldelta;
842 int rev, generaldelta;
843 PyObject *stoparg;
843 PyObject *stoparg;
844 int stoprev, iterrev, baserev = -1;
844 int stoprev, iterrev, baserev = -1;
845 int stopped;
845 int stopped;
846 PyObject *chain = NULL, *result = NULL;
846 PyObject *chain = NULL, *result = NULL;
847 const Py_ssize_t length = index_length(self);
847 const Py_ssize_t length = index_length(self) + 1;
848
848
849 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
849 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
850 return NULL;
850 return NULL;
851 }
851 }
852
852
853 if (PyInt_Check(stoparg)) {
853 if (PyInt_Check(stoparg)) {
854 stoprev = (int)PyInt_AsLong(stoparg);
854 stoprev = (int)PyInt_AsLong(stoparg);
855 if (stoprev == -1 && PyErr_Occurred()) {
855 if (stoprev == -1 && PyErr_Occurred()) {
856 return NULL;
856 return NULL;
857 }
857 }
858 }
858 }
859 else if (stoparg == Py_None) {
859 else if (stoparg == Py_None) {
860 stoprev = -2;
860 stoprev = -2;
861 }
861 }
862 else {
862 else {
863 PyErr_SetString(PyExc_ValueError,
863 PyErr_SetString(PyExc_ValueError,
864 "stoprev must be integer or None");
864 "stoprev must be integer or None");
865 return NULL;
865 return NULL;
866 }
866 }
867
867
868 if (rev < 0 || rev >= length - 1) {
868 if (rev < 0 || rev >= length - 1) {
869 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
869 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
870 return NULL;
870 return NULL;
871 }
871 }
872
872
873 chain = PyList_New(0);
873 chain = PyList_New(0);
874 if (chain == NULL) {
874 if (chain == NULL) {
875 return NULL;
875 return NULL;
876 }
876 }
877
877
878 baserev = index_baserev(self, rev);
878 baserev = index_baserev(self, rev);
879
879
880 /* This should never happen. */
880 /* This should never happen. */
881 if (baserev <= -2) {
881 if (baserev <= -2) {
882 /* Error should be set by index_deref() */
882 /* Error should be set by index_deref() */
883 assert(PyErr_Occurred());
883 assert(PyErr_Occurred());
884 goto bail;
884 goto bail;
885 }
885 }
886
886
887 iterrev = rev;
887 iterrev = rev;
888
888
889 while (iterrev != baserev && iterrev != stoprev) {
889 while (iterrev != baserev && iterrev != stoprev) {
890 PyObject *value = PyInt_FromLong(iterrev);
890 PyObject *value = PyInt_FromLong(iterrev);
891 if (value == NULL) {
891 if (value == NULL) {
892 goto bail;
892 goto bail;
893 }
893 }
894 if (PyList_Append(chain, value)) {
894 if (PyList_Append(chain, value)) {
895 Py_DECREF(value);
895 Py_DECREF(value);
896 goto bail;
896 goto bail;
897 }
897 }
898 Py_DECREF(value);
898 Py_DECREF(value);
899
899
900 if (generaldelta) {
900 if (generaldelta) {
901 iterrev = baserev;
901 iterrev = baserev;
902 }
902 }
903 else {
903 else {
904 iterrev--;
904 iterrev--;
905 }
905 }
906
906
907 if (iterrev < 0) {
907 if (iterrev < 0) {
908 break;
908 break;
909 }
909 }
910
910
911 if (iterrev >= length - 1) {
911 if (iterrev >= length - 1) {
912 PyErr_SetString(PyExc_IndexError, "revision outside index");
912 PyErr_SetString(PyExc_IndexError, "revision outside index");
913 goto bail; /* release the partially built chain */
913 goto bail; /* release the partially built chain */
914 }
914 }
915
915
916 baserev = index_baserev(self, iterrev);
916 baserev = index_baserev(self, iterrev);
917
917
918 /* This should never happen. */
918 /* This should never happen. */
919 if (baserev <= -2) {
919 if (baserev <= -2) {
920 /* Error should be set by index_deref() */
920 /* Error should be set by index_deref() */
921 assert(PyErr_Occurred());
921 assert(PyErr_Occurred());
922 goto bail;
922 goto bail;
923 }
923 }
924 }
924 }
925
925
926 if (iterrev == stoprev) {
926 if (iterrev == stoprev) {
927 stopped = 1;
927 stopped = 1;
928 }
928 }
929 else {
929 else {
930 PyObject *value = PyInt_FromLong(iterrev);
930 PyObject *value = PyInt_FromLong(iterrev);
931 if (value == NULL) {
931 if (value == NULL) {
932 goto bail;
932 goto bail;
933 }
933 }
934 if (PyList_Append(chain, value)) {
934 if (PyList_Append(chain, value)) {
935 Py_DECREF(value);
935 Py_DECREF(value);
936 goto bail;
936 goto bail;
937 }
937 }
938 Py_DECREF(value);
938 Py_DECREF(value);
939
939
940 stopped = 0;
940 stopped = 0;
941 }
941 }
942
942
943 if (PyList_Reverse(chain)) {
943 if (PyList_Reverse(chain)) {
944 goto bail;
944 goto bail;
945 }
945 }
946
946
947 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
947 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
948 Py_DECREF(chain);
948 Py_DECREF(chain);
949 return result;
949 return result;
950
950
951 bail:
951 bail:
952 Py_DECREF(chain);
952 Py_DECREF(chain);
953 return NULL;
953 return NULL;
954 }
954 }
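
index_deltachain walks rev -> delta base until it reaches a snapshot (a rev whose base is itself) or the caller's stoprev, then reverses the list so the snapshot comes first. A toy walk over an invented base table, covering both the generaldelta step (jump to the base) and the classic step (rev - 1):

#include <stdio.h>

int main(void)
{
	/* base[r] == r means rev r stores a full snapshot */
	int base[6] = {0, 0, 1, 1, 3, 3};
	int generaldelta = 1;
	int rev = 5, stoprev = -2; /* -2: never stop early */

	while (rev != base[rev] && rev != stoprev) {
		printf("delta: %d\n", rev); /* prints 5, 3, 1 */
		rev = generaldelta ? base[rev] : rev - 1;
	}
	if (rev != stoprev)
		printf("snapshot: %d\n", rev); /* prints 0 */
	return 0;
}
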
955
955
956 static inline int nt_level(const char *node, Py_ssize_t level)
956 static inline int nt_level(const char *node, Py_ssize_t level)
957 {
957 {
958 int v = node[level>>1];
958 int v = node[level>>1];
959 if (!(level & 1))
959 if (!(level & 1))
960 v >>= 4;
960 v >>= 4;
961 return v & 0xf;
961 return v & 0xf;
962 }
962 }
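
nt_level pulls the level-th hex nibble out of a binary node: even levels take the high half of a byte, odd levels the low half. A two-byte demonstration:

#include <stdio.h>

static int nibble(const char *node, int level)
{
	int v = node[level >> 1];
	if (!(level & 1))
		v >>= 4; /* even level: high nibble */
	return v & 0xf;
}

int main(void)
{
	const char node[2] = {(char)0xab, (char)0x01};

	printf("%x %x %x %x\n", nibble(node, 0), nibble(node, 1),
	       nibble(node, 2), nibble(node, 3)); /* prints: a b 0 1 */
	return 0;
}
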
963
963
964 /*
964 /*
965 * Return values:
965 * Return values:
966 *
966 *
967 * -4: match is ambiguous (multiple candidates)
967 * -4: match is ambiguous (multiple candidates)
968 * -2: not found
968 * -2: not found
969 * rest: valid rev
969 * rest: valid rev
970 */
970 */
971 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
971 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
972 int hex)
972 int hex)
973 {
973 {
974 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
974 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
975 int level, maxlevel, off;
975 int level, maxlevel, off;
976
976
977 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
977 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
978 return -1;
978 return -1;
979
979
980 if (hex)
980 if (hex)
981 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
981 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
982 else
982 else
983 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
983 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
984
984
985 for (level = off = 0; level < maxlevel; level++) {
985 for (level = off = 0; level < maxlevel; level++) {
986 int k = getnybble(node, level);
986 int k = getnybble(node, level);
987 nodetree *n = &self->nt[off];
987 nodetree *n = &self->nt[off];
988 int v = n->children[k];
988 int v = n->children[k];
989
989
990 if (v < 0) {
990 if (v < 0) {
991 const char *n;
991 const char *n;
992 Py_ssize_t i;
992 Py_ssize_t i;
993
993
994 v = -(v + 2);
994 v = -(v + 2);
995 n = index_node(self, v);
995 n = index_node(self, v);
996 if (n == NULL)
996 if (n == NULL)
997 return -2;
997 return -2;
998 for (i = level; i < maxlevel; i++)
998 for (i = level; i < maxlevel; i++)
999 if (getnybble(node, i) != nt_level(n, i))
999 if (getnybble(node, i) != nt_level(n, i))
1000 return -2;
1000 return -2;
1001 return v;
1001 return v;
1002 }
1002 }
1003 if (v == 0)
1003 if (v == 0)
1004 return -2;
1004 return -2;
1005 off = v;
1005 off = v;
1006 }
1006 }
1007 /* multiple matches against an ambiguous prefix */
1007 /* multiple matches against an ambiguous prefix */
1008 return -4;
1008 return -4;
1009 }
1009 }
1010
1010
1011 static int nt_new(indexObject *self)
1011 static int nt_new(indexObject *self)
1012 {
1012 {
1013 if (self->ntlength == self->ntcapacity) {
1013 if (self->ntlength == self->ntcapacity) {
1014 if (self->ntcapacity >= INT_MAX / (sizeof(nodetree) * 2)) {
1014 if (self->ntcapacity >= INT_MAX / (sizeof(nodetree) * 2)) {
1015 PyErr_SetString(PyExc_MemoryError,
1015 PyErr_SetString(PyExc_MemoryError,
1016 "overflow in nt_new");
1016 "overflow in nt_new");
1017 return -1;
1017 return -1;
1018 }
1018 }
1019 self->ntcapacity *= 2;
1019 self->ntcapacity *= 2;
1020 self->nt = realloc(self->nt,
1020 self->nt = realloc(self->nt,
1021 self->ntcapacity * sizeof(nodetree));
1021 self->ntcapacity * sizeof(nodetree));
1022 if (self->nt == NULL) {
1022 if (self->nt == NULL) {
1023 PyErr_SetString(PyExc_MemoryError, "out of memory");
1023 PyErr_SetString(PyExc_MemoryError, "out of memory");
1024 return -1;
1024 return -1;
1025 }
1025 }
1026 memset(&self->nt[self->ntlength], 0,
1026 memset(&self->nt[self->ntlength], 0,
1027 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
1027 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
1028 }
1028 }
1029 return self->ntlength++;
1029 return self->ntlength++;
1030 }
1030 }
1031
1031
1032 static int nt_insert(indexObject *self, const char *node, int rev)
1032 static int nt_insert(indexObject *self, const char *node, int rev)
1033 {
1033 {
1034 int level = 0;
1034 int level = 0;
1035 int off = 0;
1035 int off = 0;
1036
1036
1037 while (level < 40) {
1037 while (level < 40) {
1038 int k = nt_level(node, level);
1038 int k = nt_level(node, level);
1039 nodetree *n;
1039 nodetree *n;
1040 int v;
1040 int v;
1041
1041
1042 n = &self->nt[off];
1042 n = &self->nt[off];
1043 v = n->children[k];
1043 v = n->children[k];
1044
1044
1045 if (v == 0) {
1045 if (v == 0) {
1046 n->children[k] = -rev - 2;
1046 n->children[k] = -rev - 2;
1047 return 0;
1047 return 0;
1048 }
1048 }
1049 if (v < 0) {
1049 if (v < 0) {
1050 const char *oldnode = index_node_existing(self, -(v + 2));
1050 const char *oldnode = index_node_existing(self, -(v + 2));
1051 int noff;
1051 int noff;
1052
1052
1053 if (oldnode == NULL)
1053 if (oldnode == NULL)
1054 return -1;
1054 return -1;
1055 if (!memcmp(oldnode, node, 20)) {
1055 if (!memcmp(oldnode, node, 20)) {
1056 n->children[k] = -rev - 2;
1056 n->children[k] = -rev - 2;
1057 return 0;
1057 return 0;
1058 }
1058 }
1059 noff = nt_new(self);
1059 noff = nt_new(self);
1060 if (noff == -1)
1060 if (noff == -1)
1061 return -1;
1061 return -1;
1062 /* self->nt may have been changed by realloc */
1062 /* self->nt may have been changed by realloc */
1063 self->nt[off].children[k] = noff;
1063 self->nt[off].children[k] = noff;
1064 off = noff;
1064 off = noff;
1065 n = &self->nt[off];
1065 n = &self->nt[off];
1066 n->children[nt_level(oldnode, ++level)] = v;
1066 n->children[nt_level(oldnode, ++level)] = v;
1067 if (level > self->ntdepth)
1067 if (level > self->ntdepth)
1068 self->ntdepth = level;
1068 self->ntdepth = level;
1069 self->ntsplits += 1;
1069 self->ntsplits += 1;
1070 } else {
1070 } else {
1071 level += 1;
1071 level += 1;
1072 off = v;
1072 off = v;
1073 }
1073 }
1074 }
1074 }
1075
1075
1076 return -1;
1076 return -1;
1077 }
1077 }
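
nt_insert packs three states into each child slot of a 16-way branch node: 0 means empty, v > 0 is the array index of a child branch, and v < 0 stores a leaf rev encoded as -(rev + 2), which keeps revs -1 and 0 distinguishable from the empty slot. A self-contained miniature with the same encoding, over 4-nibble keys and a fixed-capacity pool (no realloc or depth bookkeeping):

#include <stdio.h>

#define MAXLEVEL 4 /* 16-bit keys: 4 hex nibbles */

typedef struct { int children[16]; } node16;

static node16 tree[64];
static int ntlength = 1;  /* slot 0 is the root */
static unsigned keys[16]; /* full key per rev, so splits can re-file leaves */

static int nibble(unsigned key, int level)
{
	return (key >> (4 * (MAXLEVEL - 1 - level))) & 0xf;
}

static void insert(unsigned key, int rev)
{
	int level = 0, off = 0;

	keys[rev] = key;
	while (level < MAXLEVEL) {
		int k = nibble(key, level);
		int v = tree[off].children[k];

		if (v == 0) { /* empty slot: drop the leaf here */
			tree[off].children[k] = -rev - 2;
			return;
		}
		if (v < 0) { /* leaf in the way: split this level */
			int oldrev = -(v + 2);
			int noff;

			if (keys[oldrev] == key) { /* same key: replace */
				tree[off].children[k] = -rev - 2;
				return;
			}
			noff = ntlength++;
			tree[off].children[k] = noff;
			off = noff;
			tree[off].children[nibble(keys[oldrev], ++level)] = v;
		} else { /* interior node: descend */
			level++;
			off = v;
		}
	}
}

static int find(unsigned key) /* returns rev, or -2 if absent */
{
	int level, off = 0;

	for (level = 0; level < MAXLEVEL; level++) {
		int v = tree[off].children[nibble(key, level)];
		if (v == 0)
			return -2;
		if (v < 0)
			return keys[-(v + 2)] == key ? -(v + 2) : -2;
		off = v;
	}
	return -2;
}

int main(void)
{
	insert(0xab01, 0);
	insert(0xab02, 1); /* shares three nibbles with rev 0: three splits */
	printf("%d %d %d\n", find(0xab01), find(0xab02), find(0xffff));
	return 0; /* prints: 0 1 -2 */
}
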
1078
1078
1079 static int nt_delete_node(indexObject *self, const char *node)
1079 static int nt_delete_node(indexObject *self, const char *node)
1080 {
1080 {
1081 /* rev==-2 happens to get encoded as 0, which is interpreted as not set */
1081 /* rev==-2 happens to get encoded as 0, which is interpreted as not set */
1082 return nt_insert(self, node, -2);
1082 return nt_insert(self, node, -2);
1083 }
1083 }
1084
1084
1085 static int nt_init(indexObject *self)
1085 static int nt_init(indexObject *self)
1086 {
1086 {
1087 if (self->nt == NULL) {
1087 if (self->nt == NULL) {
1088 if ((size_t)self->raw_length > INT_MAX / sizeof(nodetree)) {
1088 if ((size_t)self->raw_length > INT_MAX / sizeof(nodetree)) {
1089 PyErr_SetString(PyExc_ValueError, "overflow in nt_init");
1089 PyErr_SetString(PyExc_ValueError, "overflow in nt_init");
1090 return -1;
1090 return -1;
1091 }
1091 }
1092 self->ntcapacity = self->raw_length < 4
1092 self->ntcapacity = self->raw_length < 4
1093 ? 4 : (int)self->raw_length / 2;
1093 ? 4 : (int)self->raw_length / 2;
1094
1094
1095 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
1095 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
1096 if (self->nt == NULL) {
1096 if (self->nt == NULL) {
1097 PyErr_NoMemory();
1097 PyErr_NoMemory();
1098 return -1;
1098 return -1;
1099 }
1099 }
1100 self->ntlength = 1;
1100 self->ntlength = 1;
1101 self->ntrev = (int)index_length(self) - 1;
1101 self->ntrev = (int)index_length(self);
1102 self->ntlookups = 1;
1102 self->ntlookups = 1;
1103 self->ntmisses = 0;
1103 self->ntmisses = 0;
1104 if (nt_insert(self, nullid, -1) == -1) {
1104 if (nt_insert(self, nullid, -1) == -1) {
1105 free(self->nt);
1105 free(self->nt);
1106 self->nt = NULL;
1106 self->nt = NULL;
1107 return -1;
1107 return -1;
1108 }
1108 }
1109 }
1109 }
1110 return 0;
1110 return 0;
1111 }
1111 }
1112
1112
1113 /*
1113 /*
1114 * Return values:
1114 * Return values:
1115 *
1115 *
1116 * -3: error (exception set)
1116 * -3: error (exception set)
1117 * -2: not found (no exception set)
1117 * -2: not found (no exception set)
1118 * rest: valid rev
1118 * rest: valid rev
1119 */
1119 */
1120 static int index_find_node(indexObject *self,
1120 static int index_find_node(indexObject *self,
1121 const char *node, Py_ssize_t nodelen)
1121 const char *node, Py_ssize_t nodelen)
1122 {
1122 {
1123 int rev;
1123 int rev;
1124
1124
1125 if (nt_init(self) == -1)
1125 if (nt_init(self) == -1)
1126 return -3;
1126 return -3;
1127
1127
1128 self->ntlookups++;
1128 self->ntlookups++;
1129 rev = nt_find(self, node, nodelen, 0);
1129 rev = nt_find(self, node, nodelen, 0);
1130 if (rev >= -1)
1130 if (rev >= -1)
1131 return rev;
1131 return rev;
1132
1132
1133 /*
1133 /*
1134 * For the first handful of lookups, we scan the entire index,
1134 * For the first handful of lookups, we scan the entire index,
1135 * and cache only the matching nodes. This optimizes for cases
1135 * and cache only the matching nodes. This optimizes for cases
1136 * like "hg tip", where only a few nodes are accessed.
1136 * like "hg tip", where only a few nodes are accessed.
1137 *
1137 *
1138 * After that, we cache every node we visit, using a single
1138 * After that, we cache every node we visit, using a single
1139 * scan amortized over multiple lookups. This gives the best
1139 * scan amortized over multiple lookups. This gives the best
1140 * bulk performance, e.g. for "hg log".
1140 * bulk performance, e.g. for "hg log".
1141 */
1141 */
1142 if (self->ntmisses++ < 4) {
1142 if (self->ntmisses++ < 4) {
1143 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1143 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1144 const char *n = index_node_existing(self, rev);
1144 const char *n = index_node_existing(self, rev);
1145 if (n == NULL)
1145 if (n == NULL)
1146 return -3;
1146 return -3;
1147 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1147 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1148 if (nt_insert(self, n, rev) == -1)
1148 if (nt_insert(self, n, rev) == -1)
1149 return -3;
1149 return -3;
1150 break;
1150 break;
1151 }
1151 }
1152 }
1152 }
1153 } else {
1153 } else {
1154 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1154 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1155 const char *n = index_node_existing(self, rev);
1155 const char *n = index_node_existing(self, rev);
1156 if (n == NULL)
1156 if (n == NULL)
1157 return -3;
1157 return -3;
1158 if (nt_insert(self, n, rev) == -1) {
1158 if (nt_insert(self, n, rev) == -1) {
1159 self->ntrev = rev + 1;
1159 self->ntrev = rev + 1;
1160 return -3;
1160 return -3;
1161 }
1161 }
1162 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1162 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1163 break;
1163 break;
1164 }
1164 }
1165 }
1165 }
1166 self->ntrev = rev;
1166 self->ntrev = rev;
1167 }
1167 }
1168
1168
1169 if (rev >= 0)
1169 if (rev >= 0)
1170 return rev;
1170 return rev;
1171 return -2;
1171 return -2;
1172 }
1172 }
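
The two-mode cache fill in index_find_node is worth seeing in miniature: the first few misses scan everything but remember only the hit, while later misses remember every rev they walk past, amortizing one full scan across many lookups. A toy version with the radix tree replaced by a direct-mapped table (the threshold of 4 comes from the code above; everything else is invented):

#include <stdio.h>

#define N 8
#define MAXVAL 128

static int values[N] = {42, 7, 13, 99, 5, 21, 64, 8};
static int revof[MAXVAL]; /* the "cache": 0 = unknown, else rev + 1 */
static int cursor = N;    /* revs >= cursor are all cached */
static int misses;

static int find(int value)
{
	int rev;

	if (revof[value])
		return revof[value] - 1; /* cache hit */

	if (misses++ < 4) {
		/* early misses: scan all, cache only the match */
		for (rev = N - 1; rev >= 0; rev--)
			if (values[rev] == value) {
				revof[value] = rev + 1;
				return rev;
			}
	} else {
		/* later misses: cache everything we walk past */
		for (rev = cursor - 1; rev >= 0; rev--) {
			revof[values[rev]] = rev + 1;
			if (values[rev] == value)
				break;
		}
		cursor = rev < 0 ? 0 : rev;
		if (rev >= 0)
			return rev;
	}
	return -1; /* not found */
}

int main(void)
{
	printf("%d %d %d\n", find(13), find(99), find(1)); /* 2 3 -1 */
	return 0;
}
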
1173
1173
1174 static void raise_revlog_error(void)
1174 static void raise_revlog_error(void)
1175 {
1175 {
1176 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
1176 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
1177
1177
1178 mod = PyImport_ImportModule("mercurial.error");
1178 mod = PyImport_ImportModule("mercurial.error");
1179 if (mod == NULL) {
1179 if (mod == NULL) {
1180 goto cleanup;
1180 goto cleanup;
1181 }
1181 }
1182
1182
1183 dict = PyModule_GetDict(mod);
1183 dict = PyModule_GetDict(mod);
1184 if (dict == NULL) {
1184 if (dict == NULL) {
1185 goto cleanup;
1185 goto cleanup;
1186 }
1186 }
1187 Py_INCREF(dict);
1187 Py_INCREF(dict);
1188
1188
1189 errclass = PyDict_GetItemString(dict, "RevlogError");
1189 errclass = PyDict_GetItemString(dict, "RevlogError");
1190 if (errclass == NULL) {
1190 if (errclass == NULL) {
1191 PyErr_SetString(PyExc_SystemError,
1191 PyErr_SetString(PyExc_SystemError,
1192 "could not find RevlogError");
1192 "could not find RevlogError");
1193 goto cleanup;
1193 goto cleanup;
1194 }
1194 }
1195
1195
1196 /* value of exception is ignored by callers */
1196 /* value of exception is ignored by callers */
1197 PyErr_SetString(errclass, "RevlogError");
1197 PyErr_SetString(errclass, "RevlogError");
1198
1198
1199 cleanup:
1199 cleanup:
1200 Py_XDECREF(dict);
1200 Py_XDECREF(dict);
1201 Py_XDECREF(mod);
1201 Py_XDECREF(mod);
1202 }
1202 }
1203
1203
1204 static PyObject *index_getitem(indexObject *self, PyObject *value)
1204 static PyObject *index_getitem(indexObject *self, PyObject *value)
1205 {
1205 {
1206 char *node;
1206 char *node;
1207 int rev;
1207 int rev;
1208
1208
1209 if (PyInt_Check(value))
1209 if (PyInt_Check(value))
1210 return index_get(self, PyInt_AS_LONG(value));
1210 return index_get(self, PyInt_AS_LONG(value));
1211
1211
1212 if (node_check(value, &node) == -1)
1212 if (node_check(value, &node) == -1)
1213 return NULL;
1213 return NULL;
1214 rev = index_find_node(self, node, 20);
1214 rev = index_find_node(self, node, 20);
1215 if (rev >= -1)
1215 if (rev >= -1)
1216 return PyInt_FromLong(rev);
1216 return PyInt_FromLong(rev);
1217 if (rev == -2)
1217 if (rev == -2)
1218 raise_revlog_error();
1218 raise_revlog_error();
1219 return NULL;
1219 return NULL;
1220 }
1220 }
1221
1221
1222 /*
1222 /*
1223 * Fully populate the radix tree.
1223 * Fully populate the radix tree.
1224 */
1224 */
1225 static int nt_populate(indexObject *self) {
1225 static int nt_populate(indexObject *self) {
1226 int rev;
1226 int rev;
1227 if (self->ntrev > 0) {
1227 if (self->ntrev > 0) {
1228 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1228 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1229 const char *n = index_node_existing(self, rev);
1229 const char *n = index_node_existing(self, rev);
1230 if (n == NULL)
1230 if (n == NULL)
1231 return -1;
1231 return -1;
1232 if (nt_insert(self, n, rev) == -1)
1232 if (nt_insert(self, n, rev) == -1)
1233 return -1;
1233 return -1;
1234 }
1234 }
1235 self->ntrev = -1;
1235 self->ntrev = -1;
1236 }
1236 }
1237 return 0;
1237 return 0;
1238 }
1238 }
1239
1239
1240 static int nt_partialmatch(indexObject *self, const char *node,
1240 static int nt_partialmatch(indexObject *self, const char *node,
1241 Py_ssize_t nodelen)
1241 Py_ssize_t nodelen)
1242 {
1242 {
1243 if (nt_init(self) == -1)
1243 if (nt_init(self) == -1)
1244 return -3;
1244 return -3;
1245 if (nt_populate(self) == -1)
1245 if (nt_populate(self) == -1)
1246 return -3;
1246 return -3;
1247
1247
1248 return nt_find(self, node, nodelen, 1);
1248 return nt_find(self, node, nodelen, 1);
1249 }
1249 }
1250
1250
1251 /*
1251 /*
1252 * Find the length of the shortest unique prefix of node.
1252 * Find the length of the shortest unique prefix of node.
1253 *
1253 *
1254 * Return values:
1254 * Return values:
1255 *
1255 *
1256 * -3: error (exception set)
1256 * -3: error (exception set)
1257 * -2: not found (no exception set)
1257 * -2: not found (no exception set)
1258 * rest: length of shortest prefix
1258 * rest: length of shortest prefix
1259 */
1259 */
1260 static int nt_shortest(indexObject *self, const char *node)
1260 static int nt_shortest(indexObject *self, const char *node)
1261 {
1261 {
1262 int level, off;
1262 int level, off;
1263
1263
1264 if (nt_init(self) == -1)
1264 if (nt_init(self) == -1)
1265 return -3;
1265 return -3;
1266 if (nt_populate(self) == -1)
1266 if (nt_populate(self) == -1)
1267 return -3;
1267 return -3;
1268
1268
1269 for (level = off = 0; level < 40; level++) {
1269 for (level = off = 0; level < 40; level++) {
1270 int k, v;
1270 int k, v;
1271 nodetree *n = &self->nt[off];
1271 nodetree *n = &self->nt[off];
1272 k = nt_level(node, level);
1272 k = nt_level(node, level);
1273 v = n->children[k];
1273 v = n->children[k];
1274 if (v < 0) {
1274 if (v < 0) {
1275 const char *n;
1275 const char *n;
1276 v = -(v + 2);
1276 v = -(v + 2);
1277 n = index_node_existing(self, v);
1277 n = index_node_existing(self, v);
1278 if (n == NULL)
1278 if (n == NULL)
1279 return -3;
1279 return -3;
1280 if (memcmp(node, n, 20) != 0)
1280 if (memcmp(node, n, 20) != 0)
1281 /*
1281 /*
1282 * Found a unique prefix, but it wasn't for the
1282 * Found a unique prefix, but it wasn't for the
1283 * requested node (i.e. the requested node does
1283 * requested node (i.e. the requested node does
1284 * not exist).
1284 * not exist).
1285 */
1285 */
1286 return -2;
1286 return -2;
1287 return level + 1;
1287 return level + 1;
1288 }
1288 }
1289 if (v == 0)
1289 if (v == 0)
1290 return -2;
1290 return -2;
1291 off = v;
1291 off = v;
1292 }
1292 }
1293 /*
1293 /*
1294 * Reaching here would mean the node was still ambiguous after all 40
1294 * Reaching here would mean the node was still ambiguous after all 40
1295 * hex digits, which cannot happen; it would take a programming error
1295 * hex digits, which cannot happen; it would take a programming error
1296 * in this file that inserted a node longer than 40 hex digits.
1296 * in this file that inserted a node longer than 40 hex digits.
1297 */
1297 */
1298 PyErr_SetString(PyExc_Exception, "broken node tree");
1298 PyErr_SetString(PyExc_Exception, "broken node tree");
1299 return -3;
1299 return -3;
1300 }
1300 }
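
nt_shortest stops as soon as the walk reaches this node's leaf, at which point level + 1 nibbles already distinguish it from every other node. The same answer, computed by brute force over hex strings as a reference sketch:

#include <stdio.h>
#include <string.h>

/* length of the shortest prefix of nodes[which] shared by no other node */
static int shortest(const char *nodes[], int n, int which)
{
	int len, i;

	for (len = 1; len <= (int)strlen(nodes[which]); len++) {
		int unique = 1;
		for (i = 0; i < n; i++)
			if (i != which &&
			    strncmp(nodes[i], nodes[which], len) == 0) {
				unique = 0;
				break;
			}
		if (unique)
			return len;
	}
	return -1;
}

int main(void)
{
	const char *nodes[] = {"ab01", "ab02", "ff00"};

	printf("%d %d %d\n", shortest(nodes, 3, 0), shortest(nodes, 3, 1),
	       shortest(nodes, 3, 2)); /* prints: 4 4 1 */
	return 0;
}
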
1301
1301
1302 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1302 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1303 {
1303 {
1304 const char *fullnode;
1304 const char *fullnode;
1305 int nodelen;
1305 int nodelen;
1306 char *node;
1306 char *node;
1307 int rev, i;
1307 int rev, i;
1308
1308
1309 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
1309 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
1310 return NULL;
1310 return NULL;
1311
1311
1312 if (nodelen < 1) {
1312 if (nodelen < 1) {
1313 PyErr_SetString(PyExc_ValueError, "key too short");
1313 PyErr_SetString(PyExc_ValueError, "key too short");
1314 return NULL;
1314 return NULL;
1315 }
1315 }
1316
1316
1317 if (nodelen > 40) {
1317 if (nodelen > 40) {
1318 PyErr_SetString(PyExc_ValueError, "key too long");
1318 PyErr_SetString(PyExc_ValueError, "key too long");
1319 return NULL;
1319 return NULL;
1320 }
1320 }
1321
1321
1322 for (i = 0; i < nodelen; i++)
1322 for (i = 0; i < nodelen; i++)
1323 hexdigit(node, i);
1323 hexdigit(node, i);
1324 if (PyErr_Occurred()) {
1324 if (PyErr_Occurred()) {
1325 /* input contains non-hex characters */
1325 /* input contains non-hex characters */
1326 PyErr_Clear();
1326 PyErr_Clear();
1327 Py_RETURN_NONE;
1327 Py_RETURN_NONE;
1328 }
1328 }
1329
1329
1330 rev = nt_partialmatch(self, node, nodelen);
1330 rev = nt_partialmatch(self, node, nodelen);
1331
1331
1332 switch (rev) {
1332 switch (rev) {
1333 case -4:
1333 case -4:
1334 raise_revlog_error();
1334 raise_revlog_error();
1335 case -3:
1335 case -3:
1336 return NULL;
1336 return NULL;
1337 case -2:
1337 case -2:
1338 Py_RETURN_NONE;
1338 Py_RETURN_NONE;
1339 case -1:
1339 case -1:
1340 return PyBytes_FromStringAndSize(nullid, 20);
1340 return PyBytes_FromStringAndSize(nullid, 20);
1341 }
1341 }
1342
1342
1343 fullnode = index_node_existing(self, rev);
1343 fullnode = index_node_existing(self, rev);
1344 if (fullnode == NULL) {
1344 if (fullnode == NULL) {
1345 return NULL;
1345 return NULL;
1346 }
1346 }
1347 return PyBytes_FromStringAndSize(fullnode, 20);
1347 return PyBytes_FromStringAndSize(fullnode, 20);
1348 }
1348 }
1349
1349
1350 static PyObject *index_shortest(indexObject *self, PyObject *args)
1350 static PyObject *index_shortest(indexObject *self, PyObject *args)
1351 {
1351 {
1352 PyObject *val;
1352 PyObject *val;
1353 char *node;
1353 char *node;
1354 int length;
1354 int length;
1355
1355
1356 if (!PyArg_ParseTuple(args, "O", &val))
1356 if (!PyArg_ParseTuple(args, "O", &val))
1357 return NULL;
1357 return NULL;
1358 if (node_check(val, &node) == -1)
1358 if (node_check(val, &node) == -1)
1359 return NULL;
1359 return NULL;
1360
1360
1361 self->ntlookups++;
1361 self->ntlookups++;
1362 length = nt_shortest(self, node);
1362 length = nt_shortest(self, node);
1363 if (length == -3)
1363 if (length == -3)
1364 return NULL;
1364 return NULL;
1365 if (length == -2) {
1365 if (length == -2) {
1366 raise_revlog_error();
1366 raise_revlog_error();
1367 return NULL;
1367 return NULL;
1368 }
1368 }
1369 return PyInt_FromLong(length);
1369 return PyInt_FromLong(length);
1370 }
1370 }
1371
1371
1372 static PyObject *index_m_get(indexObject *self, PyObject *args)
1372 static PyObject *index_m_get(indexObject *self, PyObject *args)
1373 {
1373 {
1374 PyObject *val;
1374 PyObject *val;
1375 char *node;
1375 char *node;
1376 int rev;
1376 int rev;
1377
1377
1378 if (!PyArg_ParseTuple(args, "O", &val))
1378 if (!PyArg_ParseTuple(args, "O", &val))
1379 return NULL;
1379 return NULL;
1380 if (node_check(val, &node) == -1)
1380 if (node_check(val, &node) == -1)
1381 return NULL;
1381 return NULL;
1382 rev = index_find_node(self, node, 20);
1382 rev = index_find_node(self, node, 20);
1383 if (rev == -3)
1383 if (rev == -3)
1384 return NULL;
1384 return NULL;
1385 if (rev == -2)
1385 if (rev == -2)
1386 Py_RETURN_NONE;
1386 Py_RETURN_NONE;
1387 return PyInt_FromLong(rev);
1387 return PyInt_FromLong(rev);
1388 }
1388 }
1389
1389
1390 static int index_contains(indexObject *self, PyObject *value)
1390 static int index_contains(indexObject *self, PyObject *value)
1391 {
1391 {
1392 char *node;
1392 char *node;
1393
1393
1394 if (PyInt_Check(value)) {
1394 if (PyInt_Check(value)) {
1395 long rev = PyInt_AS_LONG(value);
1395 long rev = PyInt_AS_LONG(value);
1396 return rev >= -1 && rev < index_length(self);
1396 return rev >= -1 && rev < index_length(self) + 1;
1397 }
1397 }
1398
1398
1399 if (node_check(value, &node) == -1)
1399 if (node_check(value, &node) == -1)
1400 return -1;
1400 return -1;
1401
1401
1402 switch (index_find_node(self, node, 20)) {
1402 switch (index_find_node(self, node, 20)) {
1403 case -3:
1403 case -3:
1404 return -1;
1404 return -1;
1405 case -2:
1405 case -2:
1406 return 0;
1406 return 0;
1407 default:
1407 default:
1408 return 1;
1408 return 1;
1409 }
1409 }
1410 }
1410 }
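
Note the "+ 1" added on the integer branch: with this change len() no longer counts the nullid sentinel, so membership must readmit one extra slot. The same check in Python terms, where len(index) is the number of real revisions:

    def contains_rev(index, rev):
        # nullrev (-1) is always present, and so is the transitional
        # alias len(index) that __getitem__ still maps to the null entry
        return -1 <= rev < len(index) + 1
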
1411
1411
1412 typedef uint64_t bitmask;
1412 typedef uint64_t bitmask;
1413
1413
1414 /*
1414 /*
1415 * Given a disjoint set of revs, return all candidates for the
1415 * Given a disjoint set of revs, return all candidates for the
1416 * greatest common ancestor. In revset notation, this is the set
1416 * greatest common ancestor. In revset notation, this is the set
1417 * "heads(::a and ::b and ...)"
1417 * "heads(::a and ::b and ...)"
1418 */
1418 */
1419 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
1419 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
1420 int revcount)
1420 int revcount)
1421 {
1421 {
1422 const bitmask allseen = (1ull << revcount) - 1;
1422 const bitmask allseen = (1ull << revcount) - 1;
1423 const bitmask poison = 1ull << revcount;
1423 const bitmask poison = 1ull << revcount;
1424 PyObject *gca = PyList_New(0);
1424 PyObject *gca = PyList_New(0);
1425 int i, v, interesting;
1425 int i, v, interesting;
1426 int maxrev = -1;
1426 int maxrev = -1;
1427 bitmask sp;
1427 bitmask sp;
1428 bitmask *seen;
1428 bitmask *seen;
1429
1429
1430 if (gca == NULL)
1430 if (gca == NULL)
1431 return PyErr_NoMemory();
1431 return PyErr_NoMemory();
1432
1432
1433 for (i = 0; i < revcount; i++) {
1433 for (i = 0; i < revcount; i++) {
1434 if (revs[i] > maxrev)
1434 if (revs[i] > maxrev)
1435 maxrev = revs[i];
1435 maxrev = revs[i];
1436 }
1436 }
1437
1437
1438 seen = calloc(sizeof(*seen), maxrev + 1);
1438 seen = calloc(sizeof(*seen), maxrev + 1);
1439 if (seen == NULL) {
1439 if (seen == NULL) {
1440 Py_DECREF(gca);
1440 Py_DECREF(gca);
1441 return PyErr_NoMemory();
1441 return PyErr_NoMemory();
1442 }
1442 }
1443
1443
1444 for (i = 0; i < revcount; i++)
1444 for (i = 0; i < revcount; i++)
1445 seen[revs[i]] = 1ull << i;
1445 seen[revs[i]] = 1ull << i;
1446
1446
1447 interesting = revcount;
1447 interesting = revcount;
1448
1448
1449 for (v = maxrev; v >= 0 && interesting; v--) {
1449 for (v = maxrev; v >= 0 && interesting; v--) {
1450 bitmask sv = seen[v];
1450 bitmask sv = seen[v];
1451 int parents[2];
1451 int parents[2];
1452
1452
1453 if (!sv)
1453 if (!sv)
1454 continue;
1454 continue;
1455
1455
1456 if (sv < poison) {
1456 if (sv < poison) {
1457 interesting -= 1;
1457 interesting -= 1;
1458 if (sv == allseen) {
1458 if (sv == allseen) {
1459 PyObject *obj = PyInt_FromLong(v);
1459 PyObject *obj = PyInt_FromLong(v);
1460 if (obj == NULL)
1460 if (obj == NULL)
1461 goto bail;
1461 goto bail;
1462 if (PyList_Append(gca, obj) == -1) {
1462 if (PyList_Append(gca, obj) == -1) {
1463 Py_DECREF(obj);
1463 Py_DECREF(obj);
1464 goto bail;
1464 goto bail;
1465 }
1465 }
1466 sv |= poison;
1466 sv |= poison;
1467 for (i = 0; i < revcount; i++) {
1467 for (i = 0; i < revcount; i++) {
1468 if (revs[i] == v)
1468 if (revs[i] == v)
1469 goto done;
1469 goto done;
1470 }
1470 }
1471 }
1471 }
1472 }
1472 }
1473 if (index_get_parents(self, v, parents, maxrev) < 0)
1473 if (index_get_parents(self, v, parents, maxrev) < 0)
1474 goto bail;
1474 goto bail;
1475
1475
1476 for (i = 0; i < 2; i++) {
1476 for (i = 0; i < 2; i++) {
1477 int p = parents[i];
1477 int p = parents[i];
1478 if (p == -1)
1478 if (p == -1)
1479 continue;
1479 continue;
1480 sp = seen[p];
1480 sp = seen[p];
1481 if (sv < poison) {
1481 if (sv < poison) {
1482 if (sp == 0) {
1482 if (sp == 0) {
1483 seen[p] = sv;
1483 seen[p] = sv;
1484 interesting++;
1484 interesting++;
1485 }
1485 }
1486 else if (sp != sv)
1486 else if (sp != sv)
1487 seen[p] |= sv;
1487 seen[p] |= sv;
1488 } else {
1488 } else {
1489 if (sp && sp < poison)
1489 if (sp && sp < poison)
1490 interesting--;
1490 interesting--;
1491 seen[p] = sv;
1491 seen[p] = sv;
1492 }
1492 }
1493 }
1493 }
1494 }
1494 }
1495
1495
1496 done:
1496 done:
1497 free(seen);
1497 free(seen);
1498 return gca;
1498 return gca;
1499 bail:
1499 bail:
1500 free(seen);
1500 free(seen);
1501 Py_XDECREF(gca);
1501 Py_XDECREF(gca);
1502 return NULL;
1502 return NULL;
1503 }
1503 }
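
The bitmask bookkeeping above is compact but dense. Below is a hedged pure-Python rendering of the same walk; parents(v) is a hypothetical callback standing in for index_get_parents(), and the early-exit optimization for input revs is omitted:

    def gca_candidates(parents, revs):
        # seen[v]: bitmask of input revs that can reach v; a mask equal
        # to allseen marks a candidate, and the poison bit marks the
        # ancestors of an already-found candidate
        allseen = (1 << len(revs)) - 1
        poison = 1 << len(revs)
        seen = [0] * (max(revs) + 1)
        for i, r in enumerate(revs):
            seen[r] = 1 << i
        interesting = len(revs)
        gca = []
        for v in range(max(revs), -1, -1):
            if not interesting:
                break
            sv = seen[v]
            if not sv:
                continue
            if sv < poison:
                interesting -= 1
                if sv == allseen:
                    gca.append(v)    # reachable from every input rev
                    sv |= poison     # poison v's ancestors
            for p in parents(v):     # (p1, p2), -1 for missing parents
                if p == -1:
                    continue
                sp = seen[p]
                if sv < poison:
                    if sp == 0:
                        seen[p] = sv
                        interesting += 1
                    elif sp != sv:
                        seen[p] |= sv
                else:
                    if sp and sp < poison:
                        interesting -= 1
                    seen[p] = sv
        return gca
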
1504
1504
1505 /*
1505 /*
1506 * Given a disjoint set of revs, return the subset with the longest
1506 * Given a disjoint set of revs, return the subset with the longest
1507 * path to the root.
1507 * path to the root.
1508 */
1508 */
1509 static PyObject *find_deepest(indexObject *self, PyObject *revs)
1509 static PyObject *find_deepest(indexObject *self, PyObject *revs)
1510 {
1510 {
1511 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
1511 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
1512 static const Py_ssize_t capacity = 24;
1512 static const Py_ssize_t capacity = 24;
1513 int *depth, *interesting = NULL;
1513 int *depth, *interesting = NULL;
1514 int i, j, v, ninteresting;
1514 int i, j, v, ninteresting;
1515 PyObject *dict = NULL, *keys = NULL;
1515 PyObject *dict = NULL, *keys = NULL;
1516 long *seen = NULL;
1516 long *seen = NULL;
1517 int maxrev = -1;
1517 int maxrev = -1;
1518 long final;
1518 long final;
1519
1519
1520 if (revcount > capacity) {
1520 if (revcount > capacity) {
1521 PyErr_Format(PyExc_OverflowError,
1521 PyErr_Format(PyExc_OverflowError,
1522 "bitset size (%ld) > capacity (%ld)",
1522 "bitset size (%ld) > capacity (%ld)",
1523 (long)revcount, (long)capacity);
1523 (long)revcount, (long)capacity);
1524 return NULL;
1524 return NULL;
1525 }
1525 }
1526
1526
1527 for (i = 0; i < revcount; i++) {
1527 for (i = 0; i < revcount; i++) {
1528 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1528 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1529 if (n > maxrev)
1529 if (n > maxrev)
1530 maxrev = n;
1530 maxrev = n;
1531 }
1531 }
1532
1532
1533 depth = calloc(sizeof(*depth), maxrev + 1);
1533 depth = calloc(sizeof(*depth), maxrev + 1);
1534 if (depth == NULL)
1534 if (depth == NULL)
1535 return PyErr_NoMemory();
1535 return PyErr_NoMemory();
1536
1536
1537 seen = calloc(sizeof(*seen), maxrev + 1);
1537 seen = calloc(sizeof(*seen), maxrev + 1);
1538 if (seen == NULL) {
1538 if (seen == NULL) {
1539 PyErr_NoMemory();
1539 PyErr_NoMemory();
1540 goto bail;
1540 goto bail;
1541 }
1541 }
1542
1542
1543 interesting = calloc(sizeof(*interesting), 1 << revcount);
1543 interesting = calloc(sizeof(*interesting), 1 << revcount);
1544 if (interesting == NULL) {
1544 if (interesting == NULL) {
1545 PyErr_NoMemory();
1545 PyErr_NoMemory();
1546 goto bail;
1546 goto bail;
1547 }
1547 }
1548
1548
1549 if (PyList_Sort(revs) == -1)
1549 if (PyList_Sort(revs) == -1)
1550 goto bail;
1550 goto bail;
1551
1551
1552 for (i = 0; i < revcount; i++) {
1552 for (i = 0; i < revcount; i++) {
1553 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1553 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1554 long b = 1l << i;
1554 long b = 1l << i;
1555 depth[n] = 1;
1555 depth[n] = 1;
1556 seen[n] = b;
1556 seen[n] = b;
1557 interesting[b] = 1;
1557 interesting[b] = 1;
1558 }
1558 }
1559
1559
1560 /* invariant: ninteresting is the number of non-zero entries in
1560 /* invariant: ninteresting is the number of non-zero entries in
1561 * interesting. */
1561 * interesting. */
1562 ninteresting = (int)revcount;
1562 ninteresting = (int)revcount;
1563
1563
1564 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
1564 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
1565 int dv = depth[v];
1565 int dv = depth[v];
1566 int parents[2];
1566 int parents[2];
1567 long sv;
1567 long sv;
1568
1568
1569 if (dv == 0)
1569 if (dv == 0)
1570 continue;
1570 continue;
1571
1571
1572 sv = seen[v];
1572 sv = seen[v];
1573 if (index_get_parents(self, v, parents, maxrev) < 0)
1573 if (index_get_parents(self, v, parents, maxrev) < 0)
1574 goto bail;
1574 goto bail;
1575
1575
1576 for (i = 0; i < 2; i++) {
1576 for (i = 0; i < 2; i++) {
1577 int p = parents[i];
1577 int p = parents[i];
1578 long sp;
1578 long sp;
1579 int dp;
1579 int dp;
1580
1580
1581 if (p == -1)
1581 if (p == -1)
1582 continue;
1582 continue;
1583
1583
1584 dp = depth[p];
1584 dp = depth[p];
1585 sp = seen[p];
1585 sp = seen[p];
1586 if (dp <= dv) {
1586 if (dp <= dv) {
1587 depth[p] = dv + 1;
1587 depth[p] = dv + 1;
1588 if (sp != sv) {
1588 if (sp != sv) {
1589 interesting[sv] += 1;
1589 interesting[sv] += 1;
1590 seen[p] = sv;
1590 seen[p] = sv;
1591 if (sp) {
1591 if (sp) {
1592 interesting[sp] -= 1;
1592 interesting[sp] -= 1;
1593 if (interesting[sp] == 0)
1593 if (interesting[sp] == 0)
1594 ninteresting -= 1;
1594 ninteresting -= 1;
1595 }
1595 }
1596 }
1596 }
1597 }
1597 }
1598 else if (dv == dp - 1) {
1598 else if (dv == dp - 1) {
1599 long nsp = sp | sv;
1599 long nsp = sp | sv;
1600 if (nsp == sp)
1600 if (nsp == sp)
1601 continue;
1601 continue;
1602 seen[p] = nsp;
1602 seen[p] = nsp;
1603 interesting[sp] -= 1;
1603 interesting[sp] -= 1;
1604 if (interesting[sp] == 0)
1604 if (interesting[sp] == 0)
1605 ninteresting -= 1;
1605 ninteresting -= 1;
1606 if (interesting[nsp] == 0)
1606 if (interesting[nsp] == 0)
1607 ninteresting += 1;
1607 ninteresting += 1;
1608 interesting[nsp] += 1;
1608 interesting[nsp] += 1;
1609 }
1609 }
1610 }
1610 }
1611 interesting[sv] -= 1;
1611 interesting[sv] -= 1;
1612 if (interesting[sv] == 0)
1612 if (interesting[sv] == 0)
1613 ninteresting -= 1;
1613 ninteresting -= 1;
1614 }
1614 }
1615
1615
1616 final = 0;
1616 final = 0;
1617 j = ninteresting;
1617 j = ninteresting;
1618 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
1618 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
1619 if (interesting[i] == 0)
1619 if (interesting[i] == 0)
1620 continue;
1620 continue;
1621 final |= i;
1621 final |= i;
1622 j -= 1;
1622 j -= 1;
1623 }
1623 }
1624 if (final == 0) {
1624 if (final == 0) {
1625 keys = PyList_New(0);
1625 keys = PyList_New(0);
1626 goto bail;
1626 goto bail;
1627 }
1627 }
1628
1628
1629 dict = PyDict_New();
1629 dict = PyDict_New();
1630 if (dict == NULL)
1630 if (dict == NULL)
1631 goto bail;
1631 goto bail;
1632
1632
1633 for (i = 0; i < revcount; i++) {
1633 for (i = 0; i < revcount; i++) {
1634 PyObject *key;
1634 PyObject *key;
1635
1635
1636 if ((final & (1 << i)) == 0)
1636 if ((final & (1 << i)) == 0)
1637 continue;
1637 continue;
1638
1638
1639 key = PyList_GET_ITEM(revs, i);
1639 key = PyList_GET_ITEM(revs, i);
1640 Py_INCREF(key);
1640 Py_INCREF(key);
1641 Py_INCREF(Py_None);
1641 Py_INCREF(Py_None);
1642 if (PyDict_SetItem(dict, key, Py_None) == -1) {
1642 if (PyDict_SetItem(dict, key, Py_None) == -1) {
1643 Py_DECREF(key);
1643 Py_DECREF(key);
1644 Py_DECREF(Py_None);
1644 Py_DECREF(Py_None);
1645 goto bail;
1645 goto bail;
1646 }
1646 }
1647 }
1647 }
1648
1648
1649 keys = PyDict_Keys(dict);
1649 keys = PyDict_Keys(dict);
1650
1650
1651 bail:
1651 bail:
1652 free(depth);
1652 free(depth);
1653 free(seen);
1653 free(seen);
1654 free(interesting);
1654 free(interesting);
1655 Py_XDECREF(dict);
1655 Py_XDECREF(dict);
1656
1656
1657 return keys;
1657 return keys;
1658 }
1658 }
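
A naive (and much slower) reading of the same contract, as a sketch with the same hypothetical parents(v) callback; the recursion makes it unusable on deep histories, which is why the C code walks iteratively with the depth/interesting bookkeeping above:

    import functools

    def find_deepest_naive(parents, revs):
        @functools.lru_cache(maxsize=None)
        def depth(v):
            # length of the longest path from the root (nullrev) to v
            if v == -1:
                return 0
            return 1 + max(depth(p) for p in parents(v))
        best = max(depth(r) for r in revs)
        return [r for r in revs if depth(r) == best]
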
1659
1659
1660 /*
1660 /*
1661 * Given a (possibly overlapping) set of revs, return all the
1661 * Given a (possibly overlapping) set of revs, return all the
1662 * common ancestors heads: heads(::args[0] and ::args[1] and ...)
1662 * common ancestors heads: heads(::args[0] and ::args[1] and ...)
1663 */
1663 */
1664 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
1664 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
1665 {
1665 {
1666 PyObject *ret = NULL;
1666 PyObject *ret = NULL;
1667 Py_ssize_t argcount, i, len;
1667 Py_ssize_t argcount, i, len;
1668 bitmask repeat = 0;
1668 bitmask repeat = 0;
1669 int revcount = 0;
1669 int revcount = 0;
1670 int *revs;
1670 int *revs;
1671
1671
1672 argcount = PySequence_Length(args);
1672 argcount = PySequence_Length(args);
1673 revs = PyMem_Malloc(argcount * sizeof(*revs));
1673 revs = PyMem_Malloc(argcount * sizeof(*revs));
1674 if (argcount > 0 && revs == NULL)
1674 if (argcount > 0 && revs == NULL)
1675 return PyErr_NoMemory();
1675 return PyErr_NoMemory();
1676 len = index_length(self) - 1;
1676 len = index_length(self);
1677
1677
1678 for (i = 0; i < argcount; i++) {
1678 for (i = 0; i < argcount; i++) {
1679 static const int capacity = 24;
1679 static const int capacity = 24;
1680 PyObject *obj = PySequence_GetItem(args, i);
1680 PyObject *obj = PySequence_GetItem(args, i);
1681 bitmask x;
1681 bitmask x;
1682 long val;
1682 long val;
1683
1683
1684 if (!PyInt_Check(obj)) {
1684 if (!PyInt_Check(obj)) {
1685 PyErr_SetString(PyExc_TypeError,
1685 PyErr_SetString(PyExc_TypeError,
1686 "arguments must all be ints");
1686 "arguments must all be ints");
1687 Py_DECREF(obj);
1687 Py_DECREF(obj);
1688 goto bail;
1688 goto bail;
1689 }
1689 }
1690 val = PyInt_AsLong(obj);
1690 val = PyInt_AsLong(obj);
1691 Py_DECREF(obj);
1691 Py_DECREF(obj);
1692 if (val == -1) {
1692 if (val == -1) {
1693 ret = PyList_New(0);
1693 ret = PyList_New(0);
1694 goto done;
1694 goto done;
1695 }
1695 }
1696 if (val < 0 || val >= len) {
1696 if (val < 0 || val >= len) {
1697 PyErr_SetString(PyExc_IndexError,
1697 PyErr_SetString(PyExc_IndexError,
1698 "index out of range");
1698 "index out of range");
1699 goto bail;
1699 goto bail;
1700 }
1700 }
1701 /* this cheesy bloom filter lets us avoid some more
1701 /* this cheesy bloom filter lets us avoid some more
1702 * expensive duplicate checks in the common set-is-disjoint
1702 * expensive duplicate checks in the common set-is-disjoint
1703 * case */
1703 * case */
1704 x = 1ull << (val & 0x3f);
1704 x = 1ull << (val & 0x3f);
1705 if (repeat & x) {
1705 if (repeat & x) {
1706 int k;
1706 int k;
1707 for (k = 0; k < revcount; k++) {
1707 for (k = 0; k < revcount; k++) {
1708 if (val == revs[k])
1708 if (val == revs[k])
1709 goto duplicate;
1709 goto duplicate;
1710 }
1710 }
1711 }
1711 }
1712 else repeat |= x;
1712 else repeat |= x;
1713 if (revcount >= capacity) {
1713 if (revcount >= capacity) {
1714 PyErr_Format(PyExc_OverflowError,
1714 PyErr_Format(PyExc_OverflowError,
1715 "bitset size (%d) > capacity (%d)",
1715 "bitset size (%d) > capacity (%d)",
1716 revcount, capacity);
1716 revcount, capacity);
1717 goto bail;
1717 goto bail;
1718 }
1718 }
1719 revs[revcount++] = (int)val;
1719 revs[revcount++] = (int)val;
1720 duplicate:;
1720 duplicate:;
1721 }
1721 }
1722
1722
1723 if (revcount == 0) {
1723 if (revcount == 0) {
1724 ret = PyList_New(0);
1724 ret = PyList_New(0);
1725 goto done;
1725 goto done;
1726 }
1726 }
1727 if (revcount == 1) {
1727 if (revcount == 1) {
1728 PyObject *obj;
1728 PyObject *obj;
1729 ret = PyList_New(1);
1729 ret = PyList_New(1);
1730 if (ret == NULL)
1730 if (ret == NULL)
1731 goto bail;
1731 goto bail;
1732 obj = PyInt_FromLong(revs[0]);
1732 obj = PyInt_FromLong(revs[0]);
1733 if (obj == NULL)
1733 if (obj == NULL)
1734 goto bail;
1734 goto bail;
1735 PyList_SET_ITEM(ret, 0, obj);
1735 PyList_SET_ITEM(ret, 0, obj);
1736 goto done;
1736 goto done;
1737 }
1737 }
1738
1738
1739 ret = find_gca_candidates(self, revs, revcount);
1739 ret = find_gca_candidates(self, revs, revcount);
1740 if (ret == NULL)
1740 if (ret == NULL)
1741 goto bail;
1741 goto bail;
1742
1742
1743 done:
1743 done:
1744 PyMem_Free(revs);
1744 PyMem_Free(revs);
1745 return ret;
1745 return ret;
1746
1746
1747 bail:
1747 bail:
1748 PyMem_Free(revs);
1748 PyMem_Free(revs);
1749 Py_XDECREF(ret);
1749 Py_XDECREF(ret);
1750 return NULL;
1750 return NULL;
1751 }
1751 }
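
The "cheesy bloom filter" hashes each rev into one of 64 buckets via its low six bits; the linear duplicate scan only runs on a bucket collision. The same idea in Python (vals is an illustrative input sequence):

    repeat = 0
    revs = []
    for val in vals:
        x = 1 << (val & 0x3f)            # one of 64 buckets
        if repeat & x and val in revs:   # collision: do the real check
            continue
        repeat |= x
        revs.append(val)
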
1752
1752
1753 /*
1753 /*
1754 * Given a (possibly overlapping) set of revs, return the greatest
1754 * Given a (possibly overlapping) set of revs, return the greatest
1755 * common ancestors: those with the longest path to the root.
1755 * common ancestors: those with the longest path to the root.
1756 */
1756 */
1757 static PyObject *index_ancestors(indexObject *self, PyObject *args)
1757 static PyObject *index_ancestors(indexObject *self, PyObject *args)
1758 {
1758 {
1759 PyObject *ret;
1759 PyObject *ret;
1760 PyObject *gca = index_commonancestorsheads(self, args);
1760 PyObject *gca = index_commonancestorsheads(self, args);
1761 if (gca == NULL)
1761 if (gca == NULL)
1762 return NULL;
1762 return NULL;
1763
1763
1764 if (PyList_GET_SIZE(gca) <= 1) {
1764 if (PyList_GET_SIZE(gca) <= 1) {
1765 return gca;
1765 return gca;
1766 }
1766 }
1767
1767
1768 ret = find_deepest(self, gca);
1768 ret = find_deepest(self, gca);
1769 Py_DECREF(gca);
1769 Py_DECREF(gca);
1770 return ret;
1770 return ret;
1771 }
1771 }
1772
1772
1773 /*
1773 /*
1774 * Invalidate any trie entries introduced by added revs.
1774 * Invalidate any trie entries introduced by added revs.
1775 */
1775 */
1776 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
1776 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
1777 {
1777 {
1778 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
1778 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
1779
1779
1780 for (i = start; i < len; i++) {
1780 for (i = start; i < len; i++) {
1781 PyObject *tuple = PyList_GET_ITEM(self->added, i);
1781 PyObject *tuple = PyList_GET_ITEM(self->added, i);
1782 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
1782 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
1783
1783
1784 nt_delete_node(self, PyBytes_AS_STRING(node));
1784 nt_delete_node(self, PyBytes_AS_STRING(node));
1785 }
1785 }
1786
1786
1787 if (start == 0)
1787 if (start == 0)
1788 Py_CLEAR(self->added);
1788 Py_CLEAR(self->added);
1789 }
1789 }
1790
1790
1791 /*
1791 /*
1792 * Delete a numeric range of revs, which must be at the end of the
1792 * Delete a numeric range of revs, which must be at the end of the
1793 * index, but exclude the sentinel nullid entry.
1793 * index, but exclude the sentinel nullid entry.
1794 */
1794 */
1795 static int index_slice_del(indexObject *self, PyObject *item)
1795 static int index_slice_del(indexObject *self, PyObject *item)
1796 {
1796 {
1797 Py_ssize_t start, stop, step, slicelength;
1797 Py_ssize_t start, stop, step, slicelength;
1798 Py_ssize_t length = index_length(self);
1798 Py_ssize_t length = index_length(self) + 1;
1799 int ret = 0;
1799 int ret = 0;
1800
1800
1801 /* Argument changed from PySliceObject* to PyObject* in Python 3. */
1801 /* Argument changed from PySliceObject* to PyObject* in Python 3. */
1802 #ifdef IS_PY3K
1802 #ifdef IS_PY3K
1803 if (PySlice_GetIndicesEx(item, length,
1803 if (PySlice_GetIndicesEx(item, length,
1804 #else
1804 #else
1805 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
1805 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
1806 #endif
1806 #endif
1807 &start, &stop, &step, &slicelength) < 0)
1807 &start, &stop, &step, &slicelength) < 0)
1808 return -1;
1808 return -1;
1809
1809
1810 if (slicelength <= 0)
1810 if (slicelength <= 0)
1811 return 0;
1811 return 0;
1812
1812
1813 if ((step < 0 && start < stop) || (step > 0 && start > stop))
1813 if ((step < 0 && start < stop) || (step > 0 && start > stop))
1814 stop = start;
1814 stop = start;
1815
1815
1816 if (step < 0) {
1816 if (step < 0) {
1817 stop = start + 1;
1817 stop = start + 1;
1818 start = stop + step*(slicelength - 1) - 1;
1818 start = stop + step*(slicelength - 1) - 1;
1819 step = -step;
1819 step = -step;
1820 }
1820 }
1821
1821
1822 if (step != 1) {
1822 if (step != 1) {
1823 PyErr_SetString(PyExc_ValueError,
1823 PyErr_SetString(PyExc_ValueError,
1824 "revlog index delete requires step size of 1");
1824 "revlog index delete requires step size of 1");
1825 return -1;
1825 return -1;
1826 }
1826 }
1827
1827
1828 if (stop != length - 1) {
1828 if (stop != length - 1) {
1829 PyErr_SetString(PyExc_IndexError,
1829 PyErr_SetString(PyExc_IndexError,
1830 "revlog index deletion indices are invalid");
1830 "revlog index deletion indices are invalid");
1831 return -1;
1831 return -1;
1832 }
1832 }
1833
1833
1834 if (start < self->length - 1) {
1834 if (start < self->length - 1) {
1835 if (self->nt) {
1835 if (self->nt) {
1836 Py_ssize_t i;
1836 Py_ssize_t i;
1837
1837
1838 for (i = start + 1; i < self->length - 1; i++) {
1838 for (i = start + 1; i < self->length - 1; i++) {
1839 const char *node = index_node_existing(self, i);
1839 const char *node = index_node_existing(self, i);
1840 if (node == NULL)
1840 if (node == NULL)
1841 return -1;
1841 return -1;
1842
1842
1843 nt_delete_node(self, node);
1843 nt_delete_node(self, node);
1844 }
1844 }
1845 if (self->added)
1845 if (self->added)
1846 nt_invalidate_added(self, 0);
1846 nt_invalidate_added(self, 0);
1847 if (self->ntrev > start)
1847 if (self->ntrev > start)
1848 self->ntrev = (int)start;
1848 self->ntrev = (int)start;
1849 }
1849 }
1850 self->length = start + 1;
1850 self->length = start + 1;
1851 if (start < self->raw_length) {
1851 if (start < self->raw_length) {
1852 if (self->cache) {
1852 if (self->cache) {
1853 Py_ssize_t i;
1853 Py_ssize_t i;
1854 for (i = start; i < self->raw_length; i++)
1854 for (i = start; i < self->raw_length; i++)
1855 Py_CLEAR(self->cache[i]);
1855 Py_CLEAR(self->cache[i]);
1856 }
1856 }
1857 self->raw_length = start;
1857 self->raw_length = start;
1858 }
1858 }
1859 goto done;
1859 goto done;
1860 }
1860 }
1861
1861
1862 if (self->nt) {
1862 if (self->nt) {
1863 nt_invalidate_added(self, start - self->length + 1);
1863 nt_invalidate_added(self, start - self->length + 1);
1864 if (self->ntrev > start)
1864 if (self->ntrev > start)
1865 self->ntrev = (int)start;
1865 self->ntrev = (int)start;
1866 }
1866 }
1867 if (self->added)
1867 if (self->added)
1868 ret = PyList_SetSlice(self->added, start - self->length + 1,
1868 ret = PyList_SetSlice(self->added, start - self->length + 1,
1869 PyList_GET_SIZE(self->added), NULL);
1869 PyList_GET_SIZE(self->added), NULL);
1870 done:
1870 done:
1871 Py_CLEAR(self->headrevs);
1871 Py_CLEAR(self->headrevs);
1872 return ret;
1872 return ret;
1873 }
1873 }
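
Only tail deletion with a step of 1 is supported, which is exactly the shape revlog stripping needs. A usage sketch (rev is illustrative):

    # drop revisions rev..tip; the -1 stop leaves the internal nullid
    # sentinel alone, mirroring __delitem__ in pure/parsers.py below
    del index[rev:-1]
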
1874
1874
1875 /*
1875 /*
1876 * Supported ops:
1876 * Supported ops:
1877 *
1877 *
1878 * slice deletion
1878 * slice deletion
1879 * string assignment (extend node->rev mapping)
1879 * string assignment (extend node->rev mapping)
1880 * string deletion (shrink node->rev mapping)
1880 * string deletion (shrink node->rev mapping)
1881 */
1881 */
1882 static int index_assign_subscript(indexObject *self, PyObject *item,
1882 static int index_assign_subscript(indexObject *self, PyObject *item,
1883 PyObject *value)
1883 PyObject *value)
1884 {
1884 {
1885 char *node;
1885 char *node;
1886 long rev;
1886 long rev;
1887
1887
1888 if (PySlice_Check(item) && value == NULL)
1888 if (PySlice_Check(item) && value == NULL)
1889 return index_slice_del(self, item);
1889 return index_slice_del(self, item);
1890
1890
1891 if (node_check(item, &node) == -1)
1891 if (node_check(item, &node) == -1)
1892 return -1;
1892 return -1;
1893
1893
1894 if (value == NULL)
1894 if (value == NULL)
1895 return self->nt ? nt_delete_node(self, node) : 0;
1895 return self->nt ? nt_delete_node(self, node) : 0;
1896 rev = PyInt_AsLong(value);
1896 rev = PyInt_AsLong(value);
1897 if (rev > INT_MAX || rev < 0) {
1897 if (rev > INT_MAX || rev < 0) {
1898 if (!PyErr_Occurred())
1898 if (!PyErr_Occurred())
1899 PyErr_SetString(PyExc_ValueError, "rev out of range");
1899 PyErr_SetString(PyExc_ValueError, "rev out of range");
1900 return -1;
1900 return -1;
1901 }
1901 }
1902
1902
1903 if (nt_init(self) == -1)
1903 if (nt_init(self) == -1)
1904 return -1;
1904 return -1;
1905 return nt_insert(self, node, (int)rev);
1905 return nt_insert(self, node, (int)rev);
1906 }
1906 }
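
The three supported operations spelled out as a sketch (the node value is illustrative):

    node = b'\x12' * 20
    index[node] = 5    # extend the node->rev mapping
    del index[node]    # shrink it (a no-op until the trie is built)
    del index[5:-1]    # slice deletion, handled by index_slice_del()
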
1907
1907
1908 /*
1908 /*
1909 * Find all RevlogNG entries in an index that has inline data. Update
1909 * Find all RevlogNG entries in an index that has inline data. Update
1910 * the optional "offsets" table with those entries.
1910 * the optional "offsets" table with those entries.
1911 */
1911 */
1912 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
1912 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
1913 {
1913 {
1914 const char *data = (const char *)self->buf.buf;
1914 const char *data = (const char *)self->buf.buf;
1915 Py_ssize_t pos = 0;
1915 Py_ssize_t pos = 0;
1916 Py_ssize_t end = self->buf.len;
1916 Py_ssize_t end = self->buf.len;
1917 long incr = v1_hdrsize;
1917 long incr = v1_hdrsize;
1918 Py_ssize_t len = 0;
1918 Py_ssize_t len = 0;
1919
1919
1920 while (pos + v1_hdrsize <= end && pos >= 0) {
1920 while (pos + v1_hdrsize <= end && pos >= 0) {
1921 uint32_t comp_len;
1921 uint32_t comp_len;
1922 /* 3rd element of header is length of compressed inline data */
1922 /* 3rd element of header is length of compressed inline data */
1923 comp_len = getbe32(data + pos + 8);
1923 comp_len = getbe32(data + pos + 8);
1924 incr = v1_hdrsize + comp_len;
1924 incr = v1_hdrsize + comp_len;
1925 if (offsets)
1925 if (offsets)
1926 offsets[len] = data + pos;
1926 offsets[len] = data + pos;
1927 len++;
1927 len++;
1928 pos += incr;
1928 pos += incr;
1929 }
1929 }
1930
1930
1931 if (pos != end) {
1931 if (pos != end) {
1932 if (!PyErr_Occurred())
1932 if (!PyErr_Occurred())
1933 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1933 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1934 return -1;
1934 return -1;
1935 }
1935 }
1936
1936
1937 return len;
1937 return len;
1938 }
1938 }
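
Each inline entry is a v1 index header followed by comp_len bytes of revision data, so the scan simply hops from header to header. An equivalent sketch in Python, assuming the 64-byte v1 header size and a raw buffer named data:

    import struct
    pos, count = 0, 0
    while pos + 64 <= len(data):
        # third header field (byte offset 8) is the compressed length
        comp_len = struct.unpack('>I', data[pos + 8:pos + 12])[0]
        pos += 64 + comp_len
        count += 1
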
1939
1939
1940 static int index_init(indexObject *self, PyObject *args)
1940 static int index_init(indexObject *self, PyObject *args)
1941 {
1941 {
1942 PyObject *data_obj, *inlined_obj;
1942 PyObject *data_obj, *inlined_obj;
1943 Py_ssize_t size;
1943 Py_ssize_t size;
1944
1944
1945 /* Initialize before argument-checking to avoid index_dealloc() crash. */
1945 /* Initialize before argument-checking to avoid index_dealloc() crash. */
1946 self->raw_length = 0;
1946 self->raw_length = 0;
1947 self->added = NULL;
1947 self->added = NULL;
1948 self->cache = NULL;
1948 self->cache = NULL;
1949 self->data = NULL;
1949 self->data = NULL;
1950 memset(&self->buf, 0, sizeof(self->buf));
1950 memset(&self->buf, 0, sizeof(self->buf));
1951 self->headrevs = NULL;
1951 self->headrevs = NULL;
1952 self->filteredrevs = Py_None;
1952 self->filteredrevs = Py_None;
1953 Py_INCREF(Py_None);
1953 Py_INCREF(Py_None);
1954 self->nt = NULL;
1954 self->nt = NULL;
1955 self->offsets = NULL;
1955 self->offsets = NULL;
1956
1956
1957 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1957 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1958 return -1;
1958 return -1;
1959 if (!PyObject_CheckBuffer(data_obj)) {
1959 if (!PyObject_CheckBuffer(data_obj)) {
1960 PyErr_SetString(PyExc_TypeError,
1960 PyErr_SetString(PyExc_TypeError,
1961 "data does not support buffer interface");
1961 "data does not support buffer interface");
1962 return -1;
1962 return -1;
1963 }
1963 }
1964
1964
1965 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
1965 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
1966 return -1;
1966 return -1;
1967 size = self->buf.len;
1967 size = self->buf.len;
1968
1968
1969 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1969 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1970 self->data = data_obj;
1970 self->data = data_obj;
1971
1971
1972 self->ntlength = self->ntcapacity = 0;
1972 self->ntlength = self->ntcapacity = 0;
1973 self->ntdepth = self->ntsplits = 0;
1973 self->ntdepth = self->ntsplits = 0;
1974 self->ntlookups = self->ntmisses = 0;
1974 self->ntlookups = self->ntmisses = 0;
1975 self->ntrev = -1;
1975 self->ntrev = -1;
1976 Py_INCREF(self->data);
1976 Py_INCREF(self->data);
1977
1977
1978 if (self->inlined) {
1978 if (self->inlined) {
1979 Py_ssize_t len = inline_scan(self, NULL);
1979 Py_ssize_t len = inline_scan(self, NULL);
1980 if (len == -1)
1980 if (len == -1)
1981 goto bail;
1981 goto bail;
1982 self->raw_length = len;
1982 self->raw_length = len;
1983 self->length = len + 1;
1983 self->length = len + 1;
1984 } else {
1984 } else {
1985 if (size % v1_hdrsize) {
1985 if (size % v1_hdrsize) {
1986 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1986 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1987 goto bail;
1987 goto bail;
1988 }
1988 }
1989 self->raw_length = size / v1_hdrsize;
1989 self->raw_length = size / v1_hdrsize;
1990 self->length = self->raw_length + 1;
1990 self->length = self->raw_length + 1;
1991 }
1991 }
1992
1992
1993 return 0;
1993 return 0;
1994 bail:
1994 bail:
1995 return -1;
1995 return -1;
1996 }
1996 }
1997
1997
1998 static PyObject *index_nodemap(indexObject *self)
1998 static PyObject *index_nodemap(indexObject *self)
1999 {
1999 {
2000 Py_INCREF(self);
2000 Py_INCREF(self);
2001 return (PyObject *)self;
2001 return (PyObject *)self;
2002 }
2002 }
2003
2003
2004 static void index_dealloc(indexObject *self)
2004 static void index_dealloc(indexObject *self)
2005 {
2005 {
2006 _index_clearcaches(self);
2006 _index_clearcaches(self);
2007 Py_XDECREF(self->filteredrevs);
2007 Py_XDECREF(self->filteredrevs);
2008 if (self->buf.buf) {
2008 if (self->buf.buf) {
2009 PyBuffer_Release(&self->buf);
2009 PyBuffer_Release(&self->buf);
2010 memset(&self->buf, 0, sizeof(self->buf));
2010 memset(&self->buf, 0, sizeof(self->buf));
2011 }
2011 }
2012 Py_XDECREF(self->data);
2012 Py_XDECREF(self->data);
2013 Py_XDECREF(self->added);
2013 Py_XDECREF(self->added);
2014 PyObject_Del(self);
2014 PyObject_Del(self);
2015 }
2015 }
2016
2016
2017 static PySequenceMethods index_sequence_methods = {
2017 static PySequenceMethods index_sequence_methods = {
2018 (lenfunc)index_length, /* sq_length */
2018 (lenfunc)index_length, /* sq_length */
2019 0, /* sq_concat */
2019 0, /* sq_concat */
2020 0, /* sq_repeat */
2020 0, /* sq_repeat */
2021 (ssizeargfunc)index_get, /* sq_item */
2021 (ssizeargfunc)index_get, /* sq_item */
2022 0, /* sq_slice */
2022 0, /* sq_slice */
2023 0, /* sq_ass_item */
2023 0, /* sq_ass_item */
2024 0, /* sq_ass_slice */
2024 0, /* sq_ass_slice */
2025 (objobjproc)index_contains, /* sq_contains */
2025 (objobjproc)index_contains, /* sq_contains */
2026 };
2026 };
2027
2027
2028 static PyMappingMethods index_mapping_methods = {
2028 static PyMappingMethods index_mapping_methods = {
2029 (lenfunc)index_length, /* mp_length */
2029 (lenfunc)index_length, /* mp_length */
2030 (binaryfunc)index_getitem, /* mp_subscript */
2030 (binaryfunc)index_getitem, /* mp_subscript */
2031 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
2031 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
2032 };
2032 };
2033
2033
2034 static PyMethodDef index_methods[] = {
2034 static PyMethodDef index_methods[] = {
2035 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
2035 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
2036 "return the gca set of the given revs"},
2036 "return the gca set of the given revs"},
2037 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
2037 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
2038 METH_VARARGS,
2038 METH_VARARGS,
2039 "return the heads of the common ancestors of the given revs"},
2039 "return the heads of the common ancestors of the given revs"},
2040 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
2040 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
2041 "clear the index caches"},
2041 "clear the index caches"},
2042 {"get", (PyCFunction)index_m_get, METH_VARARGS,
2042 {"get", (PyCFunction)index_m_get, METH_VARARGS,
2043 "get an index entry"},
2043 "get an index entry"},
2044 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets,
2044 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets,
2045 METH_VARARGS, "compute phases"},
2045 METH_VARARGS, "compute phases"},
2046 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
2046 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
2047 "reachableroots"},
2047 "reachableroots"},
2048 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
2048 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
2049 "get head revisions"}, /* Can do filtering since 3.2 */
2049 "get head revisions"}, /* Can do filtering since 3.2 */
2050 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
2050 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
2051 "get filtered head revisions"}, /* Can always do filtering */
2051 "get filtered head revisions"}, /* Can always do filtering */
2052 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
2052 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
2053 "determine revisions with deltas to reconstruct fulltext"},
2053 "determine revisions with deltas to reconstruct fulltext"},
2054 {"append", (PyCFunction)index_append, METH_O,
2054 {"append", (PyCFunction)index_append, METH_O,
2055 "append an index entry"},
2055 "append an index entry"},
2056 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
2056 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
2057 "match a potentially ambiguous node ID"},
2057 "match a potentially ambiguous node ID"},
2058 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
2058 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
2059 "find length of shortest hex nodeid of a binary ID"},
2059 "find length of shortest hex nodeid of a binary ID"},
2060 {"stats", (PyCFunction)index_stats, METH_NOARGS,
2060 {"stats", (PyCFunction)index_stats, METH_NOARGS,
2061 "stats for the index"},
2061 "stats for the index"},
2062 {NULL} /* Sentinel */
2062 {NULL} /* Sentinel */
2063 };
2063 };
2064
2064
2065 static PyGetSetDef index_getset[] = {
2065 static PyGetSetDef index_getset[] = {
2066 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
2066 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
2067 {NULL} /* Sentinel */
2067 {NULL} /* Sentinel */
2068 };
2068 };
2069
2069
2070 static PyTypeObject indexType = {
2070 static PyTypeObject indexType = {
2071 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2071 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2072 "parsers.index", /* tp_name */
2072 "parsers.index", /* tp_name */
2073 sizeof(indexObject), /* tp_basicsize */
2073 sizeof(indexObject), /* tp_basicsize */
2074 0, /* tp_itemsize */
2074 0, /* tp_itemsize */
2075 (destructor)index_dealloc, /* tp_dealloc */
2075 (destructor)index_dealloc, /* tp_dealloc */
2076 0, /* tp_print */
2076 0, /* tp_print */
2077 0, /* tp_getattr */
2077 0, /* tp_getattr */
2078 0, /* tp_setattr */
2078 0, /* tp_setattr */
2079 0, /* tp_compare */
2079 0, /* tp_compare */
2080 0, /* tp_repr */
2080 0, /* tp_repr */
2081 0, /* tp_as_number */
2081 0, /* tp_as_number */
2082 &index_sequence_methods, /* tp_as_sequence */
2082 &index_sequence_methods, /* tp_as_sequence */
2083 &index_mapping_methods, /* tp_as_mapping */
2083 &index_mapping_methods, /* tp_as_mapping */
2084 0, /* tp_hash */
2084 0, /* tp_hash */
2085 0, /* tp_call */
2085 0, /* tp_call */
2086 0, /* tp_str */
2086 0, /* tp_str */
2087 0, /* tp_getattro */
2087 0, /* tp_getattro */
2088 0, /* tp_setattro */
2088 0, /* tp_setattro */
2089 0, /* tp_as_buffer */
2089 0, /* tp_as_buffer */
2090 Py_TPFLAGS_DEFAULT, /* tp_flags */
2090 Py_TPFLAGS_DEFAULT, /* tp_flags */
2091 "revlog index", /* tp_doc */
2091 "revlog index", /* tp_doc */
2092 0, /* tp_traverse */
2092 0, /* tp_traverse */
2093 0, /* tp_clear */
2093 0, /* tp_clear */
2094 0, /* tp_richcompare */
2094 0, /* tp_richcompare */
2095 0, /* tp_weaklistoffset */
2095 0, /* tp_weaklistoffset */
2096 0, /* tp_iter */
2096 0, /* tp_iter */
2097 0, /* tp_iternext */
2097 0, /* tp_iternext */
2098 index_methods, /* tp_methods */
2098 index_methods, /* tp_methods */
2099 0, /* tp_members */
2099 0, /* tp_members */
2100 index_getset, /* tp_getset */
2100 index_getset, /* tp_getset */
2101 0, /* tp_base */
2101 0, /* tp_base */
2102 0, /* tp_dict */
2102 0, /* tp_dict */
2103 0, /* tp_descr_get */
2103 0, /* tp_descr_get */
2104 0, /* tp_descr_set */
2104 0, /* tp_descr_set */
2105 0, /* tp_dictoffset */
2105 0, /* tp_dictoffset */
2106 (initproc)index_init, /* tp_init */
2106 (initproc)index_init, /* tp_init */
2107 0, /* tp_alloc */
2107 0, /* tp_alloc */
2108 };
2108 };
2109
2109
2110 /*
2110 /*
2111 * returns a tuple of the form (index, cache) with elements as
2111 * returns a tuple of the form (index, cache) with elements as
2112 * follows:
2112 * follows:
2113 *
2113 *
2114 * index: an index object that lazily parses RevlogNG records
2114 * index: an index object that lazily parses RevlogNG records
2115 * cache: if data is inlined, a tuple (0, index_file_content), else None
2115 * cache: if data is inlined, a tuple (0, index_file_content), else None
2116 * index_file_content could be a string, or a buffer
2116 * index_file_content could be a string, or a buffer
2117 *
2117 *
2118 * added complications are for backwards compatibility
2118 * added complications are for backwards compatibility
2119 */
2119 */
2120 PyObject *parse_index2(PyObject *self, PyObject *args)
2120 PyObject *parse_index2(PyObject *self, PyObject *args)
2121 {
2121 {
2122 PyObject *tuple = NULL, *cache = NULL;
2122 PyObject *tuple = NULL, *cache = NULL;
2123 indexObject *idx;
2123 indexObject *idx;
2124 int ret;
2124 int ret;
2125
2125
2126 idx = PyObject_New(indexObject, &indexType);
2126 idx = PyObject_New(indexObject, &indexType);
2127 if (idx == NULL)
2127 if (idx == NULL)
2128 goto bail;
2128 goto bail;
2129
2129
2130 ret = index_init(idx, args);
2130 ret = index_init(idx, args);
2131 if (ret == -1)
2131 if (ret == -1)
2132 goto bail;
2132 goto bail;
2133
2133
2134 if (idx->inlined) {
2134 if (idx->inlined) {
2135 cache = Py_BuildValue("iO", 0, idx->data);
2135 cache = Py_BuildValue("iO", 0, idx->data);
2136 if (cache == NULL)
2136 if (cache == NULL)
2137 goto bail;
2137 goto bail;
2138 } else {
2138 } else {
2139 cache = Py_None;
2139 cache = Py_None;
2140 Py_INCREF(cache);
2140 Py_INCREF(cache);
2141 }
2141 }
2142
2142
2143 tuple = Py_BuildValue("NN", idx, cache);
2143 tuple = Py_BuildValue("NN", idx, cache);
2144 if (!tuple)
2144 if (!tuple)
2145 goto bail;
2145 goto bail;
2146 return tuple;
2146 return tuple;
2147
2147
2148 bail:
2148 bail:
2149 Py_XDECREF(idx);
2149 Py_XDECREF(idx);
2150 Py_XDECREF(cache);
2150 Py_XDECREF(cache);
2151 Py_XDECREF(tuple);
2151 Py_XDECREF(tuple);
2152 return NULL;
2152 return NULL;
2153 }
2153 }
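
The call-side shape, as a sketch (mirroring the pure-Python parse_index2 further down):

    index, cache = parsers.parse_index2(data, inline)
    # cache is (0, data) for inline revlogs, None otherwise
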
2154
2154
2155 void revlog_module_init(PyObject *mod)
2155 void revlog_module_init(PyObject *mod)
2156 {
2156 {
2157 indexType.tp_new = PyType_GenericNew;
2157 indexType.tp_new = PyType_GenericNew;
2158 if (PyType_Ready(&indexType) < 0)
2158 if (PyType_Ready(&indexType) < 0)
2159 return;
2159 return;
2160 Py_INCREF(&indexType);
2160 Py_INCREF(&indexType);
2161 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
2161 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
2162
2162
2163 nullentry = Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0, 0,
2163 nullentry = Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0, 0,
2164 -1, -1, -1, -1, nullid, 20);
2164 -1, -1, -1, -1, nullid, 20);
2165 if (nullentry)
2165 if (nullentry)
2166 PyObject_GC_UnTrack(nullentry);
2166 PyObject_GC_UnTrack(nullentry);
2167 }
2167 }
@@ -1,109 +1,109 b''
1 # policy.py - module policy logic for Mercurial.
1 # policy.py - module policy logic for Mercurial.
2 #
2 #
3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import os
10 import os
11 import sys
11 import sys
12
12
13 # Rules for how modules can be loaded. Values are:
13 # Rules for how modules can be loaded. Values are:
14 #
14 #
15 # c - require C extensions
15 # c - require C extensions
16 # allow - allow pure Python implementation when C loading fails
16 # allow - allow pure Python implementation when C loading fails
17 # cffi - required cffi versions (implemented within pure module)
17 # cffi - required cffi versions (implemented within pure module)
18 # cffi-allow - allow pure Python implementation if cffi version is missing
18 # cffi-allow - allow pure Python implementation if cffi version is missing
19 # py - only load pure Python modules
19 # py - only load pure Python modules
20 #
20 #
21 # By default, fall back to the pure modules so the in-place build can
21 # By default, fall back to the pure modules so the in-place build can
22 # run without recompiling the C extensions. This will be overridden by
22 # run without recompiling the C extensions. This will be overridden by
23 # __modulepolicy__ generated by setup.py.
23 # __modulepolicy__ generated by setup.py.
24 policy = b'allow'
24 policy = b'allow'
25 _packageprefs = {
25 _packageprefs = {
26 # policy: (versioned package, pure package)
26 # policy: (versioned package, pure package)
27 b'c': (r'cext', None),
27 b'c': (r'cext', None),
28 b'allow': (r'cext', r'pure'),
28 b'allow': (r'cext', r'pure'),
29 b'cffi': (r'cffi', None),
29 b'cffi': (r'cffi', None),
30 b'cffi-allow': (r'cffi', r'pure'),
30 b'cffi-allow': (r'cffi', r'pure'),
31 b'py': (None, r'pure'),
31 b'py': (None, r'pure'),
32 }
32 }
33
33
34 try:
34 try:
35 from . import __modulepolicy__
35 from . import __modulepolicy__
36 policy = __modulepolicy__.modulepolicy
36 policy = __modulepolicy__.modulepolicy
37 except ImportError:
37 except ImportError:
38 pass
38 pass
39
39
40 # PyPy doesn't load C extensions.
40 # PyPy doesn't load C extensions.
41 #
41 #
42 # The canonical way to do this is to test platform.python_implementation().
42 # The canonical way to do this is to test platform.python_implementation().
43 # But we don't import platform and don't bloat for it here.
43 # But we don't import platform and don't bloat for it here.
44 if r'__pypy__' in sys.builtin_module_names:
44 if r'__pypy__' in sys.builtin_module_names:
45 policy = b'cffi'
45 policy = b'cffi'
46
46
47 # Environment variable can always force settings.
47 # Environment variable can always force settings.
48 if sys.version_info[0] >= 3:
48 if sys.version_info[0] >= 3:
49 if r'HGMODULEPOLICY' in os.environ:
49 if r'HGMODULEPOLICY' in os.environ:
50 policy = os.environ[r'HGMODULEPOLICY'].encode(r'utf-8')
50 policy = os.environ[r'HGMODULEPOLICY'].encode(r'utf-8')
51 else:
51 else:
52 policy = os.environ.get(r'HGMODULEPOLICY', policy)
52 policy = os.environ.get(r'HGMODULEPOLICY', policy)
53
53
54 def _importfrom(pkgname, modname):
54 def _importfrom(pkgname, modname):
55 # from .<pkgname> import <modname> (where . is looked through this module)
55 # from .<pkgname> import <modname> (where . is looked through this module)
56 fakelocals = {}
56 fakelocals = {}
57 pkg = __import__(pkgname, globals(), fakelocals, [modname], level=1)
57 pkg = __import__(pkgname, globals(), fakelocals, [modname], level=1)
58 try:
58 try:
59 fakelocals[modname] = mod = getattr(pkg, modname)
59 fakelocals[modname] = mod = getattr(pkg, modname)
60 except AttributeError:
60 except AttributeError:
61 raise ImportError(r'cannot import name %s' % modname)
61 raise ImportError(r'cannot import name %s' % modname)
62 # force import; fakelocals[modname] may be replaced with the real module
62 # force import; fakelocals[modname] may be replaced with the real module
63 getattr(mod, r'__doc__', None)
63 getattr(mod, r'__doc__', None)
64 return fakelocals[modname]
64 return fakelocals[modname]
65
65
66 # keep in sync with "version" in C modules
66 # keep in sync with "version" in C modules
67 _cextversions = {
67 _cextversions = {
68 (r'cext', r'base85'): 1,
68 (r'cext', r'base85'): 1,
69 (r'cext', r'bdiff'): 3,
69 (r'cext', r'bdiff'): 3,
70 (r'cext', r'mpatch'): 1,
70 (r'cext', r'mpatch'): 1,
71 (r'cext', r'osutil'): 4,
71 (r'cext', r'osutil'): 4,
72 (r'cext', r'parsers'): 6,
72 (r'cext', r'parsers'): 7,
73 }
73 }
74
74
75 # map import request to other package or module
75 # map import request to other package or module
76 _modredirects = {
76 _modredirects = {
77 (r'cext', r'charencode'): (r'cext', r'parsers'),
77 (r'cext', r'charencode'): (r'cext', r'parsers'),
78 (r'cffi', r'base85'): (r'pure', r'base85'),
78 (r'cffi', r'base85'): (r'pure', r'base85'),
79 (r'cffi', r'charencode'): (r'pure', r'charencode'),
79 (r'cffi', r'charencode'): (r'pure', r'charencode'),
80 (r'cffi', r'parsers'): (r'pure', r'parsers'),
80 (r'cffi', r'parsers'): (r'pure', r'parsers'),
81 }
81 }
82
82
83 def _checkmod(pkgname, modname, mod):
83 def _checkmod(pkgname, modname, mod):
84 expected = _cextversions.get((pkgname, modname))
84 expected = _cextversions.get((pkgname, modname))
85 actual = getattr(mod, r'version', None)
85 actual = getattr(mod, r'version', None)
86 if actual != expected:
86 if actual != expected:
87 raise ImportError(r'cannot import module %s.%s '
87 raise ImportError(r'cannot import module %s.%s '
88 r'(expected version: %d, actual: %r)'
88 r'(expected version: %d, actual: %r)'
89 % (pkgname, modname, expected, actual))
89 % (pkgname, modname, expected, actual))
90
90
91 def importmod(modname):
91 def importmod(modname):
92 """Import module according to policy and check API version"""
92 """Import module according to policy and check API version"""
93 try:
93 try:
94 verpkg, purepkg = _packageprefs[policy]
94 verpkg, purepkg = _packageprefs[policy]
95 except KeyError:
95 except KeyError:
96 raise ImportError(r'invalid HGMODULEPOLICY %r' % policy)
96 raise ImportError(r'invalid HGMODULEPOLICY %r' % policy)
97 assert verpkg or purepkg
97 assert verpkg or purepkg
98 if verpkg:
98 if verpkg:
99 pn, mn = _modredirects.get((verpkg, modname), (verpkg, modname))
99 pn, mn = _modredirects.get((verpkg, modname), (verpkg, modname))
100 try:
100 try:
101 mod = _importfrom(pn, mn)
101 mod = _importfrom(pn, mn)
102 if pn == verpkg:
102 if pn == verpkg:
103 _checkmod(pn, mn, mod)
103 _checkmod(pn, mn, mod)
104 return mod
104 return mod
105 except ImportError:
105 except ImportError:
106 if not purepkg:
106 if not purepkg:
107 raise
107 raise
108 pn, mn = _modredirects.get((purepkg, modname), (purepkg, modname))
108 pn, mn = _modredirects.get((purepkg, modname), (purepkg, modname))
109 return _importfrom(pn, mn)
109 return _importfrom(pn, mn)
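
How the rest of Mercurial consumes this module, as a brief sketch:

    from mercurial import policy
    # yields the C 'parsers' when the policy and version check allow it,
    # otherwise falls back to mercurial.pure.parsers
    parsers = policy.importmod(r'parsers')
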
@@ -1,176 +1,176 b''
1 # parsers.py - Python implementation of parsers.c
1 # parsers.py - Python implementation of parsers.c
2 #
2 #
3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import struct
10 import struct
11 import zlib
11 import zlib
12
12
13 from ..node import nullid
13 from ..node import nullid
14 from .. import pycompat
14 from .. import pycompat
15 stringio = pycompat.bytesio
15 stringio = pycompat.bytesio
16
16
17
17
18 _pack = struct.pack
18 _pack = struct.pack
19 _unpack = struct.unpack
19 _unpack = struct.unpack
20 _compress = zlib.compress
20 _compress = zlib.compress
21 _decompress = zlib.decompress
21 _decompress = zlib.decompress
22
22
23 # Some code below makes tuples directly because it's more convenient. However,
23 # Some code below makes tuples directly because it's more convenient. However,
24 # code outside this module should always use dirstatetuple.
24 # code outside this module should always use dirstatetuple.
25 def dirstatetuple(*x):
25 def dirstatetuple(*x):
26 # x is a tuple
26 # x is a tuple
27 return x
27 return x
28
28
29 indexformatng = ">Qiiiiii20s12x"
29 indexformatng = ">Qiiiiii20s12x"
30 indexfirst = struct.calcsize('Q')
30 indexfirst = struct.calcsize('Q')
31 sizeint = struct.calcsize('i')
31 sizeint = struct.calcsize('i')
32 indexsize = struct.calcsize(indexformatng)
32 indexsize = struct.calcsize(indexformatng)
33
33
34 def gettype(q):
34 def gettype(q):
35 return int(q & 0xFFFF)
35 return int(q & 0xFFFF)
36
36
37 def offset_type(offset, type):
37 def offset_type(offset, type):
38 return int(int(offset) << 16 | type)
38 return int(int(offset) << 16 | type)
39
39
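
offset_type() and gettype() pack and unpack the first index field, which holds a 48-bit offset and 16 bits of flags; a quick sketch:

    v = offset_type(1024, 1)
    assert v >> 16 == 1024 and gettype(v) == 1
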
40 class BaseIndexObject(object):
40 class BaseIndexObject(object):
41 def __len__(self):
41 def __len__(self):
42 return self._lgt + len(self._extra) + 1
42 return self._lgt + len(self._extra)
43
43
44 def append(self, tup):
44 def append(self, tup):
45 self._extra.append(tup)
45 self._extra.append(tup)
46
46
47 def _fix_index(self, i):
47 def _fix_index(self, i):
48 if not isinstance(i, int):
48 if not isinstance(i, int):
49 raise TypeError("expecting int indexes")
49 raise TypeError("expecting int indexes")
50 if i < 0 or i >= len(self):
50 if i < 0 or i >= len(self) + 1:
51 raise IndexError
51 raise IndexError
52 return i
52 return i
53
53
54 def __getitem__(self, i):
54 def __getitem__(self, i):
55 if i == -1 or i == len(self) - 1:
55 if i == -1 or i == len(self):
56 return (0, 0, 0, -1, -1, -1, -1, nullid)
56 return (0, 0, 0, -1, -1, -1, -1, nullid)
57 i = self._fix_index(i)
57 i = self._fix_index(i)
58 if i >= self._lgt:
58 if i >= self._lgt:
59 return self._extra[i - self._lgt]
59 return self._extra[i - self._lgt]
60 index = self._calculate_index(i)
60 index = self._calculate_index(i)
61 r = struct.unpack(indexformatng, self._data[index:index + indexsize])
61 r = struct.unpack(indexformatng, self._data[index:index + indexsize])
62 if i == 0:
62 if i == 0:
63 e = list(r)
63 e = list(r)
64 type = gettype(e[0])
64 type = gettype(e[0])
65 e[0] = offset_type(0, type)
65 e[0] = offset_type(0, type)
66 return tuple(e)
66 return tuple(e)
67 return r
67 return r
68
68
69 class IndexObject(BaseIndexObject):
69 class IndexObject(BaseIndexObject):
70 def __init__(self, data):
70 def __init__(self, data):
71 assert len(data) % indexsize == 0
71 assert len(data) % indexsize == 0
72 self._data = data
72 self._data = data
73 self._lgt = len(data) // indexsize
73 self._lgt = len(data) // indexsize
74 self._extra = []
74 self._extra = []
75
75
76 def _calculate_index(self, i):
76 def _calculate_index(self, i):
77 return i * indexsize
77 return i * indexsize
78
78
79 def __delitem__(self, i):
79 def __delitem__(self, i):
80 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
80 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
81 raise ValueError("deleting slices only supports a:-1 with step 1")
81 raise ValueError("deleting slices only supports a:-1 with step 1")
82 i = self._fix_index(i.start)
82 i = self._fix_index(i.start)
83 if i < self._lgt:
83 if i < self._lgt:
84 self._data = self._data[:i * indexsize]
84 self._data = self._data[:i * indexsize]
85 self._lgt = i
85 self._lgt = i
86 self._extra = []
86 self._extra = []
87 else:
87 else:
88 self._extra = self._extra[:i - self._lgt]
88 self._extra = self._extra[:i - self._lgt]
89
89
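
This is the crux of the change: len() now counts only real revisions, while the nullid sentinel stays reachable at -1 and, transitionally, one past the end. A sketch using the classes above (nullid comes from this module's own import):

    idx = IndexObject(b'')             # empty index
    assert len(idx) == 0
    assert idx[-1][7] == nullid        # sentinel still addressable
    assert idx[len(idx)][7] == nullid  # transitional alias in __getitem__
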
90 class InlinedIndexObject(BaseIndexObject):
90 class InlinedIndexObject(BaseIndexObject):
91 def __init__(self, data, inline=0):
91 def __init__(self, data, inline=0):
92 self._data = data
92 self._data = data
93 self._lgt = self._inline_scan(None)
93 self._lgt = self._inline_scan(None)
94 self._inline_scan(self._lgt)
94 self._inline_scan(self._lgt)
95 self._extra = []
95 self._extra = []
96
96
97 def _inline_scan(self, lgt):
97 def _inline_scan(self, lgt):
98 off = 0
98 off = 0
99 if lgt is not None:
99 if lgt is not None:
100 self._offsets = [0] * lgt
100 self._offsets = [0] * lgt
101 count = 0
101 count = 0
102 while off <= len(self._data) - indexsize:
102 while off <= len(self._data) - indexsize:
103 s, = struct.unpack('>i',
103 s, = struct.unpack('>i',
104 self._data[off + indexfirst:off + sizeint + indexfirst])
104 self._data[off + indexfirst:off + sizeint + indexfirst])
105 if lgt is not None:
105 if lgt is not None:
106 self._offsets[count] = off
106 self._offsets[count] = off
107 count += 1
107 count += 1
108 off += indexsize + s
108 off += indexsize + s
109 if off != len(self._data):
109 if off != len(self._data):
110 raise ValueError("corrupted data")
110 raise ValueError("corrupted data")
111 return count
111 return count
112
112
113 def __delitem__(self, i):
113 def __delitem__(self, i):
114 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
114 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
115 raise ValueError("deleting slices only supports a:-1 with step 1")
115 raise ValueError("deleting slices only supports a:-1 with step 1")
116 i = self._fix_index(i.start)
116 i = self._fix_index(i.start)
117 if i < self._lgt:
117 if i < self._lgt:
118 self._offsets = self._offsets[:i]
118 self._offsets = self._offsets[:i]
119 self._lgt = i
119 self._lgt = i
120 self._extra = []
120 self._extra = []
121 else:
121 else:
122 self._extra = self._extra[:i - self._lgt]
122 self._extra = self._extra[:i - self._lgt]
123
123
124 def _calculate_index(self, i):
124 def _calculate_index(self, i):
125 return self._offsets[i]
125 return self._offsets[i]
126
126
127 def parse_index2(data, inline):
127 def parse_index2(data, inline):
128 if not inline:
128 if not inline:
129 return IndexObject(data), None
129 return IndexObject(data), None
130 return InlinedIndexObject(data, inline), (0, data)
130 return InlinedIndexObject(data, inline), (0, data)
131
131
132 def parse_dirstate(dmap, copymap, st):
132 def parse_dirstate(dmap, copymap, st):
133 parents = [st[:20], st[20: 40]]
133 parents = [st[:20], st[20: 40]]
134 # dereference fields so they will be local in loop
134 # dereference fields so they will be local in loop
135 format = ">cllll"
135 format = ">cllll"
136 e_size = struct.calcsize(format)
136 e_size = struct.calcsize(format)
137 pos1 = 40
137 pos1 = 40
138 l = len(st)
138 l = len(st)
139
139
140 # the inner loop
140 # the inner loop
141 while pos1 < l:
141 while pos1 < l:
142 pos2 = pos1 + e_size
142 pos2 = pos1 + e_size
143 e = _unpack(">cllll", st[pos1:pos2]) # a literal here is faster
143 e = _unpack(">cllll", st[pos1:pos2]) # a literal here is faster
144 pos1 = pos2 + e[4]
144 pos1 = pos2 + e[4]
145 f = st[pos2:pos1]
145 f = st[pos2:pos1]
146 if '\0' in f:
146 if '\0' in f:
147 f, c = f.split('\0')
147 f, c = f.split('\0')
148 copymap[f] = c
148 copymap[f] = c
149 dmap[f] = e[:4]
149 dmap[f] = e[:4]
150 return parents
150 return parents
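
Each dirstate record is a fixed ">cllll" header (state, mode, size, mtime, path length) followed by the path, which may embed a NUL-separated copy source. A round-trip sketch of a single record:

    import struct
    fmt = ">cllll"
    f = b'a.txt'
    rec = struct.pack(fmt, b'n', 0o644, 12, 0, len(f)) + f
    hdr = struct.calcsize(fmt)
    state, mode, size, mtime, flen = struct.unpack(fmt, rec[:hdr])
    assert rec[hdr:hdr + flen] == f
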
151
151
152 def pack_dirstate(dmap, copymap, pl, now):
152 def pack_dirstate(dmap, copymap, pl, now):
153 now = int(now)
153 now = int(now)
154 cs = stringio()
154 cs = stringio()
155 write = cs.write
155 write = cs.write
156 write("".join(pl))
156 write("".join(pl))
157 for f, e in dmap.iteritems():
157 for f, e in dmap.iteritems():
158 if e[0] == 'n' and e[3] == now:
158 if e[0] == 'n' and e[3] == now:
159 # The file was last modified "simultaneously" with the current
159 # The file was last modified "simultaneously" with the current
160 # write to dirstate (i.e. within the same second for file-
160 # write to dirstate (i.e. within the same second for file-
161 # systems with a granularity of 1 sec). This commonly happens
161 # systems with a granularity of 1 sec). This commonly happens
162 # for at least a couple of files on 'update'.
162 # for at least a couple of files on 'update'.
163 # The user could change the file without changing its size
163 # The user could change the file without changing its size
164 # within the same second. Invalidate the file's mtime in
164 # within the same second. Invalidate the file's mtime in
165 # dirstate, forcing future 'status' calls to compare the
165 # dirstate, forcing future 'status' calls to compare the
166 # contents of the file if the size is the same. This prevents
166 # contents of the file if the size is the same. This prevents
167 # mistakenly treating such files as clean.
167 # mistakenly treating such files as clean.
168 e = dirstatetuple(e[0], e[1], e[2], -1)
168 e = dirstatetuple(e[0], e[1], e[2], -1)
169 dmap[f] = e
169 dmap[f] = e
170
170
171 if f in copymap:
171 if f in copymap:
172 f = "%s\0%s" % (f, copymap[f])
172 f = "%s\0%s" % (f, copymap[f])
173 e = _pack(">cllll", e[0], e[1], e[2], e[3], len(f))
173 e = _pack(">cllll", e[0], e[1], e[2], e[3], len(f))
174 write(e)
174 write(e)
175 write(f)
175 write(f)
176 return cs.getvalue()
176 return cs.getvalue()
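To make the record layout above concrete, here is a small round-trip sketch of the ">cllll" dirstate entry format (state char, mode, size, mtime, filename length, then the filename with an optional "\0"-separated copy source). The sample values are made up; real entries are produced by pack_dirstate above.

    import struct

    def pack_entry(state, mode, size, mtime, f, copysource=None):
        # header: state char + mode, size, mtime + length of the name payload
        if copysource is not None:
            f = f + b'\0' + copysource
        return struct.pack(">cllll", state, mode, size, mtime, len(f)) + f

    record = pack_entry(b'n', 0o644, 12, 0, b'a.txt', copysource=b'b.txt')
    hdrsize = struct.calcsize(">cllll")  # 17 bytes
    state, mode, size, mtime, flen = struct.unpack(">cllll", record[:hdrsize])
    assert record[hdrsize:hdrsize + flen] == b'a.txt\0b.txt'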
@@ -1,268 +1,268 b''
1 # repoview.py - Filtered view of a localrepo object
1 # repoview.py - Filtered view of a localrepo object
2 #
2 #
3 # Copyright 2012 Pierre-Yves David <pierre-yves.david@ens-lyon.org>
3 # Copyright 2012 Pierre-Yves David <pierre-yves.david@ens-lyon.org>
4 # Logilab SA <contact@logilab.fr>
4 # Logilab SA <contact@logilab.fr>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 import copy
11 import copy
12 import weakref
12 import weakref
13
13
14 from .node import nullrev
14 from .node import nullrev
15 from . import (
15 from . import (
16 obsolete,
16 obsolete,
17 phases,
17 phases,
18 pycompat,
18 pycompat,
19 tags as tagsmod,
19 tags as tagsmod,
20 )
20 )
21
21
22 def hideablerevs(repo):
22 def hideablerevs(repo):
23 """Revision candidates to be hidden
23 """Revision candidates to be hidden
24
24
25 This is a standalone function to allow extensions to wrap it.
25 This is a standalone function to allow extensions to wrap it.
26
26
27 Because we use the set of immutable changesets as a fallback subset in
27 Because we use the set of immutable changesets as a fallback subset in
28 branchmap (see mercurial.branchmap.subsettable), you cannot set "public"
28 branchmap (see mercurial.branchmap.subsettable), you cannot set "public"
29 changesets as "hideable". Doing so would break multiple code assertions and
29 changesets as "hideable". Doing so would break multiple code assertions and
30 lead to crashes."""
30 lead to crashes."""
31 return obsolete.getrevs(repo, 'obsolete')
31 return obsolete.getrevs(repo, 'obsolete')
32
32
33 def pinnedrevs(repo):
33 def pinnedrevs(repo):
34 """revisions blocking hidden changesets from being filtered
34 """revisions blocking hidden changesets from being filtered
35 """
35 """
36
36
37 cl = repo.changelog
37 cl = repo.changelog
38 pinned = set()
38 pinned = set()
39 pinned.update([par.rev() for par in repo[None].parents()])
39 pinned.update([par.rev() for par in repo[None].parents()])
40 pinned.update([cl.rev(bm) for bm in repo._bookmarks.values()])
40 pinned.update([cl.rev(bm) for bm in repo._bookmarks.values()])
41
41
42 tags = {}
42 tags = {}
43 tagsmod.readlocaltags(repo.ui, repo, tags, {})
43 tagsmod.readlocaltags(repo.ui, repo, tags, {})
44 if tags:
44 if tags:
45 rev, nodemap = cl.rev, cl.nodemap
45 rev, nodemap = cl.rev, cl.nodemap
46 pinned.update(rev(t[0]) for t in tags.values() if t[0] in nodemap)
46 pinned.update(rev(t[0]) for t in tags.values() if t[0] in nodemap)
47 return pinned
47 return pinned
48
48
49
49
50 def _revealancestors(pfunc, hidden, revs):
50 def _revealancestors(pfunc, hidden, revs):
51 """reveals contiguous chains of hidden ancestors of 'revs' by removing them
51 """reveals contiguous chains of hidden ancestors of 'revs' by removing them
52 from 'hidden'
52 from 'hidden'
53
53
54 - pfunc(r): a function returning the parents of 'r',
54 - pfunc(r): a function returning the parents of 'r',
55 - hidden: the (preliminary) hidden revisions, to be updated
55 - hidden: the (preliminary) hidden revisions, to be updated
56 - revs: iterable of revnum,
56 - revs: iterable of revnum,
57
57
58 (Ancestors are revealed exclusively, i.e. the elements in 'revs' are
58 (Ancestors are revealed exclusively, i.e. the elements in 'revs' are
59 *not* revealed)
59 *not* revealed)
60 """
60 """
61 stack = list(revs)
61 stack = list(revs)
62 while stack:
62 while stack:
63 for p in pfunc(stack.pop()):
63 for p in pfunc(stack.pop()):
64 if p != nullrev and p in hidden:
64 if p != nullrev and p in hidden:
65 hidden.remove(p)
65 hidden.remove(p)
66 stack.append(p)
66 stack.append(p)
67
67
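A tiny illustration of _revealancestors on a hypothetical linear history (the graph below is made up; in real use pfunc is changelog.parentrevs):

    # linear history 0 <- 1 <- 2 <- 3, parents as (p1, p2) with -1 == nullrev
    graph = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (2, -1)}
    hidden = {1, 2, 3}
    _revealancestors(graph.__getitem__, hidden, {3})
    # the hidden ancestors 1 and 2 were revealed; 3 itself was not
    assert hidden == {3}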
68 def computehidden(repo, visibilityexceptions=None):
68 def computehidden(repo, visibilityexceptions=None):
69 """compute the set of hidden revision to filter
69 """compute the set of hidden revision to filter
70
70
71 During most operations hidden revisions should be filtered."""
71 During most operations hidden revisions should be filtered."""
72 assert not repo.changelog.filteredrevs
72 assert not repo.changelog.filteredrevs
73
73
74 hidden = hideablerevs(repo)
74 hidden = hideablerevs(repo)
75 if hidden:
75 if hidden:
76 hidden = set(hidden - pinnedrevs(repo))
76 hidden = set(hidden - pinnedrevs(repo))
77 if visibilityexceptions:
77 if visibilityexceptions:
78 hidden -= visibilityexceptions
78 hidden -= visibilityexceptions
79 pfunc = repo.changelog.parentrevs
79 pfunc = repo.changelog.parentrevs
80 mutable = repo._phasecache.getrevset(repo, phases.mutablephases)
80 mutable = repo._phasecache.getrevset(repo, phases.mutablephases)
81
81
82 visible = mutable - hidden
82 visible = mutable - hidden
83 _revealancestors(pfunc, hidden, visible)
83 _revealancestors(pfunc, hidden, visible)
84 return frozenset(hidden)
84 return frozenset(hidden)
85
85
86 def computeunserved(repo, visibilityexceptions=None):
86 def computeunserved(repo, visibilityexceptions=None):
87 """compute the set of revision that should be filtered when used a server
87 """compute the set of revision that should be filtered when used a server
88
88
89 Secret and hidden changesets should not pretend to be here."""
89 Secret and hidden changesets should not pretend to be here."""
90 assert not repo.changelog.filteredrevs
90 assert not repo.changelog.filteredrevs
91 # fast path in simple case to avoid impact of non-optimised code
91 # fast path in simple case to avoid impact of non-optimised code
92 hiddens = filterrevs(repo, 'visible')
92 hiddens = filterrevs(repo, 'visible')
93 if phases.hassecret(repo):
93 if phases.hassecret(repo):
94 secrets = repo._phasecache.getrevset(repo, phases.remotehiddenphases)
94 secrets = repo._phasecache.getrevset(repo, phases.remotehiddenphases)
95 return frozenset(hiddens | frozenset(secrets))
95 return frozenset(hiddens | frozenset(secrets))
96 else:
96 else:
97 return hiddens
97 return hiddens
98
98
99 def computemutable(repo, visibilityexceptions=None):
99 def computemutable(repo, visibilityexceptions=None):
100 assert not repo.changelog.filteredrevs
100 assert not repo.changelog.filteredrevs
101 # fast check to avoid revset call on huge repo
101 # fast check to avoid revset call on huge repo
102 if any(repo._phasecache.phaseroots[1:]):
102 if any(repo._phasecache.phaseroots[1:]):
103 getphase = repo._phasecache.phase
103 getphase = repo._phasecache.phase
104 maymutable = filterrevs(repo, 'base')
104 maymutable = filterrevs(repo, 'base')
105 return frozenset(r for r in maymutable if getphase(repo, r))
105 return frozenset(r for r in maymutable if getphase(repo, r))
106 return frozenset()
106 return frozenset()
107
107
108 def computeimpactable(repo, visibilityexceptions=None):
108 def computeimpactable(repo, visibilityexceptions=None):
109 """Everything impactable by mutable revision
109 """Everything impactable by mutable revision
110
110
111 The immutable filter still has some chance to get invalidated. This will
111 The immutable filter still has some chance to get invalidated. This will
112 happen when:
112 happen when:
113
113
114 - you garbage collect hidden changesets,
114 - you garbage collect hidden changesets,
115 - public phase is moved backward,
115 - public phase is moved backward,
116 - something is changed in the filtering (this could be fixed)
116 - something is changed in the filtering (this could be fixed)
117
117
118 This filters out any mutable changeset and any public changeset that may be
118 This filters out any mutable changeset and any public changeset that may be
119 impacted by something happening to a mutable revision.
119 impacted by something happening to a mutable revision.
120
120
121 This is achieved by filtering out everything with a revision number equal
121 This is achieved by filtering out everything with a revision number equal
122 to or higher than that of the first mutable changeset."""
122 to or higher than that of the first mutable changeset."""
123 assert not repo.changelog.filteredrevs
123 assert not repo.changelog.filteredrevs
124 cl = repo.changelog
124 cl = repo.changelog
125 firstmutable = len(cl)
125 firstmutable = len(cl)
126 for roots in repo._phasecache.phaseroots[1:]:
126 for roots in repo._phasecache.phaseroots[1:]:
127 if roots:
127 if roots:
128 firstmutable = min(firstmutable, min(cl.rev(r) for r in roots))
128 firstmutable = min(firstmutable, min(cl.rev(r) for r in roots))
129 # protect from nullrev root
129 # protect from nullrev root
130 firstmutable = max(0, firstmutable)
130 firstmutable = max(0, firstmutable)
131 return frozenset(pycompat.xrange(firstmutable, len(cl)))
131 return frozenset(pycompat.xrange(firstmutable, len(cl)))
132
132
133 # function to compute filtered set
133 # function to compute filtered set
134 #
134 #
135 # When adding a new filter you MUST update the table at:
135 # When adding a new filter you MUST update the table at:
136 # mercurial.branchmap.subsettable
136 # mercurial.branchmap.subsettable
137 # Otherwise your filter will have to recompute all its branches cache
137 # Otherwise your filter will have to recompute all its branches cache
138 # from scratch (very slow).
138 # from scratch (very slow).
139 filtertable = {'visible': computehidden,
139 filtertable = {'visible': computehidden,
140 'visible-hidden': computehidden,
140 'visible-hidden': computehidden,
141 'served': computeunserved,
141 'served': computeunserved,
142 'immutable': computemutable,
142 'immutable': computemutable,
143 'base': computeimpactable}
143 'base': computeimpactable}
144
144
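As the comment above warns, a hypothetical extension adding its own filter would register it in this table and also extend mercurial.branchmap.subsettable; a minimal sketch (the filter below is invented for illustration):

    def computenothing(repo, visibilityexceptions=None):
        # hides no revision at all
        return frozenset()

    filtertable['everything'] = computenothing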
145 def filterrevs(repo, filtername, visibilityexceptions=None):
145 def filterrevs(repo, filtername, visibilityexceptions=None):
146 """returns set of filtered revision for this filter name
146 """returns set of filtered revision for this filter name
147
147
148 visibilityexceptions is a set of revs which are exceptions to the
148 visibilityexceptions is a set of revs which are exceptions to the
149 hidden state and must be visible. They are dynamic and hence we should not
149 hidden state and must be visible. They are dynamic and hence we should not
150 cache their result"""
150 cache their result"""
151 if filtername not in repo.filteredrevcache:
151 if filtername not in repo.filteredrevcache:
152 func = filtertable[filtername]
152 func = filtertable[filtername]
153 if visibilityexceptions:
153 if visibilityexceptions:
154 return func(repo.unfiltered, visibilityexceptions)
154 return func(repo.unfiltered, visibilityexceptions)
155 repo.filteredrevcache[filtername] = func(repo.unfiltered())
155 repo.filteredrevcache[filtername] = func(repo.unfiltered())
156 return repo.filteredrevcache[filtername]
156 return repo.filteredrevcache[filtername]
157
157
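A short usage sketch, assuming repo is an existing localrepo instance; the first call per filter name computes and caches the set, later calls hit repo.filteredrevcache:

    hidden = filterrevs(repo, 'visible')
    served = filterrevs(repo, 'served')
    # 'served' additionally filters secret changesets, so it is a superset
    assert hidden <= served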
158 class repoview(object):
158 class repoview(object):
159 """Provide a read/write view of a repo through a filtered changelog
159 """Provide a read/write view of a repo through a filtered changelog
160
160
161 This object is used to access a filtered version of a repository without
161 This object is used to access a filtered version of a repository without
162 altering the original repository object itself. We can not alter the
162 altering the original repository object itself. We can not alter the
163 original object for two main reasons:
163 original object for two main reasons:
164 - Altering it would prevent using the repo with multiple filters at the
164 - Altering it would prevent using the repo with multiple filters at the
165 same time, in particular when multiple threads are involved.
165 same time, in particular when multiple threads are involved.
166 - It would make the scope of the filtering harder to control.
166 - It would make the scope of the filtering harder to control.
167
167
168 This object behaves very much like the original repository. All attribute
168 This object behaves very much like the original repository. All attribute
169 operations are done on the original repository:
169 operations are done on the original repository:
170 - An access to `repoview.someattr` actually returns `repo.someattr`,
170 - An access to `repoview.someattr` actually returns `repo.someattr`,
171 - A write to `repoview.someattr` actually sets value of `repo.someattr`,
171 - A write to `repoview.someattr` actually sets value of `repo.someattr`,
172 - A deletion of `repoview.someattr` actually drops `someattr`
172 - A deletion of `repoview.someattr` actually drops `someattr`
173 from `repo.__dict__`.
173 from `repo.__dict__`.
174
174
175 The only exception is the `changelog` property. It is overridden to return
175 The only exception is the `changelog` property. It is overridden to return
176 a (surface) copy of `repo.changelog` with some revisions filtered. The
176 a (surface) copy of `repo.changelog` with some revisions filtered. The
177 `filtername` attribute of the view controls the revisions that need to be
177 `filtername` attribute of the view controls the revisions that need to be
178 filtered. (the fact the changelog is copied is an implementation detail).
178 filtered. (the fact the changelog is copied is an implementation detail).
179
179
180 Unlike attributes, this object intercepts all method calls. This means that
180 Unlike attributes, this object intercepts all method calls. This means that
181 all methods are run on the `repoview` object with the filtered `changelog`
181 all methods are run on the `repoview` object with the filtered `changelog`
182 property. For this purpose the simple `repoview` class must be mixed with
182 property. For this purpose the simple `repoview` class must be mixed with
183 the actual class of the repository. This ensures that the resulting
183 the actual class of the repository. This ensures that the resulting
184 `repoview` object has the very same methods as the repo object. This
184 `repoview` object has the very same methods as the repo object. This
185 leads to the property below.
185 leads to the property below.
186
186
187 repoview.method() --> repo.__class__.method(repoview)
187 repoview.method() --> repo.__class__.method(repoview)
188
188
189 The inheritance has to be done dynamically because `repo` can be of any
189 The inheritance has to be done dynamically because `repo` can be of any
190 subclasses of `localrepo`. Eg: `bundlerepo` or `statichttprepo`.
190 subclasses of `localrepo`. Eg: `bundlerepo` or `statichttprepo`.
191 """
191 """
192
192
193 def __init__(self, repo, filtername, visibilityexceptions=None):
193 def __init__(self, repo, filtername, visibilityexceptions=None):
194 object.__setattr__(self, r'_unfilteredrepo', repo)
194 object.__setattr__(self, r'_unfilteredrepo', repo)
195 object.__setattr__(self, r'filtername', filtername)
195 object.__setattr__(self, r'filtername', filtername)
196 object.__setattr__(self, r'_clcachekey', None)
196 object.__setattr__(self, r'_clcachekey', None)
197 object.__setattr__(self, r'_clcache', None)
197 object.__setattr__(self, r'_clcache', None)
198 # revs which are exceptions and must not be hidden
198 # revs which are exceptions and must not be hidden
199 object.__setattr__(self, r'_visibilityexceptions',
199 object.__setattr__(self, r'_visibilityexceptions',
200 visibilityexceptions)
200 visibilityexceptions)
201
201
202 # not a propertycache on purpose; we shall implement a proper cache later
202 # not a propertycache on purpose; we shall implement a proper cache later
203 @property
203 @property
204 def changelog(self):
204 def changelog(self):
205 """return a filtered version of the changeset
205 """return a filtered version of the changeset
206
206
207 this changelog must not be used for writing"""
207 this changelog must not be used for writing"""
208 # some cache may be implemented later
208 # some cache may be implemented later
209 unfi = self._unfilteredrepo
209 unfi = self._unfilteredrepo
210 unfichangelog = unfi.changelog
210 unfichangelog = unfi.changelog
211 # bypass call to changelog.method
211 # bypass call to changelog.method
212 unfiindex = unfichangelog.index
212 unfiindex = unfichangelog.index
213 unfilen = len(unfiindex) - 1
213 unfilen = len(unfiindex)
214 unfinode = unfiindex[unfilen - 1][7]
214 unfinode = unfiindex[unfilen - 1][7]
215
215
216 revs = filterrevs(unfi, self.filtername, self._visibilityexceptions)
216 revs = filterrevs(unfi, self.filtername, self._visibilityexceptions)
217 cl = self._clcache
217 cl = self._clcache
218 newkey = (unfilen, unfinode, hash(revs), unfichangelog._delayed)
218 newkey = (unfilen, unfinode, hash(revs), unfichangelog._delayed)
219 # if cl.index is not unfiindex, unfi.changelog would be
219 # if cl.index is not unfiindex, unfi.changelog would be
220 # recreated, and our clcache refers to garbage object
220 # recreated, and our clcache refers to garbage object
221 if (cl is not None and
221 if (cl is not None and
222 (cl.index is not unfiindex or newkey != self._clcachekey)):
222 (cl.index is not unfiindex or newkey != self._clcachekey)):
223 cl = None
223 cl = None
224 # could have been made None by the previous if
224 # could have been made None by the previous if
225 if cl is None:
225 if cl is None:
226 cl = copy.copy(unfichangelog)
226 cl = copy.copy(unfichangelog)
227 cl.filteredrevs = revs
227 cl.filteredrevs = revs
228 object.__setattr__(self, r'_clcache', cl)
228 object.__setattr__(self, r'_clcache', cl)
229 object.__setattr__(self, r'_clcachekey', newkey)
229 object.__setattr__(self, r'_clcachekey', newkey)
230 return cl
230 return cl
231
231
232 def unfiltered(self):
232 def unfiltered(self):
233 """Return an unfiltered version of a repo"""
233 """Return an unfiltered version of a repo"""
234 return self._unfilteredrepo
234 return self._unfilteredrepo
235
235
236 def filtered(self, name, visibilityexceptions=None):
236 def filtered(self, name, visibilityexceptions=None):
237 """Return a filtered version of a repository"""
237 """Return a filtered version of a repository"""
238 if name == self.filtername and not visibilityexceptions:
238 if name == self.filtername and not visibilityexceptions:
239 return self
239 return self
240 return self.unfiltered().filtered(name, visibilityexceptions)
240 return self.unfiltered().filtered(name, visibilityexceptions)
241
241
242 def __repr__(self):
242 def __repr__(self):
243 return r'<%s:%s %r>' % (self.__class__.__name__,
243 return r'<%s:%s %r>' % (self.__class__.__name__,
244 pycompat.sysstr(self.filtername),
244 pycompat.sysstr(self.filtername),
245 self.unfiltered())
245 self.unfiltered())
246
246
247 # every attribute access is forwarded to the proxied repo
247 # every attribute access is forwarded to the proxied repo
248 def __getattr__(self, attr):
248 def __getattr__(self, attr):
249 return getattr(self._unfilteredrepo, attr)
249 return getattr(self._unfilteredrepo, attr)
250
250
251 def __setattr__(self, attr, value):
251 def __setattr__(self, attr, value):
252 return setattr(self._unfilteredrepo, attr, value)
252 return setattr(self._unfilteredrepo, attr, value)
253
253
254 def __delattr__(self, attr):
254 def __delattr__(self, attr):
255 return delattr(self._unfilteredrepo, attr)
255 return delattr(self._unfilteredrepo, attr)
256
256
257 # Python <3.4 easily leaks types via __mro__. See
257 # Python <3.4 easily leaks types via __mro__. See
258 # https://bugs.python.org/issue17950. We cache dynamically created types
258 # https://bugs.python.org/issue17950. We cache dynamically created types
259 # so they won't be leaked on every invocation of repo.filtered().
259 # so they won't be leaked on every invocation of repo.filtered().
260 _filteredrepotypes = weakref.WeakKeyDictionary()
260 _filteredrepotypes = weakref.WeakKeyDictionary()
261
261
262 def newtype(base):
262 def newtype(base):
263 """Create a new type with the repoview mixin and the given base class"""
263 """Create a new type with the repoview mixin and the given base class"""
264 if base not in _filteredrepotypes:
264 if base not in _filteredrepotypes:
265 class filteredrepo(repoview, base):
265 class filteredrepo(repoview, base):
266 pass
266 pass
267 _filteredrepotypes[base] = filteredrepo
267 _filteredrepotypes[base] = filteredrepo
268 return _filteredrepotypes[base]
268 return _filteredrepotypes[base]
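A hedged sketch of how newtype() is typically consumed (repo is assumed to be a localrepo or subclass; this mirrors how localrepo.filtered builds views without claiming to be its exact code):

    cls = newtype(repo.unfiltered().__class__)
    view = cls(repo.unfiltered(), 'visible')
    assert view.unfiltered() is repo.unfiltered()
    # the dynamically created type is cached per base class
    assert newtype(repo.unfiltered().__class__) is cls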
@@ -1,2971 +1,2969 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import collections
16 import collections
17 import contextlib
17 import contextlib
18 import errno
18 import errno
19 import hashlib
19 import hashlib
20 import heapq
20 import heapq
21 import os
21 import os
22 import re
22 import re
23 import struct
23 import struct
24 import zlib
24 import zlib
25
25
26 # import stuff from node for others to import from revlog
26 # import stuff from node for others to import from revlog
27 from .node import (
27 from .node import (
28 bin,
28 bin,
29 hex,
29 hex,
30 nullid,
30 nullid,
31 nullrev,
31 nullrev,
32 wdirfilenodeids,
32 wdirfilenodeids,
33 wdirhex,
33 wdirhex,
34 wdirid,
34 wdirid,
35 wdirrev,
35 wdirrev,
36 )
36 )
37 from .i18n import _
37 from .i18n import _
38 from .thirdparty import (
38 from .thirdparty import (
39 attr,
39 attr,
40 )
40 )
41 from . import (
41 from . import (
42 ancestor,
42 ancestor,
43 error,
43 error,
44 mdiff,
44 mdiff,
45 policy,
45 policy,
46 pycompat,
46 pycompat,
47 templatefilters,
47 templatefilters,
48 util,
48 util,
49 )
49 )
50 from .utils import (
50 from .utils import (
51 stringutil,
51 stringutil,
52 )
52 )
53
53
54 parsers = policy.importmod(r'parsers')
54 parsers = policy.importmod(r'parsers')
55
55
56 # Aliased for performance.
56 # Aliased for performance.
57 _zlibdecompress = zlib.decompress
57 _zlibdecompress = zlib.decompress
58
58
59 # revlog header flags
59 # revlog header flags
60 REVLOGV0 = 0
60 REVLOGV0 = 0
61 REVLOGV1 = 1
61 REVLOGV1 = 1
62 # Dummy value until file format is finalized.
62 # Dummy value until file format is finalized.
63 # Reminder: change the bounds check in revlog.__init__ when this is changed.
63 # Reminder: change the bounds check in revlog.__init__ when this is changed.
64 REVLOGV2 = 0xDEAD
64 REVLOGV2 = 0xDEAD
65 FLAG_INLINE_DATA = (1 << 16)
65 FLAG_INLINE_DATA = (1 << 16)
66 FLAG_GENERALDELTA = (1 << 17)
66 FLAG_GENERALDELTA = (1 << 17)
67 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
67 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
68 REVLOG_DEFAULT_FORMAT = REVLOGV1
68 REVLOG_DEFAULT_FORMAT = REVLOGV1
69 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
69 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
70 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
70 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
71 REVLOGV2_FLAGS = REVLOGV1_FLAGS
71 REVLOGV2_FLAGS = REVLOGV1_FLAGS
72
72
73 # revlog index flags
73 # revlog index flags
74 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
74 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
75 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
75 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
76 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
76 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
77 REVIDX_DEFAULT_FLAGS = 0
77 REVIDX_DEFAULT_FLAGS = 0
78 # stable order in which flags need to be processed and their processors applied
78 # stable order in which flags need to be processed and their processors applied
79 REVIDX_FLAGS_ORDER = [
79 REVIDX_FLAGS_ORDER = [
80 REVIDX_ISCENSORED,
80 REVIDX_ISCENSORED,
81 REVIDX_ELLIPSIS,
81 REVIDX_ELLIPSIS,
82 REVIDX_EXTSTORED,
82 REVIDX_EXTSTORED,
83 ]
83 ]
84 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
84 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
85 # bitmask for flags that could cause rawdata content change
85 # bitmask for flags that could cause rawdata content change
86 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
86 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
87
87
88 # max size of revlog with inline data
88 # max size of revlog with inline data
89 _maxinline = 131072
89 _maxinline = 131072
90 _chunksize = 1048576
90 _chunksize = 1048576
91
91
92 RevlogError = error.RevlogError
92 RevlogError = error.RevlogError
93 LookupError = error.LookupError
93 LookupError = error.LookupError
94 AmbiguousPrefixLookupError = error.AmbiguousPrefixLookupError
94 AmbiguousPrefixLookupError = error.AmbiguousPrefixLookupError
95 CensoredNodeError = error.CensoredNodeError
95 CensoredNodeError = error.CensoredNodeError
96 ProgrammingError = error.ProgrammingError
96 ProgrammingError = error.ProgrammingError
97
97
98 # Store flag processors (cf. 'addflagprocessor()' to register)
98 # Store flag processors (cf. 'addflagprocessor()' to register)
99 _flagprocessors = {
99 _flagprocessors = {
100 REVIDX_ISCENSORED: None,
100 REVIDX_ISCENSORED: None,
101 }
101 }
102
102
103 _mdre = re.compile('\1\n')
103 _mdre = re.compile('\1\n')
104 def parsemeta(text):
104 def parsemeta(text):
105 """return (metadatadict, metadatasize)"""
105 """return (metadatadict, metadatasize)"""
106 # text can be buffer, so we can't use .startswith or .index
106 # text can be buffer, so we can't use .startswith or .index
107 if text[:2] != '\1\n':
107 if text[:2] != '\1\n':
108 return None, None
108 return None, None
109 s = _mdre.search(text, 2).start()
109 s = _mdre.search(text, 2).start()
110 mtext = text[2:s]
110 mtext = text[2:s]
111 meta = {}
111 meta = {}
112 for l in mtext.splitlines():
112 for l in mtext.splitlines():
113 k, v = l.split(": ", 1)
113 k, v = l.split(": ", 1)
114 meta[k] = v
114 meta[k] = v
115 return meta, (s + 2)
115 return meta, (s + 2)
116
116
117 def packmeta(meta, text):
117 def packmeta(meta, text):
118 keys = sorted(meta)
118 keys = sorted(meta)
119 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
119 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
120 return "\1\n%s\1\n%s" % (metatext, text)
120 return "\1\n%s\1\n%s" % (metatext, text)
121
121
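A quick round-trip of the "\1\n" metadata envelope implemented by parsemeta/packmeta (the sample metadata is made up):

    packed = packmeta({'copy': 'a.txt'}, 'file body')
    meta, size = parsemeta(packed)
    assert meta == {'copy': 'a.txt'}
    assert packed[size:] == 'file body'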
122 def _censoredtext(text):
122 def _censoredtext(text):
123 m, offs = parsemeta(text)
123 m, offs = parsemeta(text)
124 return m and "censored" in m
124 return m and "censored" in m
125
125
126 def addflagprocessor(flag, processor):
126 def addflagprocessor(flag, processor):
127 """Register a flag processor on a revision data flag.
127 """Register a flag processor on a revision data flag.
128
128
129 Invariant:
129 Invariant:
130 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
130 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
131 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
131 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
132 - Only one flag processor can be registered on a specific flag.
132 - Only one flag processor can be registered on a specific flag.
133 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
133 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
134 following signatures:
134 following signatures:
135 - (read) f(self, rawtext) -> text, bool
135 - (read) f(self, rawtext) -> text, bool
136 - (write) f(self, text) -> rawtext, bool
136 - (write) f(self, text) -> rawtext, bool
137 - (raw) f(self, rawtext) -> bool
137 - (raw) f(self, rawtext) -> bool
138 "text" is presented to the user. "rawtext" is stored in revlog data, not
138 "text" is presented to the user. "rawtext" is stored in revlog data, not
139 directly visible to the user.
139 directly visible to the user.
140 The boolean returned by these transforms is used to determine whether
140 The boolean returned by these transforms is used to determine whether
141 the returned text can be used for hash integrity checking. For example,
141 the returned text can be used for hash integrity checking. For example,
142 if "write" returns False, then "text" is used to generate hash. If
142 if "write" returns False, then "text" is used to generate hash. If
143 "write" returns True, that basically means "rawtext" returned by "write"
143 "write" returns True, that basically means "rawtext" returned by "write"
144 should be used to generate hash. Usually, "write" and "read" return
144 should be used to generate hash. Usually, "write" and "read" return
145 different booleans. And "raw" returns the same boolean as "write".
145 different booleans. And "raw" returns the same boolean as "write".
146
146
147 Note: The 'raw' transform is used for changegroup generation and in some
147 Note: The 'raw' transform is used for changegroup generation and in some
148 debug commands. In this case the transform only indicates whether the
148 debug commands. In this case the transform only indicates whether the
149 contents can be used for hash integrity checks.
149 contents can be used for hash integrity checks.
150 """
150 """
151 if not flag & REVIDX_KNOWN_FLAGS:
151 if not flag & REVIDX_KNOWN_FLAGS:
152 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
152 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
153 raise ProgrammingError(msg)
153 raise ProgrammingError(msg)
154 if flag not in REVIDX_FLAGS_ORDER:
154 if flag not in REVIDX_FLAGS_ORDER:
155 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
155 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
156 raise ProgrammingError(msg)
156 raise ProgrammingError(msg)
157 if flag in _flagprocessors:
157 if flag in _flagprocessors:
158 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
158 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
159 raise error.Abort(msg)
159 raise error.Abort(msg)
160 _flagprocessors[flag] = processor
160 _flagprocessors[flag] = processor
161
161
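A minimal registration sketch following the (read, write, raw) contract described in the docstring; the no-op processor below is hypothetical:

    def _extread(self, rawtext):
        return rawtext, True   # returned text is usable for hash checks
    def _extwrite(self, text):
        return text, True
    def _extraw(self, rawtext):
        return True

    addflagprocessor(REVIDX_EXTSTORED, (_extread, _extwrite, _extraw))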
162 def getoffset(q):
162 def getoffset(q):
163 return int(q >> 16)
163 return int(q >> 16)
164
164
165 def gettype(q):
165 def gettype(q):
166 return int(q & 0xFFFF)
166 return int(q & 0xFFFF)
167
167
168 def offset_type(offset, type):
168 def offset_type(offset, type):
169 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
169 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
170 raise ValueError('unknown revlog index flags')
170 raise ValueError('unknown revlog index flags')
171 return int(int(offset) << 16 | type)
171 return int(int(offset) << 16 | type)
172
172
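These helpers pack a byte offset into the high bits of the first index field and keep 16 bits of flags in the low bits; a quick sanity check:

    q = offset_type(4096, REVIDX_ISCENSORED)
    assert getoffset(q) == 4096
    assert gettype(q) == REVIDX_ISCENSORED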
173 _nullhash = hashlib.sha1(nullid)
173 _nullhash = hashlib.sha1(nullid)
174
174
175 def hash(text, p1, p2):
175 def hash(text, p1, p2):
176 """generate a hash from the given text and its parent hashes
176 """generate a hash from the given text and its parent hashes
177
177
178 This hash combines both the current file contents and its history
178 This hash combines both the current file contents and its history
179 in a manner that makes it easy to distinguish nodes with the same
179 in a manner that makes it easy to distinguish nodes with the same
180 content in the revision graph.
180 content in the revision graph.
181 """
181 """
182 # As of now, if one of the parent node is null, p2 is null
182 # As of now, if one of the parent node is null, p2 is null
183 if p2 == nullid:
183 if p2 == nullid:
184 # deep copy of a hash is faster than creating one
184 # deep copy of a hash is faster than creating one
185 s = _nullhash.copy()
185 s = _nullhash.copy()
186 s.update(p1)
186 s.update(p1)
187 else:
187 else:
188 # none of the parent nodes are nullid
188 # none of the parent nodes are nullid
189 if p1 < p2:
189 if p1 < p2:
190 a = p1
190 a = p1
191 b = p2
191 b = p2
192 else:
192 else:
193 a = p2
193 a = p2
194 b = p1
194 b = p1
195 s = hashlib.sha1(a)
195 s = hashlib.sha1(a)
196 s.update(b)
196 s.update(b)
197 s.update(text)
197 s.update(text)
198 return s.digest()
198 return s.digest()
199
199
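A small sanity-check sketch of the node hash defined above: sorting the parents makes it symmetric, and a null p2 reuses the precomputed _nullhash:

    p1, p2 = b'\x11' * 20, b'\x22' * 20
    assert hash(b'data', p1, p2) == hash(b'data', p2, p1)
    assert (hash(b'data', p1, nullid)
            == hashlib.sha1(nullid + p1 + b'data').digest())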
200 class _testrevlog(object):
200 class _testrevlog(object):
201 """minimalist fake revlog to use in doctests"""
201 """minimalist fake revlog to use in doctests"""
202
202
203 def __init__(self, data, density=0.5, mingap=0):
203 def __init__(self, data, density=0.5, mingap=0):
204 """data is an list of revision payload boundaries"""
204 """data is an list of revision payload boundaries"""
205 self._data = data
205 self._data = data
206 self._srdensitythreshold = density
206 self._srdensitythreshold = density
207 self._srmingapsize = mingap
207 self._srmingapsize = mingap
208
208
209 def start(self, rev):
209 def start(self, rev):
210 if rev == 0:
210 if rev == 0:
211 return 0
211 return 0
212 return self._data[rev - 1]
212 return self._data[rev - 1]
213
213
214 def end(self, rev):
214 def end(self, rev):
215 return self._data[rev]
215 return self._data[rev]
216
216
217 def length(self, rev):
217 def length(self, rev):
218 return self.end(rev) - self.start(rev)
218 return self.end(rev) - self.start(rev)
219
219
220 def __len__(self):
220 def __len__(self):
221 return len(self._data)
221 return len(self._data)
222
222
223 def _trimchunk(revlog, revs, startidx, endidx=None):
223 def _trimchunk(revlog, revs, startidx, endidx=None):
224 """returns revs[startidx:endidx] without empty trailing revs
224 """returns revs[startidx:endidx] without empty trailing revs
225
225
226 Doctest Setup
226 Doctest Setup
227 >>> revlog = _testrevlog([
227 >>> revlog = _testrevlog([
228 ... 5, #0
228 ... 5, #0
229 ... 10, #1
229 ... 10, #1
230 ... 12, #2
230 ... 12, #2
231 ... 12, #3 (empty)
231 ... 12, #3 (empty)
232 ... 17, #4
232 ... 17, #4
233 ... 21, #5
233 ... 21, #5
234 ... 21, #6 (empty)
234 ... 21, #6 (empty)
235 ... ])
235 ... ])
236
236
237 Contiguous cases:
237 Contiguous cases:
238 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
238 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
239 [0, 1, 2, 3, 4, 5]
239 [0, 1, 2, 3, 4, 5]
240 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
240 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
241 [0, 1, 2, 3, 4]
241 [0, 1, 2, 3, 4]
242 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
242 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
243 [0, 1, 2]
243 [0, 1, 2]
244 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
244 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
245 [2]
245 [2]
246 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
246 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
247 [3, 4, 5]
247 [3, 4, 5]
248 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
248 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
249 [3, 4]
249 [3, 4]
250
250
251 Discontiguous cases:
251 Discontiguous cases:
252 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
252 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
253 [1, 3, 5]
253 [1, 3, 5]
254 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
254 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
255 [1]
255 [1]
256 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
256 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
257 [3, 5]
257 [3, 5]
258 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
258 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
259 [3, 5]
259 [3, 5]
260 """
260 """
261 length = revlog.length
261 length = revlog.length
262
262
263 if endidx is None:
263 if endidx is None:
264 endidx = len(revs)
264 endidx = len(revs)
265
265
266 # Trim empty revs at the end, but never the very first revision of a chain
266 # Trim empty revs at the end, but never the very first revision of a chain
267 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
267 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
268 endidx -= 1
268 endidx -= 1
269
269
270 return revs[startidx:endidx]
270 return revs[startidx:endidx]
271
271
272 def _segmentspan(revlog, revs):
272 def _segmentspan(revlog, revs):
273 """Get the byte span of a segment of revisions
273 """Get the byte span of a segment of revisions
274
274
275 revs is a sorted array of revision numbers
275 revs is a sorted array of revision numbers
276
276
277 >>> revlog = _testrevlog([
277 >>> revlog = _testrevlog([
278 ... 5, #0
278 ... 5, #0
279 ... 10, #1
279 ... 10, #1
280 ... 12, #2
280 ... 12, #2
281 ... 12, #3 (empty)
281 ... 12, #3 (empty)
282 ... 17, #4
282 ... 17, #4
283 ... ])
283 ... ])
284
284
285 >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
285 >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
286 17
286 17
287 >>> _segmentspan(revlog, [0, 4])
287 >>> _segmentspan(revlog, [0, 4])
288 17
288 17
289 >>> _segmentspan(revlog, [3, 4])
289 >>> _segmentspan(revlog, [3, 4])
290 5
290 5
291 >>> _segmentspan(revlog, [1, 2, 3,])
291 >>> _segmentspan(revlog, [1, 2, 3,])
292 7
292 7
293 >>> _segmentspan(revlog, [1, 3])
293 >>> _segmentspan(revlog, [1, 3])
294 7
294 7
295 """
295 """
296 if not revs:
296 if not revs:
297 return 0
297 return 0
298 return revlog.end(revs[-1]) - revlog.start(revs[0])
298 return revlog.end(revs[-1]) - revlog.start(revs[0])
299
299
300 def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
300 def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
301 """slice revs to reduce the amount of unrelated data to be read from disk.
301 """slice revs to reduce the amount of unrelated data to be read from disk.
302
302
303 ``revs`` is sliced into groups that should be read in a single read.
303 ``revs`` is sliced into groups that should be read in a single read.
304 Assume that revs are sorted.
304 Assume that revs are sorted.
305
305
306 The initial chunk is sliced until the overall density (payload/chunks-span
306 The initial chunk is sliced until the overall density (payload/chunks-span
307 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
307 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
308 `revlog._srmingapsize` is skipped.
308 `revlog._srmingapsize` is skipped.
309
309
310 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
310 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
311 For consistency with other slicing choices, this limit won't go lower than
311 For consistency with other slicing choices, this limit won't go lower than
312 `revlog._srmingapsize`.
312 `revlog._srmingapsize`.
313
313
314 If individual revision chunks are larger than this limit, they will still
314 If individual revision chunks are larger than this limit, they will still
315 be yielded individually.
315 be yielded individually.
316
316
317 >>> revlog = _testrevlog([
317 >>> revlog = _testrevlog([
318 ... 5, #00 (5)
318 ... 5, #00 (5)
319 ... 10, #01 (5)
319 ... 10, #01 (5)
320 ... 12, #02 (2)
320 ... 12, #02 (2)
321 ... 12, #03 (empty)
321 ... 12, #03 (empty)
322 ... 27, #04 (15)
322 ... 27, #04 (15)
323 ... 31, #05 (4)
323 ... 31, #05 (4)
324 ... 31, #06 (empty)
324 ... 31, #06 (empty)
325 ... 42, #07 (11)
325 ... 42, #07 (11)
326 ... 47, #08 (5)
326 ... 47, #08 (5)
327 ... 47, #09 (empty)
327 ... 47, #09 (empty)
328 ... 48, #10 (1)
328 ... 48, #10 (1)
329 ... 51, #11 (3)
329 ... 51, #11 (3)
330 ... 74, #12 (23)
330 ... 74, #12 (23)
331 ... 85, #13 (11)
331 ... 85, #13 (11)
332 ... 86, #14 (1)
332 ... 86, #14 (1)
333 ... 91, #15 (5)
333 ... 91, #15 (5)
334 ... ])
334 ... ])
335
335
336 >>> list(_slicechunk(revlog, list(range(16))))
336 >>> list(_slicechunk(revlog, list(range(16))))
337 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
337 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
338 >>> list(_slicechunk(revlog, [0, 15]))
338 >>> list(_slicechunk(revlog, [0, 15]))
339 [[0], [15]]
339 [[0], [15]]
340 >>> list(_slicechunk(revlog, [0, 11, 15]))
340 >>> list(_slicechunk(revlog, [0, 11, 15]))
341 [[0], [11], [15]]
341 [[0], [11], [15]]
342 >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
342 >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
343 [[0], [11, 13, 15]]
343 [[0], [11, 13, 15]]
344 >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
344 >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
345 [[1, 2], [5, 8, 10, 11], [14]]
345 [[1, 2], [5, 8, 10, 11], [14]]
346
346
347 Slicing with a maximum chunk size
347 Slicing with a maximum chunk size
348 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
348 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
349 [[0], [11], [13], [15]]
349 [[0], [11], [13], [15]]
350 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
350 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
351 [[0], [11], [13, 15]]
351 [[0], [11], [13, 15]]
352 """
352 """
353 if targetsize is not None:
353 if targetsize is not None:
354 targetsize = max(targetsize, revlog._srmingapsize)
354 targetsize = max(targetsize, revlog._srmingapsize)
355 # targetsize should not be specified when evaluating delta candidates:
355 # targetsize should not be specified when evaluating delta candidates:
356 # * targetsize is used to ensure we stay within specification when reading,
356 # * targetsize is used to ensure we stay within specification when reading,
357 # * deltainfo is used to pick a good delta chain when writing.
357 # * deltainfo is used to pick a good delta chain when writing.
358 if not (deltainfo is None or targetsize is None):
358 if not (deltainfo is None or targetsize is None):
359 msg = 'cannot use `targetsize` with a `deltainfo`'
359 msg = 'cannot use `targetsize` with a `deltainfo`'
360 raise error.ProgrammingError(msg)
360 raise error.ProgrammingError(msg)
361 for chunk in _slicechunktodensity(revlog, revs,
361 for chunk in _slicechunktodensity(revlog, revs,
362 deltainfo,
362 deltainfo,
363 revlog._srdensitythreshold,
363 revlog._srdensitythreshold,
364 revlog._srmingapsize):
364 revlog._srmingapsize):
365 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
365 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
366 yield subchunk
366 yield subchunk
367
367
368 def _slicechunktosize(revlog, revs, targetsize=None):
368 def _slicechunktosize(revlog, revs, targetsize=None):
369 """slice revs to match the target size
369 """slice revs to match the target size
370
370
371 This is intended to be used on chunks that density slicing selected but that
371 This is intended to be used on chunks that density slicing selected but that
372 are still too large compared to the read guarantee of revlog. This might
372 are still too large compared to the read guarantee of revlog. This might
373 happen when "minimal gap size" interrupted the slicing or when chains are
373 happen when "minimal gap size" interrupted the slicing or when chains are
374 built in a way that creates large blocks next to each other.
374 built in a way that creates large blocks next to each other.
375
375
376 >>> revlog = _testrevlog([
376 >>> revlog = _testrevlog([
377 ... 3, #0 (3)
377 ... 3, #0 (3)
378 ... 5, #1 (2)
378 ... 5, #1 (2)
379 ... 6, #2 (1)
379 ... 6, #2 (1)
380 ... 8, #3 (2)
380 ... 8, #3 (2)
381 ... 8, #4 (empty)
381 ... 8, #4 (empty)
382 ... 11, #5 (3)
382 ... 11, #5 (3)
383 ... 12, #6 (1)
383 ... 12, #6 (1)
384 ... 13, #7 (1)
384 ... 13, #7 (1)
385 ... 14, #8 (1)
385 ... 14, #8 (1)
386 ... ])
386 ... ])
387
387
388 Cases where chunk is already small enough
388 Cases where chunk is already small enough
389 >>> list(_slicechunktosize(revlog, [0], 3))
389 >>> list(_slicechunktosize(revlog, [0], 3))
390 [[0]]
390 [[0]]
391 >>> list(_slicechunktosize(revlog, [6, 7], 3))
391 >>> list(_slicechunktosize(revlog, [6, 7], 3))
392 [[6, 7]]
392 [[6, 7]]
393 >>> list(_slicechunktosize(revlog, [0], None))
393 >>> list(_slicechunktosize(revlog, [0], None))
394 [[0]]
394 [[0]]
395 >>> list(_slicechunktosize(revlog, [6, 7], None))
395 >>> list(_slicechunktosize(revlog, [6, 7], None))
396 [[6, 7]]
396 [[6, 7]]
397
397
398 cases where we need actual slicing
398 cases where we need actual slicing
399 >>> list(_slicechunktosize(revlog, [0, 1], 3))
399 >>> list(_slicechunktosize(revlog, [0, 1], 3))
400 [[0], [1]]
400 [[0], [1]]
401 >>> list(_slicechunktosize(revlog, [1, 3], 3))
401 >>> list(_slicechunktosize(revlog, [1, 3], 3))
402 [[1], [3]]
402 [[1], [3]]
403 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
403 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
404 [[1, 2], [3]]
404 [[1, 2], [3]]
405 >>> list(_slicechunktosize(revlog, [3, 5], 3))
405 >>> list(_slicechunktosize(revlog, [3, 5], 3))
406 [[3], [5]]
406 [[3], [5]]
407 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
407 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
408 [[3], [5]]
408 [[3], [5]]
409 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
409 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
410 [[5], [6, 7, 8]]
410 [[5], [6, 7, 8]]
411 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
411 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
412 [[0], [1, 2], [3], [5], [6, 7, 8]]
412 [[0], [1, 2], [3], [5], [6, 7, 8]]
413
413
414 Case with too large individual chunk (must return valid chunk)
414 Case with too large individual chunk (must return valid chunk)
415 >>> list(_slicechunktosize(revlog, [0, 1], 2))
415 >>> list(_slicechunktosize(revlog, [0, 1], 2))
416 [[0], [1]]
416 [[0], [1]]
417 >>> list(_slicechunktosize(revlog, [1, 3], 1))
417 >>> list(_slicechunktosize(revlog, [1, 3], 1))
418 [[1], [3]]
418 [[1], [3]]
419 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
419 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
420 [[3], [5]]
420 [[3], [5]]
421 """
421 """
422 assert targetsize is None or 0 <= targetsize
422 assert targetsize is None or 0 <= targetsize
423 if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
423 if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
424 yield revs
424 yield revs
425 return
425 return
426
426
427 startrevidx = 0
427 startrevidx = 0
428 startdata = revlog.start(revs[0])
428 startdata = revlog.start(revs[0])
429 endrevidx = 0
429 endrevidx = 0
430 iterrevs = enumerate(revs)
430 iterrevs = enumerate(revs)
431 next(iterrevs) # skip first rev.
431 next(iterrevs) # skip first rev.
432 for idx, r in iterrevs:
432 for idx, r in iterrevs:
433 span = revlog.end(r) - startdata
433 span = revlog.end(r) - startdata
434 if span <= targetsize:
434 if span <= targetsize:
435 endrevidx = idx
435 endrevidx = idx
436 else:
436 else:
437 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
437 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
438 if chunk:
438 if chunk:
439 yield chunk
439 yield chunk
440 startrevidx = idx
440 startrevidx = idx
441 startdata = revlog.start(r)
441 startdata = revlog.start(r)
442 endrevidx = idx
442 endrevidx = idx
443 yield _trimchunk(revlog, revs, startrevidx)
443 yield _trimchunk(revlog, revs, startrevidx)
444
444
445 def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
445 def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
446 mingapsize=0):
446 mingapsize=0):
447 """slice revs to reduce the amount of unrelated data to be read from disk.
447 """slice revs to reduce the amount of unrelated data to be read from disk.
448
448
449 ``revs`` is sliced into groups that should be read in a single read.
449 ``revs`` is sliced into groups that should be read in a single read.
450 Assume that revs are sorted.
450 Assume that revs are sorted.
451
451
452 ``deltainfo`` is a _deltainfo instance of a revision that we would append
452 ``deltainfo`` is a _deltainfo instance of a revision that we would append
453 to the top of the revlog.
453 to the top of the revlog.
454
454
455 The initial chunk is sliced until the overall density (payload/chunks-span
455 The initial chunk is sliced until the overall density (payload/chunks-span
456 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
456 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
457 skipped.
457 skipped.
458
458
459 >>> revlog = _testrevlog([
459 >>> revlog = _testrevlog([
460 ... 5, #00 (5)
460 ... 5, #00 (5)
461 ... 10, #01 (5)
461 ... 10, #01 (5)
462 ... 12, #02 (2)
462 ... 12, #02 (2)
463 ... 12, #03 (empty)
463 ... 12, #03 (empty)
464 ... 27, #04 (15)
464 ... 27, #04 (15)
465 ... 31, #05 (4)
465 ... 31, #05 (4)
466 ... 31, #06 (empty)
466 ... 31, #06 (empty)
467 ... 42, #07 (11)
467 ... 42, #07 (11)
468 ... 47, #08 (5)
468 ... 47, #08 (5)
469 ... 47, #09 (empty)
469 ... 47, #09 (empty)
470 ... 48, #10 (1)
470 ... 48, #10 (1)
471 ... 51, #11 (3)
471 ... 51, #11 (3)
472 ... 74, #12 (23)
472 ... 74, #12 (23)
473 ... 85, #13 (11)
473 ... 85, #13 (11)
474 ... 86, #14 (1)
474 ... 86, #14 (1)
475 ... 91, #15 (5)
475 ... 91, #15 (5)
476 ... ])
476 ... ])
477
477
478 >>> list(_slicechunktodensity(revlog, list(range(16))))
478 >>> list(_slicechunktodensity(revlog, list(range(16))))
479 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
479 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
480 >>> list(_slicechunktodensity(revlog, [0, 15]))
480 >>> list(_slicechunktodensity(revlog, [0, 15]))
481 [[0], [15]]
481 [[0], [15]]
482 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
482 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
483 [[0], [11], [15]]
483 [[0], [11], [15]]
484 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
484 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
485 [[0], [11, 13, 15]]
485 [[0], [11, 13, 15]]
486 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
486 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
487 [[1, 2], [5, 8, 10, 11], [14]]
487 [[1, 2], [5, 8, 10, 11], [14]]
488 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
488 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
489 ... mingapsize=20))
489 ... mingapsize=20))
490 [[1, 2, 3, 5, 8, 10, 11], [14]]
490 [[1, 2, 3, 5, 8, 10, 11], [14]]
491 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
491 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
492 ... targetdensity=0.95))
492 ... targetdensity=0.95))
493 [[1, 2], [5], [8, 10, 11], [14]]
493 [[1, 2], [5], [8, 10, 11], [14]]
494 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
494 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
495 ... targetdensity=0.95, mingapsize=12))
495 ... targetdensity=0.95, mingapsize=12))
496 [[1, 2], [5, 8, 10, 11], [14]]
496 [[1, 2], [5, 8, 10, 11], [14]]
497 """
497 """
498 start = revlog.start
498 start = revlog.start
499 length = revlog.length
499 length = revlog.length
500
500
501 if len(revs) <= 1:
501 if len(revs) <= 1:
502 yield revs
502 yield revs
503 return
503 return
504
504
505 nextrev = len(revlog)
505 nextrev = len(revlog)
506 nextoffset = revlog.end(nextrev - 1)
506 nextoffset = revlog.end(nextrev - 1)
507
507
508 if deltainfo is None:
508 if deltainfo is None:
509 deltachainspan = _segmentspan(revlog, revs)
509 deltachainspan = _segmentspan(revlog, revs)
510 chainpayload = sum(length(r) for r in revs)
510 chainpayload = sum(length(r) for r in revs)
511 else:
511 else:
512 deltachainspan = deltainfo.distance
512 deltachainspan = deltainfo.distance
513 chainpayload = deltainfo.compresseddeltalen
513 chainpayload = deltainfo.compresseddeltalen
514
514
515 if deltachainspan < mingapsize:
515 if deltachainspan < mingapsize:
516 yield revs
516 yield revs
517 return
517 return
518
518
519 readdata = deltachainspan
519 readdata = deltachainspan
520
520
521 if deltachainspan:
521 if deltachainspan:
522 density = chainpayload / float(deltachainspan)
522 density = chainpayload / float(deltachainspan)
523 else:
523 else:
524 density = 1.0
524 density = 1.0
525
525
526 if density >= targetdensity:
526 if density >= targetdensity:
527 yield revs
527 yield revs
528 return
528 return
529
529
530 if deltainfo is not None:
530 if deltainfo is not None:
531 revs = list(revs)
531 revs = list(revs)
532 revs.append(nextrev)
532 revs.append(nextrev)
533
533
534 # Store the gaps in a heap to have them sorted by decreasing size
534 # Store the gaps in a heap to have them sorted by decreasing size
535 gapsheap = []
535 gapsheap = []
536 heapq.heapify(gapsheap)
536 heapq.heapify(gapsheap)
537 prevend = None
537 prevend = None
538 for i, rev in enumerate(revs):
538 for i, rev in enumerate(revs):
539 if rev < nextrev:
539 if rev < nextrev:
540 revstart = start(rev)
540 revstart = start(rev)
541 revlen = length(rev)
541 revlen = length(rev)
542 else:
542 else:
543 revstart = nextoffset
543 revstart = nextoffset
544 revlen = deltainfo.deltalen
544 revlen = deltainfo.deltalen
545
545
546 # Skip empty revisions to form larger holes
546 # Skip empty revisions to form larger holes
547 if revlen == 0:
547 if revlen == 0:
548 continue
548 continue
549
549
550 if prevend is not None:
550 if prevend is not None:
551 gapsize = revstart - prevend
551 gapsize = revstart - prevend
552 # only consider holes that are large enough
552 # only consider holes that are large enough
553 if gapsize > mingapsize:
553 if gapsize > mingapsize:
554 heapq.heappush(gapsheap, (-gapsize, i))
554 heapq.heappush(gapsheap, (-gapsize, i))
555
555
556 prevend = revstart + revlen
556 prevend = revstart + revlen
557
557
558 # Collect the indices of the largest holes until the density is acceptable
558 # Collect the indices of the largest holes until the density is acceptable
559 indicesheap = []
559 indicesheap = []
560 heapq.heapify(indicesheap)
560 heapq.heapify(indicesheap)
561 while gapsheap and density < targetdensity:
561 while gapsheap and density < targetdensity:
562 oppgapsize, gapidx = heapq.heappop(gapsheap)
562 oppgapsize, gapidx = heapq.heappop(gapsheap)
563
563
564 heapq.heappush(indicesheap, gapidx)
564 heapq.heappush(indicesheap, gapidx)
565
565
566 # the gap sizes are stored as negatives to be sorted decreasingly
566 # the gap sizes are stored as negatives to be sorted decreasingly
567 # by the heap
567 # by the heap
568 readdata -= (-oppgapsize)
568 readdata -= (-oppgapsize)
569 if readdata > 0:
569 if readdata > 0:
570 density = chainpayload / float(readdata)
570 density = chainpayload / float(readdata)
571 else:
571 else:
572 density = 1.0
572 density = 1.0
573
573
574 # Cut the revs at collected indices
574 # Cut the revs at collected indices
575 previdx = 0
575 previdx = 0
576 while indicesheap:
576 while indicesheap:
577 idx = heapq.heappop(indicesheap)
577 idx = heapq.heappop(indicesheap)
578
578
579 chunk = _trimchunk(revlog, revs, previdx, idx)
579 chunk = _trimchunk(revlog, revs, previdx, idx)
580 if chunk:
580 if chunk:
581 yield chunk
581 yield chunk
582
582
583 previdx = idx
583 previdx = idx
584
584
585 chunk = _trimchunk(revlog, revs, previdx)
585 chunk = _trimchunk(revlog, revs, previdx)
586 if chunk:
586 if chunk:
587 yield chunk
587 yield chunk
588
588
589 @attr.s(slots=True, frozen=True)
589 @attr.s(slots=True, frozen=True)
590 class _deltainfo(object):
590 class _deltainfo(object):
591 distance = attr.ib()
591 distance = attr.ib()
592 deltalen = attr.ib()
592 deltalen = attr.ib()
593 data = attr.ib()
593 data = attr.ib()
594 base = attr.ib()
594 base = attr.ib()
595 chainbase = attr.ib()
595 chainbase = attr.ib()
596 chainlen = attr.ib()
596 chainlen = attr.ib()
597 compresseddeltalen = attr.ib()
597 compresseddeltalen = attr.ib()
598
598
599 class _deltacomputer(object):
599 class _deltacomputer(object):
600 def __init__(self, revlog):
600 def __init__(self, revlog):
601 self.revlog = revlog
601 self.revlog = revlog
602
602
603 def _getcandidaterevs(self, p1, p2, cachedelta):
603 def _getcandidaterevs(self, p1, p2, cachedelta):
604 """
604 """
605 Provides revisions that present an interest to be diffed against,
605 Provides revisions that are of interest to be diffed against,
605 Provides revisions that are of interest to be diffed against,
606 grouped by level of easiness.
607 """
607 """
608 revlog = self.revlog
608 revlog = self.revlog
609 gdelta = revlog._generaldelta
609 gdelta = revlog._generaldelta
610 curr = len(revlog)
610 curr = len(revlog)
611 prev = curr - 1
611 prev = curr - 1
612 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
612 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
613
613
614 # should we try to build a delta?
614 # should we try to build a delta?
615 if prev != nullrev and revlog.storedeltachains:
615 if prev != nullrev and revlog.storedeltachains:
616 tested = set()
616 tested = set()
617 # This condition is true most of the time when processing
617 # This condition is true most of the time when processing
618 # changegroup data into a generaldelta repo. The only time it
618 # changegroup data into a generaldelta repo. The only time it
619 # isn't true is if this is the first revision in a delta chain
619 # isn't true is if this is the first revision in a delta chain
620 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
620 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
621 if cachedelta and gdelta and revlog._lazydeltabase:
621 if cachedelta and gdelta and revlog._lazydeltabase:
622 # Assume what we received from the server is a good choice
622 # Assume what we received from the server is a good choice
623 # build delta will reuse the cache
623 # build delta will reuse the cache
624 yield (cachedelta[0],)
624 yield (cachedelta[0],)
625 tested.add(cachedelta[0])
625 tested.add(cachedelta[0])
626
626
627 if gdelta:
627 if gdelta:
628 # exclude already lazy tested base if any
628 # exclude already lazy tested base if any
629 parents = [p for p in (p1r, p2r)
629 parents = [p for p in (p1r, p2r)
630 if p != nullrev and p not in tested]
630 if p != nullrev and p not in tested]
631
631
632 if not revlog._deltabothparents and len(parents) == 2:
632 if not revlog._deltabothparents and len(parents) == 2:
633 parents.sort()
633 parents.sort()
634 # To minimize the chance of having to build a fulltext,
634 # To minimize the chance of having to build a fulltext,
635 # pick first whichever parent is closest to us (max rev)
635 # pick first whichever parent is closest to us (max rev)
636 yield (parents[1],)
636 yield (parents[1],)
637 # then the other one (min rev) if the first did not fit
637 # then the other one (min rev) if the first did not fit
638 yield (parents[0],)
638 yield (parents[0],)
639 tested.update(parents)
639 tested.update(parents)
640 elif len(parents) > 0:
640 elif len(parents) > 0:
641 # Test all parents (1 or 2), and keep the best candidate
641 # Test all parents (1 or 2), and keep the best candidate
642 yield parents
642 yield parents
643 tested.update(parents)
643 tested.update(parents)
644
644
645 if prev not in tested:
645 if prev not in tested:
646 # other approach failed try against prev to hopefully save us a
646 # other approach failed try against prev to hopefully save us a
647 # fulltext.
647 # fulltext.
648 yield (prev,)
648 yield (prev,)
649 tested.add(prev)
649 tested.add(prev)
650
650
    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        flags = revinfo.flags
        node = revinfo.node

        baserev = cachedelta[0]
        delta = cachedelta[1]
        # special case deltas which replace entire base; no need to decode
        # base revision. this neatly avoids censored bases, which throw when
        # they're decoded.
        hlen = struct.calcsize(">lll")
        if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                                   len(delta) - hlen):
            btext[0] = delta[hlen:]
        else:
            # deltabase is rawtext before changed by flag processors, which is
            # equivalent to non-raw text
            basetext = revlog.revision(baserev, _df=fh, raw=False)
            btext[0] = mdiff.patch(basetext, delta)

        try:
            res = revlog._processflags(btext[0], flags, 'read', raw=True)
            btext[0], validatehash = res
            if validatehash:
                revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
            if flags & REVIDX_ISCENSORED:
                raise RevlogError(_('node %s is not censored') % node)
        except CensoredNodeError:
            # must pass the censored index flag to add censored revisions
            if not flags & REVIDX_ISCENSORED:
                raise
        return btext[0]

    def _builddeltadiff(self, base, revinfo, fh):
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        # can we use the cached delta?
        if revinfo.cachedelta and revinfo.cachedelta[0] == base:
            delta = revinfo.cachedelta[1]
        else:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen
        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _getcandidaterevs
        """
        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
            nominateddeltas = []
            for candidaterev in candidaterevs:
                # no delta for rawtext-changing revs (see "candelta" for why)
                if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                    continue
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if revlog._isgooddeltainfo(candidatedelta, revinfo):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                break

        return deltainfo

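# --- illustrative aside (not part of this changeset) -----------------------
# Order in which _getcandidaterevs would offer bases for a hypothetical
# revision with parent revs p1r=10, p2r=7, prev=12, generaldelta enabled,
# deltabothparents disabled and no cached delta (all values invented):
#
#   1st group: (10,)  # the closer parent (max rev) first
#   2nd group: (7,)   # the other parent, if the first gave a bad delta
#   3rd group: (12,)  # finally the previous rev, to avoid a fulltext
#
# finddeltainfo stops at the first group that yields an acceptable delta.
# ----------------------------------------------------------------------------
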
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

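# --- illustrative aside (not part of this changeset) -----------------------
# attr.s generates a frozen, slotted value class, so a _revisioninfo might
# be built like this (every value below is made up):
#
#   ri = _revisioninfo(node=b'\xaa' * 20, p1=nullid, p2=nullid,
#                      btext=[b'hello world'], textlen=11,
#                      cachedelta=None, flags=0)
#
# One of btext[0] or cachedelta must be set; here the fulltext is given
# directly, so buildtext() returns it without patching.
# ----------------------------------------------------------------------------
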
# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack

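# --- illustrative aside (not part of this changeset) -----------------------
# One hypothetical v0 entry, to make the 76-byte record layout concrete
# (four 4-byte big-endian ints, then three 20-byte binary nodeids; the
# values are invented):
_v0example = indexformatv0.pack(0, 42, 0, 0,
                                b'\x11' * 20, b'\x00' * 20, b'\xaa' * 20)
assert indexformatv0.size == len(_v0example) == 76
assert indexformatv0.unpack(_v0example)[:4] == (0, 42, 0, 0)
# ----------------------------------------------------------------------------
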
class revlogoldindex(list):
-    def __len__(self):
-        return list.__len__(self) + 1
    def __getitem__(self, i):
-        if i == -1 or i == len(self) - 1:
+        if i == -1 or i == len(self):
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

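# --- illustrative aside (not part of this changeset) -----------------------
# The new contract in one example: len() counts only real revisions, while
# the null entry stays reachable at -1 and at one-past-the-end (the entry
# tuple below is invented):
_idx = revlogoldindex([(0, 5, 5, 0, 0, -1, -1, b'\xaa' * 20)])
assert len(_idx) == 1                  # nullid no longer included in len()
assert _idx[-1][7] == nullid           # still addressable at -1...
assert _idx[len(_idx)][7] == nullid    # ...and just past the end
# ----------------------------------------------------------------------------
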
class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_('index entry flags need revlog version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

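# --- illustrative aside (not part of this changeset) -----------------------
# The "ng" record is 64 bytes: 8 (offset + flags) + 6 * 4 (lengths, base
# rev, link rev, parent revs) + 20 (nodeid) + 12 bytes of padding.
assert indexformatng.size == 64
# ----------------------------------------------------------------------------
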
class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

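# --- illustrative aside (not part of this changeset) -----------------------
# packentry() above splices the version header over the first entry's
# offset field: rev 0's offset is always 0, so the first 4 bytes of the
# index file can double as the header. A sketch with invented values:
_e = (0, 30, 30, 0, 0, -1, -1, b'\x11' * 20)
_p = revlogio().packentry(_e, None, REVLOGV1 | FLAG_INLINE_DATA, 0)
assert versionformat_unpack(_p[:4])[0] == REVLOGV1 | FLAG_INLINE_DATA
assert len(_p) == indexformatng.size
# ----------------------------------------------------------------------------
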
class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'deltabothparents' in opts:
                self._deltabothparents = opts['deltabothparents']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._sparserevlog = bool(opts.get('sparse-revlog', False))
            withsparseread = bool(opts.get('with-sparse-read', False))
            # sparse-revlog forces sparse-read
            self._withsparseread = self._sparserevlog or withsparseread
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                    self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        else:
            raise RevlogError(_('unknown version (%d) in revlog %s') %
                              (fmt, self.indexfile))

        self.storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        if existingfp is not None:
            yield existingfp
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
-        return self.node(len(self.index) - 2)
+        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
-        return len(self.index) - 1
+        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        length = len(self)
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
            if stop > length:
                stop = length
        else:
            stop = length
        return pycompat.xrange(start, stop, step)

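    # --- illustrative aside (not part of this changeset) -------------------
    # revs() treats `stop` as inclusive and iterates backwards when
    # start > stop, e.g. (revision numbers invented):
    #
    #   list(rl.revs(2, 5))  ->  [2, 3, 4, 5]
    #   list(rl.revs(5, 2))  ->  [5, 4, 3, 2]
    #   list(rl.revs())      ->  every rev; nullrev is never produced now
    #                            that len() excludes the null entry
    # ------------------------------------------------------------------------
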
    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have a same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
-                p = len(i) - 2
+                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

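    # --- illustrative aside (not part of this changeset) -------------------
    # How the packed first index field splits into start() and flags()
    # (numbers invented):
    #
    #   offset_flags = (1234 << 16) | 0x0001
    #   offset_flags >> 16     -> 1234    (start of the data chunk)
    #   offset_flags & 0xFFFF  -> 0x0001  (per-revision storage flags)
    # ------------------------------------------------------------------------
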
    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

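    # --- illustrative aside (not part of this changeset) -------------------
    # A generaldelta chain walk on a toy base map (rev -> delta base rev,
    # values invented): starting at rev 4 with bases {4: 2, 2: 0, 0: 0},
    # the loop above visits 4 and 2, stops at 0 (its own base), and after
    # the reverse() returns ([0, 2, 4], False).
    # ------------------------------------------------------------------------
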
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself.  Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs.  If heads is
        not supplied, uses all of the revlog's heads.  If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if not r in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers.  If heads is
        not supplied, uses all of the revlog's heads.  If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs.  If heads is
        not supplied, uses all of the revlog's heads.  If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

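    # Illustrative sketch (not part of revlog.py; all names hypothetical):
    # the core sweep of nodesbetween on a toy rev -> parents table, keeping
    # revs that are descendants of 'roots' and ancestors of 'heads'.
    def _sketch_between(parents, roots, heads):
        ancestors = set()
        stack = list(heads)
        while stack:                       # mark every ancestor of 'heads'
            r = stack.pop()
            if r not in ancestors:
                ancestors.add(r)
                stack.extend(parents[r])
        descendants = set(roots)
        out = []
        for r in sorted(parents):          # sweep in (topological) rev order
            if r in descendants or any(p in descendants for p in parents[r]):
                descendants.add(r)
                if r in ancestors:
                    out.append(r)
        return out
    # _sketch_between({0: [], 1: [0], 2: [1], 3: [1]}, [1], [3]) -> [1, 3]
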
    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so no rev is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

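    # Illustrative sketch (not part of revlog.py; hypothetical helper): the
    # _headrevs marking pass on a toy list of (p1, p2) parent-rev pairs.
    def _sketch_headrevs(parentrevs):
        ishead = [1] * len(parentrevs)     # every rev starts as a candidate
        for ps in parentrevs:
            for p in ps:
                if p >= 0:
                    ishead[p] = 0          # a rev that is a parent is no head
        return [r for r, v in enumerate(ishead) if v]
    # _sketch_headrevs([(-1, -1), (0, -1), (0, -1)]) -> [1, 2]
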
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def descendant(self, a, b):
        msg = 'revlog.descendant is deprecated, use revlog.isancestorrev'
        util.nouideprecwarn(msg, '4.7')
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return a in self._commonancestorsheads(a, b)

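    # Why the check above works (illustrative note): if a is an ancestor of
    # b, then a is itself a common ancestor of a and b, and no other common
    # ancestor can be a proper descendant of a, so a must appear among the
    # *heads* of the common-ancestor set. A naive equivalent on a toy
    # rev -> parents table (hypothetical helper, not revlog API):
    def _sketch_isancestor(parents, a, b):
        seen, stack = set(), [b]
        while stack:                       # walk b's ancestry toward the root
            r = stack.pop()
            if r == a:
                return True
            if r not in seen:
                seen.add(r)
                stack.extend(parents[r])
        return False
    # _sketch_isancestor({0: [], 1: [0], 2: [0]}, 0, 2) -> True
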
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise AmbiguousPrefixLookupError(id, self.indexfile,
                                                 _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise AmbiguousPrefixLookupError(id, self.indexfile,
                                                     _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

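    # Illustrative sketch (not part of revlog.py; hypothetical helper): the
    # ambiguity rules of _partialmatch, reduced to a toy list of hex ids.
    def _sketch_prefixmatch(hexnodes, prefix):
        matches = [n for n in hexnodes if n.startswith(prefix)]
        if len(matches) > 1:
            raise ValueError('ambiguous identifier') # more than one match
        return matches[0] if matches else None       # unique match or None
    # _sketch_prefixmatch(['ab12', 'ab34', 'cd56'], 'cd') -> 'cd56'
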
    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.RevlogError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except RevlogError:
                if node != wdirid:
                    raise LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

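    # Illustrative sketch (not part of revlog.py; hypothetical helper): grow
    # a prefix until it is unique in a toy list of hex ids, as shortest()
    # does against the real index.
    def _sketch_shortest(hexnodes, hexnode, minlength=1):
        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if sum(n.startswith(prefix) for n in hexnodes) == 1:
                return prefix              # first unambiguous prefix wins
        return hexnode
    # _sketch_shortest(['ab12', 'ab34', 'cd56'], 'ab34') -> 'ab3'
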
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

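    # The window arithmetic above, worked through (illustrative; assumes the
    # cache size is a power of two, which _readsegment relies on).
    def _sketch_window(offset, length, cachesize=65536):
        realoffset = offset & ~(cachesize - 1)           # round down
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)                      # round up, plus slack
        return realoffset, reallength
    # _sketch_window(70000, 100) -> (65536, 65536): one aligned 64 KiB read
    # that covers bytes 70000..70099 and caches the surrounding window.
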
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

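    # The inline offset shift above, worked through (illustrative note): in
    # an inline revlog each revision's data chunk sits directly after its own
    # index entry, so a raw data offset must skip (rev + 1) index records of
    # self._io.size bytes each. For example, with 64-byte index entries, data
    # that the index places at byte 300 for rev 2 really starts at
    # 300 + (2 + 1) * 64 = 492 in the combined file.
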
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = _slicechunk(self, revs, targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

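    # Delta-chain reconstruction above in miniature (illustrative note): the
    # first chunk of the chain is a full text and every later chunk is a
    # delta, so the text is rebuilt by folding them in order, as
    # mdiff.patches does with real binary deltas. A toy equivalent with
    # plain callables standing in for deltas (hypothetical helper):
    def _sketch_applychain(fulltext, patchfuncs):
        text = fulltext
        for patch in patchfuncs:           # apply deltas oldest to newest
            text = patch(text)
        return text
    # _sketch_applychain('abc', [lambda t: t + 'd']) -> 'abcd'
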
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument indicating whether the raw transform
        should be applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise ProgrammingError(_("invalid '%s' operation") % (operation))
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_("incompatible revision flag '%#x'") %
                              (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in _flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise RevlogError(message)

                processor = _flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                validatehash = validatehash and vhash

        return text, validatehash

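    # Illustrative sketch (not part of revlog.py; hypothetical names): the
    # read-side half of _processflags, with processors as plain callables.
    def _sketch_readflags(text, flags, processors, order):
        validatehash = True
        for flag in order:                 # stable order matters: transforms
            if flag & flags:               # are not commutative
                text, vhash = processors[flag](text)
                validatehash = validatehash and vhash
        return text, validatehash
    # processors = {1: lambda t: (t.upper(), True)}
    # _sketch_readflags('abc', 1, processors, [1]) -> ('ABC', True)
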
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise RevlogError(_("integrity check failed on %s:%s")
                                  % (self.indexfile, pycompat.bytestr(revornode)))
        except RevlogError:
            if self._censorable and _censoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        with self._datafp('w') as df:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in such a
        case)
        flags - the known flags to set on the revision
        deltacomputer - an optional _deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

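For illustration, the framing contract of compress() can be reproduced in isolation: the caller concatenates the returned header and payload when writing. The following is a minimal Python 3 sketch assuming a zlib-only compressor, with a simple "only if smaller" test standing in for the engine's own decision about when compression is worthwhile (the real revlog delegates to its configured compression engine, whose output supplies its own header byte):

import zlib

def compress_chunk(data):
    # Returns (header, payload); header + payload is what goes to disk.
    if not data:
        return b'', data
    compressed = zlib.compress(data)
    if len(compressed) < len(data):
        # zlib output begins with 'x', which doubles as the revlog header.
        return b'', compressed
    if data[0:1] == b'\0':
        return b'', data  # a leading NUL already marks "stored literally"
    return b'u', data     # otherwise tag the chunk as uncompressed
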
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') %
                                  stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

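The read side mirrors that framing by dispatching on the first byte. A round-trip check using compress_chunk() from the sketch above (again zlib-only; the engine-table fallback for other header bytes is omitted):

import zlib

def decompress_chunk(chunk):
    if not chunk:
        return chunk
    t = chunk[0:1]
    if t == b'x':
        return zlib.decompress(chunk)
    if t == b'\0':
        return chunk        # stored literally, NUL included
    if t == b'u':
        return chunk[1:]    # strip the "uncompressed" marker
    raise ValueError('unknown compression type %r' % t)

header, payload = compress_chunk(b'some revision text')
assert decompress_chunk(header + payload) == b'some revision text'
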
    def _isgooddeltainfo(self, deltainfo, revinfo):
        """Returns True if the given delta is good. Good means that it is within
        the disk span, disk size, and chain length bounds that we know to be
        performant."""
        if deltainfo is None:
            return False

        # - 'deltainfo.distance' is the distance from the base revision --
        #   bounding it limits the amount of I/O we need to do.
        # - 'deltainfo.compresseddeltalen' is the sum of the total size of
        #   deltas we need to apply -- bounding it limits the amount of CPU
        #   we consume.

        if self._sparserevlog:
            # As sparse-read will be used, we can consider that the distance,
            # instead of being the span of the whole chunk,
            # is the span of the largest read chunk
            base = deltainfo.base

            if base != nullrev:
                deltachain = self._deltachain(base)[0]
            else:
                deltachain = []

            chunks = _slicechunk(self, deltachain, deltainfo)
            distance = max(map(lambda revs: _segmentspan(self, revs), chunks))
        else:
            distance = deltainfo.distance

        textlen = revinfo.textlen
        defaultmax = textlen * 4
        maxdist = self._maxdeltachainspan
        if not maxdist:
            maxdist = distance  # ensure the conditional passes
        maxdist = max(maxdist, defaultmax)
        if self._sparserevlog and maxdist < self._srmingapsize:
            # In multiple places we ignore irrelevant data ranges below a
            # certain size. We apply the same tradeoff here and relax the span
            # constraint for small enough content.
            maxdist = self._srmingapsize
        if (distance > maxdist or deltainfo.deltalen > textlen or
            deltainfo.compresseddeltalen > textlen * 2 or
            (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)):
            return False

        return True

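In the common non-sparse configuration, the test above boils down to a few size comparisons. A self-contained restatement with made-up numbers (parameter names mirror the attributes used above):

def isgooddelta(distance, deltalen, compresseddeltalen, chainlen,
                textlen, maxdeltachainspan=0, maxchainlen=None):
    maxdist = maxdeltachainspan or distance   # unset span never rejects
    maxdist = max(maxdist, textlen * 4)
    if distance > maxdist:
        return False
    if deltalen > textlen or compresseddeltalen > textlen * 2:
        return False
    if maxchainlen and chainlen > maxchainlen:
        return False
    return True

# A 900-byte delta on a 1000-byte text, three links up a chain totalling
# 1500 compressed bytes, is within every bound:
print(isgooddelta(distance=3500, deltalen=900, compresseddeltalen=1500,
                  chainlen=3, textlen=1000))   # True
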
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise RevlogError(_("%s: attempt to add null revision") %
                              (self.indexfile))
        if node == wdirid or node in wdirfilenodeids:
            raise RevlogError(_("%s: attempt to add wdir revision") %
                              (self.indexfile))

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = _deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            deltainfo = None
        else:
            deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if deltainfo is not None:
            base = deltainfo.base
            chainbase = deltainfo.chainbase
            data = deltainfo.data
            l = deltainfo.deltalen
        else:
            rawtext = deltacomputer.buildtext(revinfo, fh)
            data = self.compress(rawtext)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and rawtext is None:
            rawtext = deltacomputer._buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = chainbase
        return node

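The eight-field tuple appended to self.index above is worth spelling out. The first field packs the data-file offset together with the flag bits; a sketch of that packing and an entry with purely illustrative values:

def offset_type(offset, flags):
    # upper bits: byte offset into the data file; low 16 bits: REVIDX_* flags
    return int(int(offset) << 16 | flags)

entry = (offset_type(4096, 0),   # offset and flags, packed
         120,                    # length of the stored (compressed) chunk
         300,                    # length of the full rawtext
         7,                      # delta base revision
         42,                     # linkrev
         6, -1,                  # p1 and p2 revision numbers (-1 is null)
         b'\xaa' * 20)           # node hash (dummy value)
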
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

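The workaround amounts to never trusting the implicit append position: always reposition to end-of-file before writing. A standalone illustration using a throwaway temp file:

import os, tempfile

fd, path = tempfile.mkstemp()
os.close(fd)
with open(path, 'a+b') as fh:
    fh.write(b'entry 1\n')
    fh.seek(0)
    fh.read()                   # reading moves the file position...
    fh.seek(0, os.SEEK_END)     # ...so seek back to EOF before writing
    fh.write(b'entry 2\n')
os.unlink(path)
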
    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = _deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

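Each item the loop above unpacks from ``deltas`` is a seven-tuple. A dummy value showing the shape (all bytes are placeholders):

node      = b'\xaa' * 20                  # hash of the revision being added
p1, p2    = b'\xbb' * 20, b'\x00' * 20    # parent nodes (nullid when absent)
linknode  = b'\xcc' * 20                  # changelog node this rev belongs to
deltabase = p1                            # node the delta was computed against
delta     = b''                           # binary patch from deltabase's text
flags     = 0                             # REVIDX_* flags; 0 means none
data = (node, p1, p2, linknode, deltabase, delta, flags)
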
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        # Fragile heuristic: unless new file meta keys are added alphabetically
        # preceding "censored", all censored revisions are prefixed by
        # "\1\ncensored:". A delta producing such a censored revision must be a
        # full-replacement delta, so we inspect the first and only patch in the
        # delta for this prefix.
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False

        oldlen = self.rawsize(baserev)
        newlen = len(delta) - hlen
        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
            return False

        add = "\1\ncensored:"
        addlen = len(add)
        return newlen >= addlen and delta[hlen:hlen + addlen] == add

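The header comparison leans on the mdiff patch format: a delta is a series of big-endian (start, end, length) triples, each followed by its replacement bytes, and a full replacement is a single hunk rewriting bytes [0, oldlen). A sketch of the whole heuristic, assuming replacediffheader() packs exactly that triple:

import struct

def peek_iscensored(delta, oldlen):
    hlen = struct.calcsize(">lll")
    if len(delta) <= hlen:
        return False
    newlen = len(delta) - hlen
    # a full-replacement hunk: replace bytes [0, oldlen) with newlen bytes
    if delta[:hlen] != struct.pack(">lll", 0, oldlen, newlen):
        return False
    marker = b"\1\ncensored:"
    return delta[hlen:hlen + len(marker)] == marker

# e.g. a censored replacement of a 10-byte base text:
body = b"\1\ncensored: reason\n\1\n"
print(peek_iscensored(struct.pack(">lll", 0, 10, len(body)) + body, 10))  # True
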
    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

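A toy run of that walk: a linear four-revision log whose linkrevs are out of order (rev 2 carries linkrev 3 and rev 3 carries linkrev 2), stripped at minlink=2. Everything the real method reads from the index is stubbed as plain dicts:

linkrevs = {0: 0, 1: 1, 2: 3, 3: 2}            # rev -> linkrev
parents  = {0: (), 1: (0,), 2: (1,), 3: (2,)}  # rev -> parent revs
minlink, strippoint = 2, 4
heads = {3: linkrevs[3]}                       # rev 3 is the only head
futurelargelinkrevs = {lr for lr in heads.values() if lr >= minlink}
brokenrevs = set()
while futurelargelinkrevs:
    strippoint -= 1
    linkrev = heads.pop(strippoint)
    if linkrev < minlink:
        brokenrevs.add(strippoint)
    else:
        futurelargelinkrevs.remove(linkrev)
    for p in parents[strippoint]:
        heads[p] = linkrevs[p]
        if linkrevs[p] >= minlink:
            futurelargelinkrevs.add(linkrevs[p])
print(strippoint, brokenrevs)                  # -> 2 set()
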
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is
           the fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``deltabothparents`` argument
        controls whether to compute deltas against both parents for merges.
        If not set, the destination revlog's current setting is kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = deltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = _deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd
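
A hedged usage sketch: the names srcrevlog, destrevlog and tr stand in for a source revlog, a freshly created (empty) destination revlog, and an open transaction obtained from a repository. Forcing delta recomputation, as a format-upgrade pass might:

def recompute_deltas(srcrevlog, destrevlog, tr):
    # DELTAREUSENEVER discards all stored deltas and lets the destination
    # compute fresh ones; slow, but picks up delta-algorithm improvements.
    srcrevlog.clone(tr, destrevlog,
                    deltareuse=srcrevlog.DELTAREUSENEVER)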