##// END OF EJS Templates
cext: remove PY23()...
Gregory Szorc -
r49676:b0dd39b9 default
parent child Browse files
Show More
@@ -1,192 +1,192 b''
1 1 /*
2 2 base85 codec
3 3
4 4 Copyright 2006 Brendan Cully <brendan@kublai.com>
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8
9 9 Largely based on git's implementation
10 10 */
11 11
12 12 #define PY_SSIZE_T_CLEAN
13 13 #include <Python.h>
14 14
15 15 #include "util.h"
16 16
/* The 85 digits of the alphabet, in value order (digit value == index). */
static const char b85chars[] =
    "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~";

/* Reverse lookup: b85dec[byte] is (digit value + 1), or 0 when the byte is
 * not part of the alphabet. */
static char b85dec[256];

/*
 * Build the reverse lookup table used by the decoder.
 *
 * Only the 85 real digits are registered.  sizeof(b85chars) also counts the
 * string's terminating NUL, so the bound is sizeof - 1; otherwise byte 0
 * would be entered as "digit 85" and accepted by the decoder.
 */
static void b85prep(void)
{
	unsigned i;

	memset(b85dec, 0, sizeof(b85dec));
	for (i = 0; i < sizeof(b85chars) - 1; i++) {
		b85dec[(unsigned char)b85chars[i]] = (char)(i + 1);
	}
}
31 31
32 32 static PyObject *b85encode(PyObject *self, PyObject *args)
33 33 {
34 34 const unsigned char *text;
35 35 PyObject *out;
36 36 char *dst;
37 37 Py_ssize_t len, olen, i;
38 38 unsigned int acc, val, ch;
39 39 int pad = 0;
40 40
41 if (!PyArg_ParseTuple(args, PY23("s#|i", "y#|i"), &text, &len, &pad)) {
41 if (!PyArg_ParseTuple(args, "y#|i", &text, &len, &pad)) {
42 42 return NULL;
43 43 }
44 44
45 45 if (pad) {
46 46 olen = ((len + 3) / 4 * 5) - 3;
47 47 } else {
48 48 olen = len % 4;
49 49 if (olen) {
50 50 olen++;
51 51 }
52 52 olen += len / 4 * 5;
53 53 }
54 54 if (!(out = PyBytes_FromStringAndSize(NULL, olen + 3))) {
55 55 return NULL;
56 56 }
57 57
58 58 dst = PyBytes_AsString(out);
59 59
60 60 while (len) {
61 61 acc = 0;
62 62 for (i = 24; i >= 0; i -= 8) {
63 63 ch = *text++;
64 64 acc |= ch << i;
65 65 if (--len == 0) {
66 66 break;
67 67 }
68 68 }
69 69 for (i = 4; i >= 0; i--) {
70 70 val = acc % 85;
71 71 acc /= 85;
72 72 dst[i] = b85chars[val];
73 73 }
74 74 dst += 5;
75 75 }
76 76
77 77 if (!pad) {
78 78 _PyBytes_Resize(&out, olen);
79 79 }
80 80
81 81 return out;
82 82 }
83 83
84 84 static PyObject *b85decode(PyObject *self, PyObject *args)
85 85 {
86 86 PyObject *out = NULL;
87 87 const char *text;
88 88 char *dst;
89 89 Py_ssize_t len, i, j, olen, cap;
90 90 int c;
91 91 unsigned int acc;
92 92
93 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &text, &len)) {
93 if (!PyArg_ParseTuple(args, "y#", &text, &len)) {
94 94 return NULL;
95 95 }
96 96
97 97 olen = len / 5 * 4;
98 98 i = len % 5;
99 99 if (i) {
100 100 olen += i - 1;
101 101 }
102 102 if (!(out = PyBytes_FromStringAndSize(NULL, olen))) {
103 103 return NULL;
104 104 }
105 105
106 106 dst = PyBytes_AsString(out);
107 107
108 108 i = 0;
109 109 while (i < len) {
110 110 acc = 0;
111 111 cap = len - i - 1;
112 112 if (cap > 4) {
113 113 cap = 4;
114 114 }
115 115 for (j = 0; j < cap; i++, j++) {
116 116 c = b85dec[(int)*text++] - 1;
117 117 if (c < 0) {
118 118 PyErr_Format(
119 119 PyExc_ValueError,
120 120 "bad base85 character at position %d",
121 121 (int)i);
122 122 goto bail;
123 123 }
124 124 acc = acc * 85 + c;
125 125 }
126 126 if (i++ < len) {
127 127 c = b85dec[(int)*text++] - 1;
128 128 if (c < 0) {
129 129 PyErr_Format(
130 130 PyExc_ValueError,
131 131 "bad base85 character at position %d",
132 132 (int)i);
133 133 goto bail;
134 134 }
135 135 /* overflow detection: 0xffffffff == "|NsC0",
136 136 * "|NsC" == 0x03030303 */
137 137 if (acc > 0x03030303 || (acc *= 85) > 0xffffffff - c) {
138 138 PyErr_Format(
139 139 PyExc_ValueError,
140 140 "bad base85 sequence at position %d",
141 141 (int)i);
142 142 goto bail;
143 143 }
144 144 acc += c;
145 145 }
146 146
147 147 cap = olen < 4 ? olen : 4;
148 148 olen -= cap;
149 149 for (j = 0; j < 4 - cap; j++) {
150 150 acc *= 85;
151 151 }
152 152 if (cap && cap < 4) {
153 153 acc += 0xffffff >> (cap - 1) * 8;
154 154 }
155 155 for (j = 0; j < cap; j++) {
156 156 acc = (acc << 8) | (acc >> 24);
157 157 *dst++ = acc;
158 158 }
159 159 }
160 160
161 161 return out;
162 162 bail:
163 163 Py_XDECREF(out);
164 164 return NULL;
165 165 }
166 166
167 167 static char base85_doc[] = "Base85 Data Encoding";
168 168
169 169 static PyMethodDef methods[] = {
170 170 {"b85encode", b85encode, METH_VARARGS,
171 171 "Encode text in base85.\n\n"
172 172 "If the second parameter is true, pad the result to a multiple of "
173 173 "five characters.\n"},
174 174 {"b85decode", b85decode, METH_VARARGS, "Decode base85 text.\n"},
175 175 {NULL, NULL},
176 176 };
177 177
178 178 static const int version = 1;
179 179
180 180 static struct PyModuleDef base85_module = {
181 181 PyModuleDef_HEAD_INIT, "base85", base85_doc, -1, methods,
182 182 };
183 183
184 184 PyMODINIT_FUNC PyInit_base85(void)
185 185 {
186 186 PyObject *m;
187 187 b85prep();
188 188
189 189 m = PyModule_Create(&base85_module);
190 190 PyModule_AddIntConstant(m, "version", version);
191 191 return m;
192 192 }
@@ -1,350 +1,348 b''
1 1 /*
2 2 bdiff.c - efficient binary diff extension for Mercurial
3 3
4 4 Copyright 2005, 2006 Olivia Mackall <olivia@selenic.com>
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8
9 9 Based roughly on Python difflib
10 10 */
11 11
12 12 #define PY_SSIZE_T_CLEAN
13 13 #include <Python.h>
14 14 #include <limits.h>
15 15 #include <stdlib.h>
16 16 #include <string.h>
17 17
18 18 #include "bdiff.h"
19 19 #include "bitmanipulation.h"
20 20 #include "thirdparty/xdiff/xdiff.h"
21 21 #include "util.h"
22 22
23 23 static PyObject *blocks(PyObject *self, PyObject *args)
24 24 {
25 25 PyObject *sa, *sb, *rl = NULL, *m;
26 26 struct bdiff_line *a, *b;
27 27 struct bdiff_hunk l, *h;
28 28 int an, bn, count, pos = 0;
29 29
30 30 l.next = NULL;
31 31
32 32 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb)) {
33 33 return NULL;
34 34 }
35 35
36 36 an = bdiff_splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
37 37 bn = bdiff_splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);
38 38
39 39 if (!a || !b) {
40 40 goto nomem;
41 41 }
42 42
43 43 count = bdiff_diff(a, an, b, bn, &l);
44 44 if (count < 0) {
45 45 goto nomem;
46 46 }
47 47
48 48 rl = PyList_New(count);
49 49 if (!rl) {
50 50 goto nomem;
51 51 }
52 52
53 53 for (h = l.next; h; h = h->next) {
54 54 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
55 55 PyList_SetItem(rl, pos, m);
56 56 pos++;
57 57 }
58 58
59 59 nomem:
60 60 free(a);
61 61 free(b);
62 62 bdiff_freehunks(l.next);
63 63 return rl ? rl : PyErr_NoMemory();
64 64 }
65 65
66 66 static PyObject *bdiff(PyObject *self, PyObject *args)
67 67 {
68 68 Py_buffer ba, bb;
69 69 char *rb, *ia, *ib;
70 70 PyObject *result = NULL;
71 71 struct bdiff_line *al = NULL, *bl = NULL;
72 72 struct bdiff_hunk l, *h;
73 73 int an, bn, count;
74 74 Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax;
75 75 PyThreadState *_save = NULL;
76 76
77 77 l.next = NULL;
78 78
79 if (!PyArg_ParseTuple(args, PY23("s*s*:bdiff", "y*y*:bdiff"), &ba,
80 &bb)) {
79 if (!PyArg_ParseTuple(args, "y*y*:bdiff", &ba, &bb)) {
81 80 return NULL;
82 81 }
83 82
84 83 if (!PyBuffer_IsContiguous(&ba, 'C') || ba.ndim > 1) {
85 84 PyErr_SetString(PyExc_ValueError, "bdiff input not contiguous");
86 85 goto cleanup;
87 86 }
88 87
89 88 if (!PyBuffer_IsContiguous(&bb, 'C') || bb.ndim > 1) {
90 89 PyErr_SetString(PyExc_ValueError, "bdiff input not contiguous");
91 90 goto cleanup;
92 91 }
93 92
94 93 la = ba.len;
95 94 lb = bb.len;
96 95
97 96 if (la > UINT_MAX || lb > UINT_MAX) {
98 97 PyErr_SetString(PyExc_ValueError, "bdiff inputs too large");
99 98 goto cleanup;
100 99 }
101 100
102 101 _save = PyEval_SaveThread();
103 102
104 103 lmax = la > lb ? lb : la;
105 104 for (ia = ba.buf, ib = bb.buf; li < lmax && *ia == *ib;
106 105 ++li, ++ia, ++ib) {
107 106 if (*ia == '\n') {
108 107 lcommon = li + 1;
109 108 }
110 109 }
111 110 /* we can almost add: if (li == lmax) lcommon = li; */
112 111
113 112 an = bdiff_splitlines((char *)ba.buf + lcommon, la - lcommon, &al);
114 113 bn = bdiff_splitlines((char *)bb.buf + lcommon, lb - lcommon, &bl);
115 114 if (!al || !bl) {
116 115 PyErr_NoMemory();
117 116 goto cleanup;
118 117 }
119 118
120 119 count = bdiff_diff(al, an, bl, bn, &l);
121 120 if (count < 0) {
122 121 PyErr_NoMemory();
123 122 goto cleanup;
124 123 }
125 124
126 125 /* calculate length of output */
127 126 la = lb = 0;
128 127 for (h = l.next; h; h = h->next) {
129 128 if (h->a1 != la || h->b1 != lb) {
130 129 len += 12 + bl[h->b1].l - bl[lb].l;
131 130 }
132 131 la = h->a2;
133 132 lb = h->b2;
134 133 }
135 134 PyEval_RestoreThread(_save);
136 135 _save = NULL;
137 136
138 137 result = PyBytes_FromStringAndSize(NULL, len);
139 138
140 139 if (!result) {
141 140 goto cleanup;
142 141 }
143 142
144 143 /* build binary patch */
145 144 rb = PyBytes_AsString(result);
146 145 la = lb = 0;
147 146
148 147 for (h = l.next; h; h = h->next) {
149 148 if (h->a1 != la || h->b1 != lb) {
150 149 len = bl[h->b1].l - bl[lb].l;
151 150 putbe32((uint32_t)(al[la].l + lcommon - al->l), rb);
152 151 putbe32((uint32_t)(al[h->a1].l + lcommon - al->l),
153 152 rb + 4);
154 153 putbe32((uint32_t)len, rb + 8);
155 154 memcpy(rb + 12, bl[lb].l, len);
156 155 rb += 12 + len;
157 156 }
158 157 la = h->a2;
159 158 lb = h->b2;
160 159 }
161 160
162 161 cleanup:
163 162 if (_save) {
164 163 PyEval_RestoreThread(_save);
165 164 }
166 165 PyBuffer_Release(&ba);
167 166 PyBuffer_Release(&bb);
168 167 free(al);
169 168 free(bl);
170 169 bdiff_freehunks(l.next);
171 170 return result;
172 171 }
173 172
174 173 /*
175 174 * If allws != 0, remove all whitespace (' ', \t and \r). Otherwise,
176 175 * reduce whitespace sequences to a single space and trim remaining whitespace
177 176 * from end of lines.
178 177 */
179 178 static PyObject *fixws(PyObject *self, PyObject *args)
180 179 {
181 180 PyObject *s, *result = NULL;
182 181 char allws, c;
183 182 const char *r;
184 183 Py_ssize_t i, rlen, wlen = 0;
185 184 char *w;
186 185
187 186 if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws)) {
188 187 return NULL;
189 188 }
190 189 r = PyBytes_AsString(s);
191 190 rlen = PyBytes_Size(s);
192 191
193 192 w = (char *)PyMem_Malloc(rlen ? rlen : 1);
194 193 if (!w) {
195 194 goto nomem;
196 195 }
197 196
198 197 for (i = 0; i != rlen; i++) {
199 198 c = r[i];
200 199 if (c == ' ' || c == '\t' || c == '\r') {
201 200 if (!allws && (wlen == 0 || w[wlen - 1] != ' ')) {
202 201 w[wlen++] = ' ';
203 202 }
204 203 } else if (c == '\n' && !allws && wlen > 0 &&
205 204 w[wlen - 1] == ' ') {
206 205 w[wlen - 1] = '\n';
207 206 } else {
208 207 w[wlen++] = c;
209 208 }
210 209 }
211 210
212 211 result = PyBytes_FromStringAndSize(w, wlen);
213 212
214 213 nomem:
215 214 PyMem_Free(w);
216 215 return result ? result : PyErr_NoMemory();
217 216 }
218 217
219 218 static bool sliceintolist(PyObject *list, Py_ssize_t destidx,
220 219 const char *source, Py_ssize_t len)
221 220 {
222 221 PyObject *sliced = PyBytes_FromStringAndSize(source, len);
223 222 if (sliced == NULL) {
224 223 return false;
225 224 }
226 225 PyList_SET_ITEM(list, destidx, sliced);
227 226 return true;
228 227 }
229 228
230 229 static PyObject *splitnewlines(PyObject *self, PyObject *args)
231 230 {
232 231 const char *text;
233 232 Py_ssize_t nelts = 0, size, i, start = 0;
234 233 PyObject *result = NULL;
235 234
236 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &text, &size)) {
235 if (!PyArg_ParseTuple(args, "y#", &text, &size)) {
237 236 goto abort;
238 237 }
239 238 if (!size) {
240 239 return PyList_New(0);
241 240 }
242 241 /* This loops to size-1 because if the last byte is a newline,
243 242 * we don't want to perform a split there. */
244 243 for (i = 0; i < size - 1; ++i) {
245 244 if (text[i] == '\n') {
246 245 ++nelts;
247 246 }
248 247 }
249 248 if ((result = PyList_New(nelts + 1)) == NULL) {
250 249 goto abort;
251 250 }
252 251 nelts = 0;
253 252 for (i = 0; i < size - 1; ++i) {
254 253 if (text[i] == '\n') {
255 254 if (!sliceintolist(result, nelts++, text + start,
256 255 i - start + 1)) {
257 256 goto abort;
258 257 }
259 258 start = i + 1;
260 259 }
261 260 }
262 261 if (!sliceintolist(result, nelts++, text + start, size - start)) {
263 262 goto abort;
264 263 }
265 264 return result;
266 265 abort:
267 266 Py_XDECREF(result);
268 267 return NULL;
269 268 }
270 269
271 270 static int hunk_consumer(int64_t a1, int64_t a2, int64_t b1, int64_t b2,
272 271 void *priv)
273 272 {
274 273 PyObject *rl = (PyObject *)priv;
275 274 PyObject *m = Py_BuildValue("LLLL", a1, a2, b1, b2);
276 275 int r;
277 276 if (!m) {
278 277 return -1;
279 278 }
280 279 r = PyList_Append(rl, m);
281 280 Py_DECREF(m);
282 281 return r;
283 282 }
284 283
285 284 static PyObject *xdiffblocks(PyObject *self, PyObject *args)
286 285 {
287 286 Py_ssize_t la, lb;
288 287 mmfile_t a, b;
289 288 PyObject *rl;
290 289
291 290 xpparam_t xpp = {
292 291 XDF_INDENT_HEURISTIC, /* flags */
293 292 };
294 293 xdemitconf_t xecfg = {
295 294 XDL_EMIT_BDIFFHUNK, /* flags */
296 295 hunk_consumer, /* hunk_consume_func */
297 296 };
298 297 xdemitcb_t ecb = {
299 298 NULL, /* priv */
300 299 };
301 300
302 if (!PyArg_ParseTuple(args, PY23("s#s#", "y#y#"), &a.ptr, &la, &b.ptr,
303 &lb)) {
301 if (!PyArg_ParseTuple(args, "y#y#", &a.ptr, &la, &b.ptr, &lb)) {
304 302 return NULL;
305 303 }
306 304
307 305 a.size = la;
308 306 b.size = lb;
309 307
310 308 rl = PyList_New(0);
311 309 if (!rl) {
312 310 return PyErr_NoMemory();
313 311 }
314 312
315 313 ecb.priv = rl;
316 314
317 315 if (xdl_diff(&a, &b, &xpp, &xecfg, &ecb) != 0) {
318 316 Py_DECREF(rl);
319 317 return PyErr_NoMemory();
320 318 }
321 319
322 320 return rl;
323 321 }
324 322
325 323 static char mdiff_doc[] = "Efficient binary diff.";
326 324
327 325 static PyMethodDef methods[] = {
328 326 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
329 327 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
330 328 {"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
331 329 {"splitnewlines", splitnewlines, METH_VARARGS,
332 330 "like str.splitlines, but only split on newlines\n"},
333 331 {"xdiffblocks", xdiffblocks, METH_VARARGS,
334 332 "find a list of matching lines using xdiff algorithm\n"},
335 333 {NULL, NULL},
336 334 };
337 335
338 336 static const int version = 3;
339 337
340 338 static struct PyModuleDef bdiff_module = {
341 339 PyModuleDef_HEAD_INIT, "bdiff", mdiff_doc, -1, methods,
342 340 };
343 341
344 342 PyMODINIT_FUNC PyInit_bdiff(void)
345 343 {
346 344 PyObject *m;
347 345 m = PyModule_Create(&bdiff_module);
348 346 PyModule_AddIntConstant(m, "version", version);
349 347 return m;
350 348 }
@@ -1,405 +1,404 b''
1 1 /*
2 2 charencode.c - miscellaneous character encoding
3 3
4 4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12 #include <assert.h>
13 13
14 14 #include "charencode.h"
15 15 #include "compat.h"
16 16 #include "util.h"
17 17
18 18 /* clang-format off */
19 19 static const char lowertable[128] = {
20 20 '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
21 21 '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
22 22 '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
23 23 '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
24 24 '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
25 25 '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
26 26 '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
27 27 '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
28 28 '\x40',
29 29 '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', /* A-G */
30 30 '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', /* H-O */
31 31 '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', /* P-W */
32 32 '\x78', '\x79', '\x7a', /* X-Z */
33 33 '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
34 34 '\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
35 35 '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
36 36 '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
37 37 '\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
38 38 };
39 39
40 40 static const char uppertable[128] = {
41 41 '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
42 42 '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
43 43 '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
44 44 '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
45 45 '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
46 46 '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
47 47 '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
48 48 '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
49 49 '\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47',
50 50 '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f',
51 51 '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57',
52 52 '\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
53 53 '\x60',
54 54 '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', /* a-g */
55 55 '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f', /* h-o */
56 56 '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', /* p-w */
57 57 '\x58', '\x59', '\x5a', /* x-z */
58 58 '\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
59 59 };
60 60
61 61 /* 1: no escape, 2: \<c>, 6: \u<x> */
62 62 static const uint8_t jsonlentable[256] = {
63 63 6, 6, 6, 6, 6, 6, 6, 6, 2, 2, 2, 6, 2, 2, 6, 6, /* b, t, n, f, r */
64 64 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
65 65 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* " */
66 66 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
67 67 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
68 68 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, /* \\ */
69 69 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
70 70 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, /* DEL */
71 71 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
72 72 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
73 73 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
74 74 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
75 75 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
76 76 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
77 77 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
78 78 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
79 79 };
80 80
81 81 static const uint8_t jsonparanoidlentable[128] = {
82 82 6, 6, 6, 6, 6, 6, 6, 6, 2, 2, 2, 6, 2, 2, 6, 6, /* b, t, n, f, r */
83 83 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
84 84 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* " */
85 85 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 1, 6, 1, /* <, > */
86 86 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
87 87 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, /* \\ */
88 88 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
89 89 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, /* DEL */
90 90 };
91 91
92 92 static const char hexchartable[16] = {
93 93 '0', '1', '2', '3', '4', '5', '6', '7',
94 94 '8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
95 95 };
96 96 /* clang-format on */
97 97
98 98 /*
99 99 * Turn a hex-encoded string into binary.
100 100 */
101 101 PyObject *unhexlify(const char *str, Py_ssize_t len)
102 102 {
103 103 PyObject *ret;
104 104 char *d;
105 105 Py_ssize_t i;
106 106
107 107 ret = PyBytes_FromStringAndSize(NULL, len / 2);
108 108
109 109 if (!ret) {
110 110 return NULL;
111 111 }
112 112
113 113 d = PyBytes_AsString(ret);
114 114
115 115 for (i = 0; i < len;) {
116 116 int hi = hexdigit(str, i++);
117 117 int lo = hexdigit(str, i++);
118 118 *d++ = (hi << 4) | lo;
119 119 }
120 120
121 121 return ret;
122 122 }
123 123
124 124 PyObject *isasciistr(PyObject *self, PyObject *args)
125 125 {
126 126 const char *buf;
127 127 Py_ssize_t i, len;
128 if (!PyArg_ParseTuple(args, PY23("s#:isasciistr", "y#:isasciistr"),
129 &buf, &len)) {
128 if (!PyArg_ParseTuple(args, "y#:isasciistr", &buf, &len)) {
130 129 return NULL;
131 130 }
132 131 i = 0;
133 132 /* char array in PyStringObject should be at least 4-byte aligned */
134 133 if (((uintptr_t)buf & 3) == 0) {
135 134 const uint32_t *p = (const uint32_t *)buf;
136 135 for (; i < len / 4; i++) {
137 136 if (p[i] & 0x80808080U) {
138 137 Py_RETURN_FALSE;
139 138 }
140 139 }
141 140 i *= 4;
142 141 }
143 142 for (; i < len; i++) {
144 143 if (buf[i] & 0x80) {
145 144 Py_RETURN_FALSE;
146 145 }
147 146 }
148 147 Py_RETURN_TRUE;
149 148 }
150 149
151 150 static inline PyObject *
152 151 _asciitransform(PyObject *str_obj, const char table[128], PyObject *fallback_fn)
153 152 {
154 153 char *str, *newstr;
155 154 Py_ssize_t i, len;
156 155 PyObject *newobj = NULL;
157 156 PyObject *ret = NULL;
158 157
159 158 str = PyBytes_AS_STRING(str_obj);
160 159 len = PyBytes_GET_SIZE(str_obj);
161 160
162 161 newobj = PyBytes_FromStringAndSize(NULL, len);
163 162 if (!newobj) {
164 163 goto quit;
165 164 }
166 165
167 166 newstr = PyBytes_AS_STRING(newobj);
168 167
169 168 for (i = 0; i < len; i++) {
170 169 char c = str[i];
171 170 if (c & 0x80) {
172 171 if (fallback_fn != NULL) {
173 172 ret = PyObject_CallFunctionObjArgs(
174 173 fallback_fn, str_obj, NULL);
175 174 } else {
176 175 PyObject *err = PyUnicodeDecodeError_Create(
177 176 "ascii", str, len, i, (i + 1),
178 177 "unexpected code byte");
179 178 PyErr_SetObject(PyExc_UnicodeDecodeError, err);
180 179 Py_XDECREF(err);
181 180 }
182 181 goto quit;
183 182 }
184 183 newstr[i] = table[(unsigned char)c];
185 184 }
186 185
187 186 ret = newobj;
188 187 Py_INCREF(ret);
189 188 quit:
190 189 Py_XDECREF(newobj);
191 190 return ret;
192 191 }
193 192
194 193 PyObject *asciilower(PyObject *self, PyObject *args)
195 194 {
196 195 PyObject *str_obj;
197 196 if (!PyArg_ParseTuple(args, "O!:asciilower", &PyBytes_Type, &str_obj)) {
198 197 return NULL;
199 198 }
200 199 return _asciitransform(str_obj, lowertable, NULL);
201 200 }
202 201
203 202 PyObject *asciiupper(PyObject *self, PyObject *args)
204 203 {
205 204 PyObject *str_obj;
206 205 if (!PyArg_ParseTuple(args, "O!:asciiupper", &PyBytes_Type, &str_obj)) {
207 206 return NULL;
208 207 }
209 208 return _asciitransform(str_obj, uppertable, NULL);
210 209 }
211 210
212 211 PyObject *make_file_foldmap(PyObject *self, PyObject *args)
213 212 {
214 213 PyObject *dmap, *spec_obj, *normcase_fallback;
215 214 PyObject *file_foldmap = NULL;
216 215 enum normcase_spec spec;
217 216 PyObject *k, *v;
218 217 dirstateItemObject *tuple;
219 218 Py_ssize_t pos = 0;
220 219 const char *table;
221 220
222 221 if (!PyArg_ParseTuple(args, "O!O!O!:make_file_foldmap", &PyDict_Type,
223 222 &dmap, &PyLong_Type, &spec_obj, &PyFunction_Type,
224 223 &normcase_fallback)) {
225 224 goto quit;
226 225 }
227 226
228 227 spec = (int)PyLong_AS_LONG(spec_obj);
229 228 switch (spec) {
230 229 case NORMCASE_LOWER:
231 230 table = lowertable;
232 231 break;
233 232 case NORMCASE_UPPER:
234 233 table = uppertable;
235 234 break;
236 235 case NORMCASE_OTHER:
237 236 table = NULL;
238 237 break;
239 238 default:
240 239 PyErr_SetString(PyExc_TypeError, "invalid normcasespec");
241 240 goto quit;
242 241 }
243 242
244 243 /* Add some more entries to deal with additions outside this
245 244 function. */
246 245 file_foldmap = _dict_new_presized((PyDict_Size(dmap) / 10) * 11);
247 246 if (file_foldmap == NULL) {
248 247 goto quit;
249 248 }
250 249
251 250 while (PyDict_Next(dmap, &pos, &k, &v)) {
252 251 if (!dirstate_tuple_check(v)) {
253 252 PyErr_SetString(PyExc_TypeError,
254 253 "expected a dirstate tuple");
255 254 goto quit;
256 255 }
257 256
258 257 tuple = (dirstateItemObject *)v;
259 258 if (tuple->flags | dirstate_flag_wc_tracked) {
260 259 PyObject *normed;
261 260 if (table != NULL) {
262 261 normed = _asciitransform(k, table,
263 262 normcase_fallback);
264 263 } else {
265 264 normed = PyObject_CallFunctionObjArgs(
266 265 normcase_fallback, k, NULL);
267 266 }
268 267
269 268 if (normed == NULL) {
270 269 goto quit;
271 270 }
272 271 if (PyDict_SetItem(file_foldmap, normed, k) == -1) {
273 272 Py_DECREF(normed);
274 273 goto quit;
275 274 }
276 275 Py_DECREF(normed);
277 276 }
278 277 }
279 278 return file_foldmap;
280 279 quit:
281 280 Py_XDECREF(file_foldmap);
282 281 return NULL;
283 282 }
284 283
285 284 /* calculate length of JSON-escaped string; returns -1 if unsupported */
286 285 static Py_ssize_t jsonescapelen(const char *buf, Py_ssize_t len, bool paranoid)
287 286 {
288 287 Py_ssize_t i, esclen = 0;
289 288
290 289 if (paranoid) {
291 290 /* don't want to process multi-byte escapes in C */
292 291 for (i = 0; i < len; i++) {
293 292 char c = buf[i];
294 293 if (c & 0x80) {
295 294 PyErr_SetString(PyExc_ValueError,
296 295 "cannot process non-ascii str");
297 296 return -1;
298 297 }
299 298 esclen += jsonparanoidlentable[(unsigned char)c];
300 299 if (esclen < 0) {
301 300 PyErr_SetString(PyExc_MemoryError,
302 301 "overflow in jsonescapelen");
303 302 return -1;
304 303 }
305 304 }
306 305 } else {
307 306 for (i = 0; i < len; i++) {
308 307 char c = buf[i];
309 308 esclen += jsonlentable[(unsigned char)c];
310 309 if (esclen < 0) {
311 310 PyErr_SetString(PyExc_MemoryError,
312 311 "overflow in jsonescapelen");
313 312 return -1;
314 313 }
315 314 }
316 315 }
317 316
318 317 return esclen;
319 318 }
320 319
/*
 * Return the letter that follows the backslash in the two-character JSON
 * escape for c (for example '\n' -> 'n').  Returns '\0' for a character
 * that has no such escape, which should not happen.
 */
static char jsonescapechar2(char c)
{
	/* Parallel tables: raw[k] is escaped as '\\' followed by esc[k]. */
	static const char raw[] = {'\b', '\t', '\n', '\f', '\r', '"', '\\'};
	static const char esc[] = {'b', 't', 'n', 'f', 'r', '"', '\\'};
	size_t k;

	for (k = 0; k < sizeof(raw); k++) {
		if (raw[k] == c) {
			return esc[k];
		}
	}
	return '\0'; /* should not happen */
}
342 341
343 342 /* convert 'origbuf' to JSON-escaped form 'escbuf'; 'origbuf' should only
344 343 include characters mappable by json(paranoid)lentable */
345 344 static void encodejsonescape(char *escbuf, Py_ssize_t esclen,
346 345 const char *origbuf, Py_ssize_t origlen,
347 346 bool paranoid)
348 347 {
349 348 const uint8_t *lentable =
350 349 (paranoid) ? jsonparanoidlentable : jsonlentable;
351 350 Py_ssize_t i, j;
352 351
353 352 for (i = 0, j = 0; i < origlen; i++) {
354 353 char c = origbuf[i];
355 354 uint8_t l = lentable[(unsigned char)c];
356 355 assert(j + l <= esclen);
357 356 switch (l) {
358 357 case 1:
359 358 escbuf[j] = c;
360 359 break;
361 360 case 2:
362 361 escbuf[j] = '\\';
363 362 escbuf[j + 1] = jsonescapechar2(c);
364 363 break;
365 364 case 6:
366 365 memcpy(escbuf + j, "\\u00", 4);
367 366 escbuf[j + 4] = hexchartable[(unsigned char)c >> 4];
368 367 escbuf[j + 5] = hexchartable[(unsigned char)c & 0xf];
369 368 break;
370 369 }
371 370 j += l;
372 371 }
373 372 }
374 373
375 374 PyObject *jsonescapeu8fast(PyObject *self, PyObject *args)
376 375 {
377 376 PyObject *origstr, *escstr;
378 377 const char *origbuf;
379 378 Py_ssize_t origlen, esclen;
380 379 int paranoid;
381 380 if (!PyArg_ParseTuple(args, "O!i:jsonescapeu8fast", &PyBytes_Type,
382 381 &origstr, &paranoid)) {
383 382 return NULL;
384 383 }
385 384
386 385 origbuf = PyBytes_AS_STRING(origstr);
387 386 origlen = PyBytes_GET_SIZE(origstr);
388 387 esclen = jsonescapelen(origbuf, origlen, paranoid);
389 388 if (esclen < 0) {
390 389 return NULL; /* unsupported char found or overflow */
391 390 }
392 391 if (origlen == esclen) {
393 392 Py_INCREF(origstr);
394 393 return origstr;
395 394 }
396 395
397 396 escstr = PyBytes_FromStringAndSize(NULL, esclen);
398 397 if (!escstr) {
399 398 return NULL;
400 399 }
401 400 encodejsonescape(PyBytes_AS_STRING(escstr), esclen, origbuf, origlen,
402 401 paranoid);
403 402
404 403 return escstr;
405 404 }
@@ -1,999 +1,999 b''
1 1 /*
2 2 * manifest.c - manifest type that does on-demand parsing.
3 3 *
4 4 * Copyright 2015, Google Inc.
5 5 *
6 6 * This software may be used and distributed according to the terms of
7 7 * the GNU General Public License, incorporated herein by reference.
8 8 */
9 9 #include <Python.h>
10 10
11 11 #include <assert.h>
12 12 #include <stdlib.h>
13 13 #include <string.h>
14 14
15 15 #include "charencode.h"
16 16 #include "util.h"
17 17
18 18 #define DEFAULT_LINES 100000
19 19
20 20 typedef struct {
21 21 char *start;
22 22 Py_ssize_t len; /* length of line including terminal newline */
23 23 char hash_suffix;
24 24 bool from_malloc;
25 25 bool deleted;
26 26 } line;
27 27
28 28 typedef struct {
29 29 PyObject_HEAD
30 30 PyObject *pydata;
31 31 Py_ssize_t nodelen;
32 32 line *lines;
33 33 int numlines; /* number of line entries */
34 34 int livelines; /* number of non-deleted lines */
35 35 int maxlines; /* allocated number of lines */
36 36 bool dirty;
37 37 } lazymanifest;
38 38
39 39 #define MANIFEST_OOM -1
40 40 #define MANIFEST_NOT_SORTED -2
41 41 #define MANIFEST_MALFORMED -3
42 42 #define MANIFEST_BOGUS_FILENAME -4
43 43 #define MANIFEST_TOO_SHORT_LINE -5
44 44
45 45 /* get the length of the path for a line */
46 46 static Py_ssize_t pathlen(line *l)
47 47 {
48 48 const char *end = memchr(l->start, '\0', l->len);
49 49 return (end) ? (Py_ssize_t)(end - l->start) : l->len;
50 50 }
51 51
52 52 /* get the node value of a single line */
53 53 static PyObject *nodeof(Py_ssize_t nodelen, line *l, char *flag)
54 54 {
55 55 char *s = l->start;
56 56 Py_ssize_t llen = pathlen(l);
57 57 Py_ssize_t hlen = l->len - llen - 2;
58 58 PyObject *hash;
59 59 if (llen + 1 + 40 + 1 > l->len) { /* path '\0' hash '\n' */
60 60 PyErr_SetString(PyExc_ValueError, "manifest line too short");
61 61 return NULL;
62 62 }
63 63 /* Detect flags after the hash first. */
64 64 switch (s[llen + hlen]) {
65 65 case 'l':
66 66 case 't':
67 67 case 'x':
68 68 *flag = s[llen + hlen];
69 69 --hlen;
70 70 break;
71 71 default:
72 72 *flag = '\0';
73 73 break;
74 74 }
75 75
76 76 if (hlen != 2 * nodelen) {
77 77 PyErr_SetString(PyExc_ValueError, "invalid node length in manifest");
78 78 return NULL;
79 79 }
80 80 hash = unhexlify(s + llen + 1, nodelen * 2);
81 81 if (!hash) {
82 82 return NULL;
83 83 }
84 84 if (l->hash_suffix != '\0') {
85 85 char newhash[33];
86 86 memcpy(newhash, PyBytes_AsString(hash), nodelen);
87 87 Py_DECREF(hash);
88 88 newhash[nodelen] = l->hash_suffix;
89 89 hash = PyBytes_FromStringAndSize(newhash, nodelen + 1);
90 90 }
91 91 return hash;
92 92 }
93 93
94 94 /* get the node hash and flags of a line as a tuple */
95 95 static PyObject *hashflags(Py_ssize_t nodelen, line *l)
96 96 {
97 97 char flag;
98 98 PyObject *hash = nodeof(nodelen, l, &flag);
99 99 PyObject *flags;
100 100 PyObject *tup;
101 101
102 102 if (!hash)
103 103 return NULL;
104 104 flags = PyBytes_FromStringAndSize(&flag, flag ? 1 : 0);
105 105 if (!flags) {
106 106 Py_DECREF(hash);
107 107 return NULL;
108 108 }
109 109 tup = PyTuple_Pack(2, hash, flags);
110 110 Py_DECREF(flags);
111 111 Py_DECREF(hash);
112 112 return tup;
113 113 }
114 114
115 115 /* if we're about to run out of space in the line index, add more */
116 116 static bool realloc_if_full(lazymanifest *self)
117 117 {
118 118 if (self->numlines == self->maxlines) {
119 119 self->maxlines *= 2;
120 120 self->lines = realloc(self->lines, self->maxlines * sizeof(line));
121 121 }
122 122 return !!self->lines;
123 123 }
124 124
125 125 /*
126 126 * Find the line boundaries in the manifest that 'data' points to and store
127 127 * information about each line in 'self'.
128 128 */
129 129 static int find_lines(lazymanifest *self, char *data, Py_ssize_t len)
130 130 {
131 131 char *prev = NULL;
132 132 while (len > 0) {
133 133 line *l;
134 134 char *next;
135 135 if (*data == '\0') {
136 136 /* It's implausible there's no filename, don't
137 137 * even bother looking for the newline. */
138 138 return MANIFEST_BOGUS_FILENAME;
139 139 }
140 140 next = memchr(data, '\n', len);
141 141 if (!next) {
142 142 return MANIFEST_MALFORMED;
143 143 }
144 144 if ((next - data) < 42) {
145 145 /* We should have at least 42 bytes in a line:
146 146 1 byte filename
147 147 1 NUL
148 148 40 bytes of hash
149 149 so we can give up here.
150 150 */
151 151 return MANIFEST_TOO_SHORT_LINE;
152 152 }
153 153 next++; /* advance past newline */
154 154 if (prev && strcmp(prev, data) > -1) {
155 155 /* This data isn't sorted, so we have to abort. */
156 156 return MANIFEST_NOT_SORTED;
157 157 }
158 158 if (!realloc_if_full(self)) {
159 159 return MANIFEST_OOM; /* no memory */
160 160 }
161 161 l = self->lines + ((self->numlines)++);
162 162 l->start = data;
163 163 l->len = next - data;
164 164 l->hash_suffix = '\0';
165 165 l->from_malloc = false;
166 166 l->deleted = false;
167 167 len = len - l->len;
168 168 prev = data;
169 169 data = next;
170 170 }
171 171 self->livelines = self->numlines;
172 172 return 0;
173 173 }
174 174
175 175 static void lazymanifest_init_early(lazymanifest *self)
176 176 {
177 177 self->pydata = NULL;
178 178 self->lines = NULL;
179 179 self->numlines = 0;
180 180 self->maxlines = 0;
181 181 }
182 182
183 183 static int lazymanifest_init(lazymanifest *self, PyObject *args)
184 184 {
185 185 char *data;
186 186 Py_ssize_t nodelen, len;
187 187 int err, ret;
188 188 PyObject *pydata;
189 189
190 190 lazymanifest_init_early(self);
191 191 if (!PyArg_ParseTuple(args, "nS", &nodelen, &pydata)) {
192 192 return -1;
193 193 }
194 194 if (nodelen != 20 && nodelen != 32) {
195 195 /* See fixed buffer in nodeof */
196 196 PyErr_Format(PyExc_ValueError, "Unsupported node length");
197 197 return -1;
198 198 }
199 199 self->nodelen = nodelen;
200 200 self->dirty = false;
201 201
202 202 err = PyBytes_AsStringAndSize(pydata, &data, &len);
203 203 if (err == -1)
204 204 return -1;
205 205 self->pydata = pydata;
206 206 Py_INCREF(self->pydata);
207 207 Py_BEGIN_ALLOW_THREADS
208 208 self->lines = malloc(DEFAULT_LINES * sizeof(line));
209 209 self->maxlines = DEFAULT_LINES;
210 210 self->numlines = 0;
211 211 if (!self->lines)
212 212 ret = MANIFEST_OOM;
213 213 else
214 214 ret = find_lines(self, data, len);
215 215 Py_END_ALLOW_THREADS
216 216 switch (ret) {
217 217 case 0:
218 218 break;
219 219 case MANIFEST_OOM:
220 220 PyErr_NoMemory();
221 221 break;
222 222 case MANIFEST_NOT_SORTED:
223 223 PyErr_Format(PyExc_ValueError,
224 224 "Manifest lines not in sorted order.");
225 225 break;
226 226 case MANIFEST_MALFORMED:
227 227 PyErr_Format(PyExc_ValueError,
228 228 "Manifest did not end in a newline.");
229 229 break;
230 230 case MANIFEST_BOGUS_FILENAME:
231 231 PyErr_Format(
232 232 PyExc_ValueError,
233 233 "Manifest had an entry with a zero-length filename.");
234 234 break;
235 235 case MANIFEST_TOO_SHORT_LINE:
236 236 PyErr_Format(
237 237 PyExc_ValueError,
238 238 "Manifest had implausibly-short line.");
239 239 break;
240 240 default:
241 241 PyErr_Format(PyExc_ValueError,
242 242 "Unknown problem parsing manifest.");
243 243 }
244 244 return ret == 0 ? 0 : -1;
245 245 }
246 246
247 247 static void lazymanifest_dealloc(lazymanifest *self)
248 248 {
249 249 /* free any extra lines we had to allocate */
250 250 int i;
251 251 for (i = 0; self->lines && (i < self->numlines); i++) {
252 252 if (self->lines[i].from_malloc) {
253 253 free(self->lines[i].start);
254 254 }
255 255 }
256 256 free(self->lines);
257 257 self->lines = NULL;
258 258 if (self->pydata) {
259 259 Py_DECREF(self->pydata);
260 260 self->pydata = NULL;
261 261 }
262 262 PyObject_Del(self);
263 263 }
264 264
265 265 /* iteration support */
266 266
267 267 typedef struct {
268 268 PyObject_HEAD lazymanifest *m;
269 269 Py_ssize_t pos;
270 270 } lmIter;
271 271
272 272 static void lmiter_dealloc(PyObject *o)
273 273 {
274 274 lmIter *self = (lmIter *)o;
275 275 Py_DECREF(self->m);
276 276 PyObject_Del(self);
277 277 }
278 278
279 279 static line *lmiter_nextline(lmIter *self)
280 280 {
281 281 do {
282 282 self->pos++;
283 283 if (self->pos >= self->m->numlines) {
284 284 return NULL;
285 285 }
286 286 /* skip over deleted manifest entries */
287 287 } while (self->m->lines[self->pos].deleted);
288 288 return self->m->lines + self->pos;
289 289 }
290 290
291 291 static PyObject *lmiter_iterentriesnext(PyObject *o)
292 292 {
293 293 lmIter *self = (lmIter *)o;
294 294 Py_ssize_t pl;
295 295 line *l;
296 296 char flag;
297 297 PyObject *ret = NULL, *path = NULL, *hash = NULL, *flags = NULL;
298 298 l = lmiter_nextline(self);
299 299 if (!l) {
300 300 goto done;
301 301 }
302 302 pl = pathlen(l);
303 303 path = PyBytes_FromStringAndSize(l->start, pl);
304 304 hash = nodeof(self->m->nodelen, l, &flag);
305 305 if (!path || !hash) {
306 306 goto done;
307 307 }
308 308 flags = PyBytes_FromStringAndSize(&flag, flag ? 1 : 0);
309 309 if (!flags) {
310 310 goto done;
311 311 }
312 312 ret = PyTuple_Pack(3, path, hash, flags);
313 313 done:
314 314 Py_XDECREF(path);
315 315 Py_XDECREF(hash);
316 316 Py_XDECREF(flags);
317 317 return ret;
318 318 }
319 319
320 320 #define LAZYMANIFESTENTRIESITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT
321 321
322 322 static PyTypeObject lazymanifestEntriesIterator = {
323 323 PyVarObject_HEAD_INIT(NULL, 0) /* header */
324 324 "parsers.lazymanifest.entriesiterator", /*tp_name */
325 325 sizeof(lmIter), /*tp_basicsize */
326 326 0, /*tp_itemsize */
327 327 lmiter_dealloc, /*tp_dealloc */
328 328 0, /*tp_print */
329 329 0, /*tp_getattr */
330 330 0, /*tp_setattr */
331 331 0, /*tp_compare */
332 332 0, /*tp_repr */
333 333 0, /*tp_as_number */
334 334 0, /*tp_as_sequence */
335 335 0, /*tp_as_mapping */
336 336 0, /*tp_hash */
337 337 0, /*tp_call */
338 338 0, /*tp_str */
339 339 0, /*tp_getattro */
340 340 0, /*tp_setattro */
341 341 0, /*tp_as_buffer */
342 342 LAZYMANIFESTENTRIESITERATOR_TPFLAGS, /* tp_flags */
343 343 "Iterator for 3-tuples in a lazymanifest.", /* tp_doc */
344 344 0, /* tp_traverse */
345 345 0, /* tp_clear */
346 346 0, /* tp_richcompare */
347 347 0, /* tp_weaklistoffset */
348 348 PyObject_SelfIter, /* tp_iter: __iter__() method */
349 349 lmiter_iterentriesnext, /* tp_iternext: next() method */
350 350 };
351 351
352 352 static PyObject *lmiter_iterkeysnext(PyObject *o)
353 353 {
354 354 Py_ssize_t pl;
355 355 line *l = lmiter_nextline((lmIter *)o);
356 356 if (!l) {
357 357 return NULL;
358 358 }
359 359 pl = pathlen(l);
360 360 return PyBytes_FromStringAndSize(l->start, pl);
361 361 }
362 362
363 363 #define LAZYMANIFESTKEYSITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT
364 364
365 365 static PyTypeObject lazymanifestKeysIterator = {
366 366 PyVarObject_HEAD_INIT(NULL, 0) /* header */
367 367 "parsers.lazymanifest.keysiterator", /*tp_name */
368 368 sizeof(lmIter), /*tp_basicsize */
369 369 0, /*tp_itemsize */
370 370 lmiter_dealloc, /*tp_dealloc */
371 371 0, /*tp_print */
372 372 0, /*tp_getattr */
373 373 0, /*tp_setattr */
374 374 0, /*tp_compare */
375 375 0, /*tp_repr */
376 376 0, /*tp_as_number */
377 377 0, /*tp_as_sequence */
378 378 0, /*tp_as_mapping */
379 379 0, /*tp_hash */
380 380 0, /*tp_call */
381 381 0, /*tp_str */
382 382 0, /*tp_getattro */
383 383 0, /*tp_setattro */
384 384 0, /*tp_as_buffer */
385 385 LAZYMANIFESTKEYSITERATOR_TPFLAGS, /* tp_flags */
386 386 "Keys iterator for a lazymanifest.", /* tp_doc */
387 387 0, /* tp_traverse */
388 388 0, /* tp_clear */
389 389 0, /* tp_richcompare */
390 390 0, /* tp_weaklistoffset */
391 391 PyObject_SelfIter, /* tp_iter: __iter__() method */
392 392 lmiter_iterkeysnext, /* tp_iternext: next() method */
393 393 };
394 394
395 395 static lazymanifest *lazymanifest_copy(lazymanifest *self);
396 396
397 397 static PyObject *lazymanifest_getentriesiter(lazymanifest *self)
398 398 {
399 399 lmIter *i = NULL;
400 400 lazymanifest *t = lazymanifest_copy(self);
401 401 if (!t) {
402 402 PyErr_NoMemory();
403 403 return NULL;
404 404 }
405 405 i = PyObject_New(lmIter, &lazymanifestEntriesIterator);
406 406 if (i) {
407 407 i->m = t;
408 408 i->pos = -1;
409 409 } else {
410 410 Py_DECREF(t);
411 411 PyErr_NoMemory();
412 412 }
413 413 return (PyObject *)i;
414 414 }
415 415
416 416 static PyObject *lazymanifest_getkeysiter(lazymanifest *self)
417 417 {
418 418 lmIter *i = NULL;
419 419 lazymanifest *t = lazymanifest_copy(self);
420 420 if (!t) {
421 421 PyErr_NoMemory();
422 422 return NULL;
423 423 }
424 424 i = PyObject_New(lmIter, &lazymanifestKeysIterator);
425 425 if (i) {
426 426 i->m = t;
427 427 i->pos = -1;
428 428 } else {
429 429 Py_DECREF(t);
430 430 PyErr_NoMemory();
431 431 }
432 432 return (PyObject *)i;
433 433 }
434 434
435 435 /* __getitem__ and __setitem__ support */
436 436
437 437 static Py_ssize_t lazymanifest_size(lazymanifest *self)
438 438 {
439 439 return self->livelines;
440 440 }
441 441
442 442 static int linecmp(const void *left, const void *right)
443 443 {
444 444 return strcmp(((const line *)left)->start,
445 445 ((const line *)right)->start);
446 446 }
447 447
448 448 static PyObject *lazymanifest_getitem(lazymanifest *self, PyObject *key)
449 449 {
450 450 line needle;
451 451 line *hit;
452 452 if (!PyBytes_Check(key)) {
453 453 PyErr_Format(PyExc_TypeError,
454 454 "getitem: manifest keys must be a string.");
455 455 return NULL;
456 456 }
457 457 needle.start = PyBytes_AsString(key);
458 458 hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
459 459 &linecmp);
460 460 if (!hit || hit->deleted) {
461 461 PyErr_Format(PyExc_KeyError, "No such manifest entry.");
462 462 return NULL;
463 463 }
464 464 return hashflags(self->nodelen, hit);
465 465 }
466 466
467 467 static int lazymanifest_delitem(lazymanifest *self, PyObject *key)
468 468 {
469 469 line needle;
470 470 line *hit;
471 471 if (!PyBytes_Check(key)) {
472 472 PyErr_Format(PyExc_TypeError,
473 473 "delitem: manifest keys must be a string.");
474 474 return -1;
475 475 }
476 476 needle.start = PyBytes_AsString(key);
477 477 hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
478 478 &linecmp);
479 479 if (!hit || hit->deleted) {
480 480 PyErr_Format(PyExc_KeyError,
481 481 "Tried to delete nonexistent manifest entry.");
482 482 return -1;
483 483 }
484 484 self->dirty = true;
485 485 hit->deleted = true;
486 486 self->livelines--;
487 487 return 0;
488 488 }
489 489
490 490 /* Do a binary search for the insertion point for new, creating the
491 491 * new entry if needed. */
492 492 static int internalsetitem(lazymanifest *self, line *new)
493 493 {
494 494 int start = 0, end = self->numlines;
495 495 while (start < end) {
496 496 int pos = start + (end - start) / 2;
497 497 int c = linecmp(new, self->lines + pos);
498 498 if (c < 0)
499 499 end = pos;
500 500 else if (c > 0)
501 501 start = pos + 1;
502 502 else {
503 503 if (self->lines[pos].deleted)
504 504 self->livelines++;
505 505 if (self->lines[pos].from_malloc)
506 506 free(self->lines[pos].start);
507 507 start = pos;
508 508 goto finish;
509 509 }
510 510 }
511 511 /* being here means we need to do an insert */
512 512 if (!realloc_if_full(self)) {
513 513 PyErr_NoMemory();
514 514 return -1;
515 515 }
516 516 memmove(self->lines + start + 1, self->lines + start,
517 517 (self->numlines - start) * sizeof(line));
518 518 self->numlines++;
519 519 self->livelines++;
520 520 finish:
521 521 self->lines[start] = *new;
522 522 self->dirty = true;
523 523 return 0;
524 524 }
525 525
526 526 static int lazymanifest_setitem(
527 527 lazymanifest *self, PyObject *key, PyObject *value)
528 528 {
529 529 char *path;
530 530 Py_ssize_t plen;
531 531 PyObject *pyhash;
532 532 Py_ssize_t hlen;
533 533 char *hash;
534 534 PyObject *pyflags;
535 535 char *flags;
536 536 Py_ssize_t flen;
537 537 Py_ssize_t dlen;
538 538 char *dest;
539 539 int i;
540 540 line new;
541 541 if (!PyBytes_Check(key)) {
542 542 PyErr_Format(PyExc_TypeError,
543 543 "setitem: manifest keys must be a string.");
544 544 return -1;
545 545 }
546 546 if (!value) {
547 547 return lazymanifest_delitem(self, key);
548 548 }
549 549 if (!PyTuple_Check(value) || PyTuple_Size(value) != 2) {
550 550 PyErr_Format(PyExc_TypeError,
551 551 "Manifest values must be a tuple of (node, flags).");
552 552 return -1;
553 553 }
554 554 if (PyBytes_AsStringAndSize(key, &path, &plen) == -1) {
555 555 return -1;
556 556 }
557 557
558 558 pyhash = PyTuple_GetItem(value, 0);
559 559 if (!PyBytes_Check(pyhash)) {
560 560 PyErr_Format(PyExc_TypeError,
561 561 "node must be a %zi bytes string", self->nodelen);
562 562 return -1;
563 563 }
564 564 hlen = PyBytes_Size(pyhash);
565 565 if (hlen != self->nodelen) {
566 566 PyErr_Format(PyExc_TypeError,
567 567 "node must be a %zi bytes string", self->nodelen);
568 568 return -1;
569 569 }
570 570 hash = PyBytes_AsString(pyhash);
571 571
572 572 pyflags = PyTuple_GetItem(value, 1);
573 573 if (!PyBytes_Check(pyflags) || PyBytes_Size(pyflags) > 1) {
574 574 PyErr_Format(PyExc_TypeError,
575 575 "flags must a 0 or 1 bytes string");
576 576 return -1;
577 577 }
578 578 if (PyBytes_AsStringAndSize(pyflags, &flags, &flen) == -1) {
579 579 return -1;
580 580 }
581 581 if (flen == 1) {
582 582 switch (*flags) {
583 583 case 'l':
584 584 case 't':
585 585 case 'x':
586 586 break;
587 587 default:
588 588 PyErr_Format(PyExc_TypeError, "invalid manifest flag");
589 589 return -1;
590 590 }
591 591 }
592 592 /* one null byte and one newline */
593 593 dlen = plen + hlen * 2 + 1 + flen + 1;
594 594 dest = malloc(dlen);
595 595 if (!dest) {
596 596 PyErr_NoMemory();
597 597 return -1;
598 598 }
599 599 memcpy(dest, path, plen + 1);
600 600 for (i = 0; i < hlen; i++) {
601 601 /* Cast to unsigned, so it will not get sign-extended when promoted
602 602 * to int (as is done when passing to a variadic function)
603 603 */
604 604 sprintf(dest + plen + 1 + (i * 2), "%02x", (unsigned char)hash[i]);
605 605 }
606 606 memcpy(dest + plen + 2 * hlen + 1, flags, flen);
607 607 dest[plen + 2 * hlen + 1 + flen] = '\n';
608 608 new.start = dest;
609 609 new.len = dlen;
610 610 new.hash_suffix = '\0';
611 611 if (hlen > 20) {
612 612 new.hash_suffix = hash[20];
613 613 }
614 614 new.from_malloc = true; /* is `start` a pointer we allocated? */
615 615 new.deleted = false; /* is this entry deleted? */
616 616 if (internalsetitem(self, &new)) {
617 617 return -1;
618 618 }
619 619 return 0;
620 620 }
621 621
622 622 static PyMappingMethods lazymanifest_mapping_methods = {
623 623 (lenfunc)lazymanifest_size, /* mp_length */
624 624 (binaryfunc)lazymanifest_getitem, /* mp_subscript */
625 625 (objobjargproc)lazymanifest_setitem, /* mp_ass_subscript */
626 626 };
627 627
628 628 /* sequence methods (important or __contains__ builds an iterator) */
629 629
630 630 static int lazymanifest_contains(lazymanifest *self, PyObject *key)
631 631 {
632 632 line needle;
633 633 line *hit;
634 634 if (!PyBytes_Check(key)) {
635 635 /* Our keys are always strings, so if the contains
636 636 * check is for a non-string, just return false. */
637 637 return 0;
638 638 }
639 639 needle.start = PyBytes_AsString(key);
640 640 hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
641 641 &linecmp);
642 642 if (!hit || hit->deleted) {
643 643 return 0;
644 644 }
645 645 return 1;
646 646 }
647 647
648 648 static PySequenceMethods lazymanifest_seq_meths = {
649 649 (lenfunc)lazymanifest_size, /* sq_length */
650 650 0, /* sq_concat */
651 651 0, /* sq_repeat */
652 652 0, /* sq_item */
653 653 0, /* sq_slice */
654 654 0, /* sq_ass_item */
655 655 0, /* sq_ass_slice */
656 656 (objobjproc)lazymanifest_contains, /* sq_contains */
657 657 0, /* sq_inplace_concat */
658 658 0, /* sq_inplace_repeat */
659 659 };
660 660
661 661
662 662 /* Other methods (copy, diff, etc) */
663 663 static PyTypeObject lazymanifestType;
664 664
665 665 /* If the manifest has changes, build the new manifest text and reindex it. */
666 666 static int compact(lazymanifest *self)
667 667 {
668 668 int i;
669 669 ssize_t need = 0;
670 670 char *data;
671 671 line *src, *dst;
672 672 PyObject *pydata;
673 673 if (!self->dirty)
674 674 return 0;
675 675 for (i = 0; i < self->numlines; i++) {
676 676 if (!self->lines[i].deleted) {
677 677 need += self->lines[i].len;
678 678 }
679 679 }
680 680 pydata = PyBytes_FromStringAndSize(NULL, need);
681 681 if (!pydata)
682 682 return -1;
683 683 data = PyBytes_AsString(pydata);
684 684 if (!data) {
685 685 return -1;
686 686 }
687 687 src = self->lines;
688 688 dst = self->lines;
689 689 for (i = 0; i < self->numlines; i++, src++) {
690 690 char *tofree = NULL;
691 691 if (src->from_malloc) {
692 692 tofree = src->start;
693 693 }
694 694 if (!src->deleted) {
695 695 memcpy(data, src->start, src->len);
696 696 *dst = *src;
697 697 dst->start = data;
698 698 dst->from_malloc = false;
699 699 data += dst->len;
700 700 dst++;
701 701 }
702 702 free(tofree);
703 703 }
704 704 Py_DECREF(self->pydata);
705 705 self->pydata = pydata;
706 706 self->numlines = self->livelines;
707 707 self->dirty = false;
708 708 return 0;
709 709 }
710 710
711 711 static PyObject *lazymanifest_text(lazymanifest *self)
712 712 {
713 713 if (compact(self) != 0) {
714 714 PyErr_NoMemory();
715 715 return NULL;
716 716 }
717 717 Py_INCREF(self->pydata);
718 718 return self->pydata;
719 719 }
720 720
721 721 static lazymanifest *lazymanifest_copy(lazymanifest *self)
722 722 {
723 723 lazymanifest *copy = NULL;
724 724 if (compact(self) != 0) {
725 725 goto nomem;
726 726 }
727 727 copy = PyObject_New(lazymanifest, &lazymanifestType);
728 728 if (!copy) {
729 729 goto nomem;
730 730 }
731 731 lazymanifest_init_early(copy);
732 732 copy->nodelen = self->nodelen;
733 733 copy->numlines = self->numlines;
734 734 copy->livelines = self->livelines;
735 735 copy->dirty = false;
736 736 copy->lines = malloc(self->maxlines *sizeof(line));
737 737 if (!copy->lines) {
738 738 goto nomem;
739 739 }
740 740 memcpy(copy->lines, self->lines, self->numlines * sizeof(line));
741 741 copy->maxlines = self->maxlines;
742 742 copy->pydata = self->pydata;
743 743 Py_INCREF(copy->pydata);
744 744 return copy;
745 745 nomem:
746 746 PyErr_NoMemory();
747 747 Py_XDECREF(copy);
748 748 return NULL;
749 749 }
750 750
751 751 static lazymanifest *lazymanifest_filtercopy(
752 752 lazymanifest *self, PyObject *matchfn)
753 753 {
754 754 lazymanifest *copy = NULL;
755 755 int i;
756 756 if (!PyCallable_Check(matchfn)) {
757 757 PyErr_SetString(PyExc_TypeError, "matchfn must be callable");
758 758 return NULL;
759 759 }
760 760 /* compact ourselves first to avoid double-frees later when we
761 761 * compact tmp so that it doesn't have random pointers to our
762 762 * underlying from_malloc-data (self->pydata is safe) */
763 763 if (compact(self) != 0) {
764 764 goto nomem;
765 765 }
766 766 copy = PyObject_New(lazymanifest, &lazymanifestType);
767 767 if (!copy) {
768 768 goto nomem;
769 769 }
770 770 lazymanifest_init_early(copy);
771 771 copy->nodelen = self->nodelen;
772 772 copy->dirty = true;
773 773 copy->lines = malloc(self->maxlines * sizeof(line));
774 774 if (!copy->lines) {
775 775 goto nomem;
776 776 }
777 777 copy->maxlines = self->maxlines;
778 778 copy->numlines = 0;
779 779 copy->pydata = self->pydata;
780 780 Py_INCREF(copy->pydata);
781 781 for (i = 0; i < self->numlines; i++) {
782 782 PyObject *arglist = NULL, *result = NULL;
783 arglist = Py_BuildValue(PY23("(s)", "(y)"),
783 arglist = Py_BuildValue("(y)",
784 784 self->lines[i].start);
785 785 if (!arglist) {
786 786 goto bail;
787 787 }
788 788 result = PyObject_CallObject(matchfn, arglist);
789 789 Py_DECREF(arglist);
790 790 /* if the callback raised an exception, just let it
791 791 * through and give up */
792 792 if (!result) {
793 793 goto bail;
794 794 }
795 795 if (PyObject_IsTrue(result)) {
796 796 assert(!(self->lines[i].from_malloc));
797 797 copy->lines[copy->numlines++] = self->lines[i];
798 798 }
799 799 Py_DECREF(result);
800 800 }
801 801 copy->livelines = copy->numlines;
802 802 return copy;
803 803 nomem:
804 804 PyErr_NoMemory();
805 805 bail:
806 806 Py_XDECREF(copy);
807 807 return NULL;
808 808 }
809 809
810 810 static PyObject *lazymanifest_diff(lazymanifest *self, PyObject *args)
811 811 {
812 812 lazymanifest *other;
813 813 PyObject *pyclean = NULL;
814 814 bool listclean;
815 815 PyObject *emptyTup = NULL, *ret = NULL;
816 816 PyObject *es;
817 817 int sneedle = 0, oneedle = 0;
818 818 if (!PyArg_ParseTuple(args, "O!|O", &lazymanifestType, &other, &pyclean)) {
819 819 return NULL;
820 820 }
821 821 listclean = (!pyclean) ? false : PyObject_IsTrue(pyclean);
822 822 es = PyBytes_FromString("");
823 823 if (!es) {
824 824 goto nomem;
825 825 }
826 826 emptyTup = PyTuple_Pack(2, Py_None, es);
827 827 Py_DECREF(es);
828 828 if (!emptyTup) {
829 829 goto nomem;
830 830 }
831 831 ret = PyDict_New();
832 832 if (!ret) {
833 833 goto nomem;
834 834 }
835 835 while (sneedle != self->numlines || oneedle != other->numlines) {
836 836 line *left = self->lines + sneedle;
837 837 line *right = other->lines + oneedle;
838 838 int result;
839 839 PyObject *key;
840 840 PyObject *outer;
841 841 /* If we're looking at a deleted entry and it's not
842 842 * the end of the manifest, just skip it. */
843 843 if (sneedle < self->numlines && left->deleted) {
844 844 sneedle++;
845 845 continue;
846 846 }
847 847 if (oneedle < other->numlines && right->deleted) {
848 848 oneedle++;
849 849 continue;
850 850 }
851 851 /* if we're at the end of either manifest, then we
852 852 * know the remaining items are adds so we can skip
853 853 * the strcmp. */
854 854 if (sneedle == self->numlines) {
855 855 result = 1;
856 856 } else if (oneedle == other->numlines) {
857 857 result = -1;
858 858 } else {
859 859 result = linecmp(left, right);
860 860 }
861 861 key = result <= 0 ?
862 862 PyBytes_FromString(left->start) :
863 863 PyBytes_FromString(right->start);
864 864 if (!key)
865 865 goto nomem;
866 866 if (result < 0) {
867 867 PyObject *l = hashflags(self->nodelen, left);
868 868 if (!l) {
869 869 goto nomem;
870 870 }
871 871 outer = PyTuple_Pack(2, l, emptyTup);
872 872 Py_DECREF(l);
873 873 if (!outer) {
874 874 goto nomem;
875 875 }
876 876 PyDict_SetItem(ret, key, outer);
877 877 Py_DECREF(outer);
878 878 sneedle++;
879 879 } else if (result > 0) {
880 880 PyObject *r = hashflags(self->nodelen, right);
881 881 if (!r) {
882 882 goto nomem;
883 883 }
884 884 outer = PyTuple_Pack(2, emptyTup, r);
885 885 Py_DECREF(r);
886 886 if (!outer) {
887 887 goto nomem;
888 888 }
889 889 PyDict_SetItem(ret, key, outer);
890 890 Py_DECREF(outer);
891 891 oneedle++;
892 892 } else {
893 893 /* file exists in both manifests */
894 894 if (left->len != right->len
895 895 || memcmp(left->start, right->start, left->len)
896 896 || left->hash_suffix != right->hash_suffix) {
897 897 PyObject *l = hashflags(self->nodelen, left);
898 898 PyObject *r;
899 899 if (!l) {
900 900 goto nomem;
901 901 }
902 902 r = hashflags(self->nodelen, right);
903 903 if (!r) {
904 904 Py_DECREF(l);
905 905 goto nomem;
906 906 }
907 907 outer = PyTuple_Pack(2, l, r);
908 908 Py_DECREF(l);
909 909 Py_DECREF(r);
910 910 if (!outer) {
911 911 goto nomem;
912 912 }
913 913 PyDict_SetItem(ret, key, outer);
914 914 Py_DECREF(outer);
915 915 } else if (listclean) {
916 916 PyDict_SetItem(ret, key, Py_None);
917 917 }
918 918 sneedle++;
919 919 oneedle++;
920 920 }
921 921 Py_DECREF(key);
922 922 }
923 923 Py_DECREF(emptyTup);
924 924 return ret;
925 925 nomem:
926 926 PyErr_NoMemory();
927 927 Py_XDECREF(ret);
928 928 Py_XDECREF(emptyTup);
929 929 return NULL;
930 930 }
931 931
932 932 static PyMethodDef lazymanifest_methods[] = {
933 933 {"iterkeys", (PyCFunction)lazymanifest_getkeysiter, METH_NOARGS,
934 934 "Iterate over file names in this lazymanifest."},
935 935 {"iterentries", (PyCFunction)lazymanifest_getentriesiter, METH_NOARGS,
936 936 "Iterate over (path, nodeid, flags) tuples in this lazymanifest."},
937 937 {"copy", (PyCFunction)lazymanifest_copy, METH_NOARGS,
938 938 "Make a copy of this lazymanifest."},
939 939 {"filtercopy", (PyCFunction)lazymanifest_filtercopy, METH_O,
940 940 "Make a copy of this manifest filtered by matchfn."},
941 941 {"diff", (PyCFunction)lazymanifest_diff, METH_VARARGS,
942 942 "Compare this lazymanifest to another one."},
943 943 {"text", (PyCFunction)lazymanifest_text, METH_NOARGS,
944 944 "Encode this manifest to text."},
945 945 {NULL},
946 946 };
947 947
948 948 #define LAZYMANIFEST_TPFLAGS Py_TPFLAGS_DEFAULT
949 949
950 950 static PyTypeObject lazymanifestType = {
951 951 PyVarObject_HEAD_INIT(NULL, 0) /* header */
952 952 "parsers.lazymanifest", /* tp_name */
953 953 sizeof(lazymanifest), /* tp_basicsize */
954 954 0, /* tp_itemsize */
955 955 (destructor)lazymanifest_dealloc, /* tp_dealloc */
956 956 0, /* tp_print */
957 957 0, /* tp_getattr */
958 958 0, /* tp_setattr */
959 959 0, /* tp_compare */
960 960 0, /* tp_repr */
961 961 0, /* tp_as_number */
962 962 &lazymanifest_seq_meths, /* tp_as_sequence */
963 963 &lazymanifest_mapping_methods, /* tp_as_mapping */
964 964 0, /* tp_hash */
965 965 0, /* tp_call */
966 966 0, /* tp_str */
967 967 0, /* tp_getattro */
968 968 0, /* tp_setattro */
969 969 0, /* tp_as_buffer */
970 970 LAZYMANIFEST_TPFLAGS, /* tp_flags */
971 971 "TODO(augie)", /* tp_doc */
972 972 0, /* tp_traverse */
973 973 0, /* tp_clear */
974 974 0, /* tp_richcompare */
975 975 0, /* tp_weaklistoffset */
976 976 (getiterfunc)lazymanifest_getkeysiter, /* tp_iter */
977 977 0, /* tp_iternext */
978 978 lazymanifest_methods, /* tp_methods */
979 979 0, /* tp_members */
980 980 0, /* tp_getset */
981 981 0, /* tp_base */
982 982 0, /* tp_dict */
983 983 0, /* tp_descr_get */
984 984 0, /* tp_descr_set */
985 985 0, /* tp_dictoffset */
986 986 (initproc)lazymanifest_init, /* tp_init */
987 987 0, /* tp_alloc */
988 988 };
989 989
990 990 void manifest_module_init(PyObject * mod)
991 991 {
992 992 lazymanifestType.tp_new = PyType_GenericNew;
993 993 if (PyType_Ready(&lazymanifestType) < 0)
994 994 return;
995 995 Py_INCREF(&lazymanifestType);
996 996
997 997 PyModule_AddObject(mod, "lazymanifest",
998 998 (PyObject *)&lazymanifestType);
999 999 }
@@ -1,204 +1,203 b''
1 1 /*
2 2 mpatch.c - efficient binary patching for Mercurial
3 3
4 4 This implements a patch algorithm that's O(m + n log n) where m is the
5 5 size of the output and n is the number of patches.
6 6
7 7 Given a list of binary patches, it unpacks each into a hunk list,
8 8 then combines the hunk lists with a treewise recursion to form a
9 9 single hunk list. This hunk list is then applied to the original
10 10 text.
11 11
12 12 The text (or binary) fragments are copied directly from their source
13 13 Python objects into a preallocated output string to avoid the
14 14 allocation of intermediate Python objects. Working memory is about 2x
15 15 the total number of hunks.
16 16
17 17 Copyright 2005, 2006 Olivia Mackall <olivia@selenic.com>
18 18
19 19 This software may be used and distributed according to the terms
20 20 of the GNU General Public License, incorporated herein by reference.
21 21 */
22 22
23 23 #define PY_SSIZE_T_CLEAN
24 24 #include <Python.h>
25 25 #include <stdlib.h>
26 26 #include <string.h>
27 27
28 28 #include "bitmanipulation.h"
29 29 #include "compat.h"
30 30 #include "mpatch.h"
31 31 #include "util.h"
32 32
33 33 static char mpatch_doc[] = "Efficient binary patching.";
34 34 static PyObject *mpatch_Error;
35 35
36 36 static void setpyerr(int r)
37 37 {
38 38 switch (r) {
39 39 case MPATCH_ERR_NO_MEM:
40 40 PyErr_NoMemory();
41 41 break;
42 42 case MPATCH_ERR_CANNOT_BE_DECODED:
43 43 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
44 44 break;
45 45 case MPATCH_ERR_INVALID_PATCH:
46 46 PyErr_SetString(mpatch_Error, "invalid patch");
47 47 break;
48 48 }
49 49 }
50 50
51 51 struct mpatch_flist *cpygetitem(void *bins, ssize_t pos)
52 52 {
53 53 Py_buffer buffer;
54 54 struct mpatch_flist *res = NULL;
55 55 int r;
56 56
57 57 PyObject *tmp = PyList_GetItem((PyObject *)bins, pos);
58 58 if (!tmp) {
59 59 return NULL;
60 60 }
61 61 if (PyObject_GetBuffer(tmp, &buffer, PyBUF_CONTIG_RO)) {
62 62 return NULL;
63 63 }
64 64 if ((r = mpatch_decode(buffer.buf, buffer.len, &res)) < 0) {
65 65 if (!PyErr_Occurred()) {
66 66 setpyerr(r);
67 67 }
68 68 res = NULL;
69 69 }
70 70
71 71 PyBuffer_Release(&buffer);
72 72 return res;
73 73 }
74 74
75 75 static PyObject *patches(PyObject *self, PyObject *args)
76 76 {
77 77 PyObject *text, *bins, *result;
78 78 struct mpatch_flist *patch;
79 79 Py_buffer buffer;
80 80 int r = 0;
81 81 char *out;
82 82 Py_ssize_t len, outlen;
83 83
84 84 if (!PyArg_ParseTuple(args, "OO:mpatch", &text, &bins)) {
85 85 return NULL;
86 86 }
87 87
88 88 len = PyList_Size(bins);
89 89 if (!len) {
90 90 /* nothing to do */
91 91 Py_INCREF(text);
92 92 return text;
93 93 }
94 94
95 95 if (PyObject_GetBuffer(text, &buffer, PyBUF_CONTIG_RO)) {
96 96 return NULL;
97 97 }
98 98
99 99 patch = mpatch_fold(bins, cpygetitem, 0, len);
100 100 if (!patch) { /* error already set or memory error */
101 101 if (!PyErr_Occurred()) {
102 102 PyErr_NoMemory();
103 103 }
104 104 result = NULL;
105 105 goto cleanup;
106 106 }
107 107
108 108 outlen = mpatch_calcsize(buffer.len, patch);
109 109 if (outlen < 0) {
110 110 r = (int)outlen;
111 111 result = NULL;
112 112 goto cleanup;
113 113 }
114 114 result = PyBytes_FromStringAndSize(NULL, outlen);
115 115 if (!result) {
116 116 result = NULL;
117 117 goto cleanup;
118 118 }
119 119 out = PyBytes_AsString(result);
120 120 /* clang-format off */
121 121 {
122 122 Py_BEGIN_ALLOW_THREADS
123 123 r = mpatch_apply(out, buffer.buf, buffer.len, patch);
124 124 Py_END_ALLOW_THREADS
125 125 }
126 126 /* clang-format on */
127 127 if (r < 0) {
128 128 Py_DECREF(result);
129 129 result = NULL;
130 130 }
131 131 cleanup:
132 132 mpatch_lfree(patch);
133 133 PyBuffer_Release(&buffer);
134 134 if (!result && !PyErr_Occurred()) {
135 135 setpyerr(r);
136 136 }
137 137 return result;
138 138 }
139 139
140 140 /* calculate size of a patched file directly */
141 141 static PyObject *patchedsize(PyObject *self, PyObject *args)
142 142 {
143 143 long orig, start, end, len, outlen = 0, last = 0, pos = 0;
144 144 Py_ssize_t patchlen;
145 145 char *bin;
146 146
147 if (!PyArg_ParseTuple(args, PY23("ls#", "ly#"), &orig, &bin,
148 &patchlen)) {
147 if (!PyArg_ParseTuple(args, "ly#", &orig, &bin, &patchlen)) {
149 148 return NULL;
150 149 }
151 150
152 151 while (pos >= 0 && pos < patchlen) {
153 152 start = getbe32(bin + pos);
154 153 end = getbe32(bin + pos + 4);
155 154 len = getbe32(bin + pos + 8);
156 155 if (start > end) {
157 156 break; /* sanity check */
158 157 }
159 158 pos += 12 + len;
160 159 outlen += start - last;
161 160 last = end;
162 161 outlen += len;
163 162 }
164 163
165 164 if (pos != patchlen) {
166 165 if (!PyErr_Occurred()) {
167 166 PyErr_SetString(mpatch_Error,
168 167 "patch cannot be decoded");
169 168 }
170 169 return NULL;
171 170 }
172 171
173 172 outlen += orig - last;
174 173 return Py_BuildValue("l", outlen);
175 174 }
176 175
177 176 static PyMethodDef methods[] = {
178 177 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
179 178 {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
180 179 {NULL, NULL},
181 180 };
182 181
183 182 static const int version = 1;
184 183
185 184 static struct PyModuleDef mpatch_module = {
186 185 PyModuleDef_HEAD_INIT, "mpatch", mpatch_doc, -1, methods,
187 186 };
188 187
189 188 PyMODINIT_FUNC PyInit_mpatch(void)
190 189 {
191 190 PyObject *m;
192 191
193 192 m = PyModule_Create(&mpatch_module);
194 193 if (m == NULL)
195 194 return NULL;
196 195
197 196 mpatch_Error =
198 197 PyErr_NewException("mercurial.cext.mpatch.mpatchError", NULL, NULL);
199 198 Py_INCREF(mpatch_Error);
200 199 PyModule_AddObject(m, "mpatchError", mpatch_Error);
201 200 PyModule_AddIntConstant(m, "version", version);
202 201
203 202 return m;
204 203 }
@@ -1,1391 +1,1389 b''
1 1 /*
2 2 osutil.c - native operating system services
3 3
4 4 Copyright 2007 Olivia Mackall and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define _ATFILE_SOURCE
11 11 #define PY_SSIZE_T_CLEAN
12 12 #include <Python.h>
13 13 #include <errno.h>
14 14 #include <fcntl.h>
15 15 #include <stdio.h>
16 16 #include <stdlib.h>
17 17 #include <string.h>
18 18
19 19 #ifdef _WIN32
20 20 #include <io.h>
21 21 #include <windows.h>
22 22 #else
23 23 #include <dirent.h>
24 24 #include <signal.h>
25 25 #include <sys/socket.h>
26 26 #include <sys/stat.h>
27 27 #include <sys/types.h>
28 28 #include <unistd.h>
29 29 #ifdef HAVE_LINUX_STATFS
30 30 #include <linux/magic.h>
31 31 #include <sys/vfs.h>
32 32 #endif
33 33 #ifdef HAVE_BSD_STATFS
34 34 #include <sys/mount.h>
35 35 #include <sys/param.h>
36 36 #endif
37 37 #endif
38 38
39 39 #ifdef __APPLE__
40 40 #include <sys/attr.h>
41 41 #include <sys/vnode.h>
42 42 #endif
43 43
44 44 #include "util.h"
45 45
46 46 /* some platforms lack the PATH_MAX definition (eg. GNU/Hurd) */
47 47 #ifndef PATH_MAX
48 48 #define PATH_MAX 4096
49 49 #endif
50 50
51 51 #ifdef _WIN32
52 52 /*
53 53 stat struct compatible with hg expectations
54 54 Mercurial only uses st_mode, st_size and st_mtime
55 55 the rest is kept to minimize changes between implementations
56 56 */
57 57 struct hg_stat {
58 58 int st_dev;
59 59 int st_mode;
60 60 int st_nlink;
61 61 __int64 st_size;
62 62 int st_mtime;
63 63 int st_ctime;
64 64 };
65 65 struct listdir_stat {
66 66 PyObject_HEAD
67 67 struct hg_stat st;
68 68 };
69 69 #else
70 70 struct listdir_stat {
71 71 PyObject_HEAD
72 72 struct stat st;
73 73 };
74 74 #endif
75 75
76 76 #define listdir_slot(name) \
77 77 static PyObject *listdir_stat_##name(PyObject *self, void *x) \
78 78 { \
79 79 return PyLong_FromLong(((struct listdir_stat *)self)->st.name); \
80 80 }
81 81
82 82 listdir_slot(st_dev)
83 83 listdir_slot(st_mode)
84 84 listdir_slot(st_nlink)
85 85 #ifdef _WIN32
86 86 static PyObject *listdir_stat_st_size(PyObject *self, void *x)
87 87 {
88 88 return PyLong_FromLongLong(
89 89 (PY_LONG_LONG)((struct listdir_stat *)self)->st.st_size);
90 90 }
91 91 #else
92 92 listdir_slot(st_size)
93 93 #endif
94 94 listdir_slot(st_mtime)
95 95 listdir_slot(st_ctime)
96 96
97 97 static struct PyGetSetDef listdir_stat_getsets[] = {
98 98 {"st_dev", listdir_stat_st_dev, 0, 0, 0},
99 99 {"st_mode", listdir_stat_st_mode, 0, 0, 0},
100 100 {"st_nlink", listdir_stat_st_nlink, 0, 0, 0},
101 101 {"st_size", listdir_stat_st_size, 0, 0, 0},
102 102 {"st_mtime", listdir_stat_st_mtime, 0, 0, 0},
103 103 {"st_ctime", listdir_stat_st_ctime, 0, 0, 0},
104 104 {0, 0, 0, 0, 0}
105 105 };
106 106
107 107 static PyObject *listdir_stat_new(PyTypeObject *t, PyObject *a, PyObject *k)
108 108 {
109 109 return t->tp_alloc(t, 0);
110 110 }
111 111
112 112 static void listdir_stat_dealloc(PyObject *o)
113 113 {
114 114 Py_TYPE(o)->tp_free(o);
115 115 }
116 116
117 117 static PyObject *listdir_stat_getitem(PyObject *self, PyObject *key)
118 118 {
119 119 long index = PyLong_AsLong(key);
120 120 if (index == -1 && PyErr_Occurred()) {
121 121 return NULL;
122 122 }
123 123 if (index != 8) {
124 124 PyErr_Format(PyExc_IndexError, "osutil.stat objects only "
125 125 "support stat.ST_MTIME in "
126 126 "__getitem__");
127 127 return NULL;
128 128 }
129 129 return listdir_stat_st_mtime(self, NULL);
130 130 }
131 131
132 132 static PyMappingMethods listdir_stat_type_mapping_methods = {
133 133 (lenfunc)NULL, /* mp_length */
134 134 (binaryfunc)listdir_stat_getitem, /* mp_subscript */
135 135 (objobjargproc)NULL, /* mp_ass_subscript */
136 136 };
137 137
138 138 static PyTypeObject listdir_stat_type = {
139 139 PyVarObject_HEAD_INIT(NULL, 0) /* header */
140 140 "osutil.stat", /*tp_name*/
141 141 sizeof(struct listdir_stat), /*tp_basicsize*/
142 142 0, /*tp_itemsize*/
143 143 (destructor)listdir_stat_dealloc, /*tp_dealloc*/
144 144 0, /*tp_print*/
145 145 0, /*tp_getattr*/
146 146 0, /*tp_setattr*/
147 147 0, /*tp_compare*/
148 148 0, /*tp_repr*/
149 149 0, /*tp_as_number*/
150 150 0, /*tp_as_sequence*/
151 151 &listdir_stat_type_mapping_methods, /*tp_as_mapping*/
152 152 0, /*tp_hash */
153 153 0, /*tp_call*/
154 154 0, /*tp_str*/
155 155 0, /*tp_getattro*/
156 156 0, /*tp_setattro*/
157 157 0, /*tp_as_buffer*/
158 158 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
159 159 "stat objects", /* tp_doc */
160 160 0, /* tp_traverse */
161 161 0, /* tp_clear */
162 162 0, /* tp_richcompare */
163 163 0, /* tp_weaklistoffset */
164 164 0, /* tp_iter */
165 165 0, /* tp_iternext */
166 166 0, /* tp_methods */
167 167 0, /* tp_members */
168 168 listdir_stat_getsets, /* tp_getset */
169 169 0, /* tp_base */
170 170 0, /* tp_dict */
171 171 0, /* tp_descr_get */
172 172 0, /* tp_descr_set */
173 173 0, /* tp_dictoffset */
174 174 0, /* tp_init */
175 175 0, /* tp_alloc */
176 176 listdir_stat_new, /* tp_new */
177 177 };
178 178
179 179 #ifdef _WIN32
180 180
181 181 static int to_python_time(const FILETIME *tm)
182 182 {
183 183 /* number of seconds between epoch and January 1 1601 */
184 184 const __int64 a0 = (__int64)134774L * (__int64)24L * (__int64)3600L;
185 185 /* conversion factor from 100ns to 1s */
186 186 const __int64 a1 = 10000000;
187 187 /* explicit (int) cast to suspend compiler warnings */
188 188 return (int)((((__int64)tm->dwHighDateTime << 32)
189 189 + tm->dwLowDateTime) / a1 - a0);
190 190 }
191 191
192 192 static PyObject *make_item(const WIN32_FIND_DATAA *fd, int wantstat)
193 193 {
194 194 PyObject *py_st;
195 195 struct hg_stat *stp;
196 196
197 197 int kind = (fd->dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
198 198 ? _S_IFDIR : _S_IFREG;
199 199
200 200 if (!wantstat)
201 return Py_BuildValue(PY23("si", "yi"), fd->cFileName, kind);
201 return Py_BuildValue("yi", fd->cFileName, kind);
202 202
203 203 py_st = PyObject_CallObject((PyObject *)&listdir_stat_type, NULL);
204 204 if (!py_st)
205 205 return NULL;
206 206
207 207 stp = &((struct listdir_stat *)py_st)->st;
208 208 /*
209 209 use kind as st_mode
210 210 rwx bits on Win32 are meaningless
211 211 and Hg does not use them anyway
212 212 */
213 213 stp->st_mode = kind;
214 214 stp->st_mtime = to_python_time(&fd->ftLastWriteTime);
215 215 stp->st_ctime = to_python_time(&fd->ftCreationTime);
216 216 if (kind == _S_IFREG)
217 217 stp->st_size = ((__int64)fd->nFileSizeHigh << 32)
218 218 + fd->nFileSizeLow;
219 return Py_BuildValue(PY23("siN", "yiN"), fd->cFileName,
219 return Py_BuildValue("yiN", fd->cFileName,
220 220 kind, py_st);
221 221 }
222 222
223 223 static PyObject *_listdir(char *path, Py_ssize_t plen, int wantstat, char *skip)
224 224 {
225 225 PyObject *rval = NULL; /* initialize - return value */
226 226 PyObject *list;
227 227 HANDLE fh;
228 228 WIN32_FIND_DATAA fd;
229 229 char *pattern;
230 230
231 231 /* build the path + \* pattern string */
232 232 pattern = PyMem_Malloc(plen + 3); /* path + \* + \0 */
233 233 if (!pattern) {
234 234 PyErr_NoMemory();
235 235 goto error_nomem;
236 236 }
237 237 memcpy(pattern, path, plen);
238 238
239 239 if (plen > 0) {
240 240 char c = path[plen-1];
241 241 if (c != ':' && c != '/' && c != '\\')
242 242 pattern[plen++] = '\\';
243 243 }
244 244 pattern[plen++] = '*';
245 245 pattern[plen] = '\0';
246 246
247 247 fh = FindFirstFileA(pattern, &fd);
248 248 if (fh == INVALID_HANDLE_VALUE) {
249 249 PyErr_SetFromWindowsErrWithFilename(GetLastError(), path);
250 250 goto error_file;
251 251 }
252 252
253 253 list = PyList_New(0);
254 254 if (!list)
255 255 goto error_list;
256 256
257 257 do {
258 258 PyObject *item;
259 259
260 260 if (fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
261 261 if (!strcmp(fd.cFileName, ".")
262 262 || !strcmp(fd.cFileName, ".."))
263 263 continue;
264 264
265 265 if (skip && !strcmp(fd.cFileName, skip)) {
266 266 rval = PyList_New(0);
267 267 goto error;
268 268 }
269 269 }
270 270
271 271 item = make_item(&fd, wantstat);
272 272 if (!item)
273 273 goto error;
274 274
275 275 if (PyList_Append(list, item)) {
276 276 Py_XDECREF(item);
277 277 goto error;
278 278 }
279 279
280 280 Py_XDECREF(item);
281 281 } while (FindNextFileA(fh, &fd));
282 282
283 283 if (GetLastError() != ERROR_NO_MORE_FILES) {
284 284 PyErr_SetFromWindowsErrWithFilename(GetLastError(), path);
285 285 goto error;
286 286 }
287 287
288 288 rval = list;
289 289 Py_XINCREF(rval);
290 290 error:
291 291 Py_XDECREF(list);
292 292 error_list:
293 293 FindClose(fh);
294 294 error_file:
295 295 PyMem_Free(pattern);
296 296 error_nomem:
297 297 return rval;
298 298 }
299 299
300 300 #else
301 301
/*
 * Map a directory entry's d_type to the matching S_IF* file-type constant.
 * Returns -1 when the type is not one of the seven handled kinds, or when
 * the platform does not define DT_REG.
 */
int entkind(struct dirent *ent)
{
#ifdef DT_REG
	if (ent->d_type == DT_REG)
		return S_IFREG;
	if (ent->d_type == DT_DIR)
		return S_IFDIR;
	if (ent->d_type == DT_LNK)
		return S_IFLNK;
	if (ent->d_type == DT_BLK)
		return S_IFBLK;
	if (ent->d_type == DT_CHR)
		return S_IFCHR;
	if (ent->d_type == DT_FIFO)
		return S_IFIFO;
	if (ent->d_type == DT_SOCK)
		return S_IFSOCK;
#endif
	return -1;
}
317 317
318 318 static PyObject *makestat(const struct stat *st)
319 319 {
320 320 PyObject *stat;
321 321
322 322 stat = PyObject_CallObject((PyObject *)&listdir_stat_type, NULL);
323 323 if (stat)
324 324 memcpy(&((struct listdir_stat *)stat)->st, st, sizeof(*st));
325 325 return stat;
326 326 }
327 327
328 328 static PyObject *_listdir_stat(char *path, int pathlen, int keepstat,
329 329 char *skip)
330 330 {
331 331 PyObject *list, *elem, *ret = NULL;
332 332 char fullpath[PATH_MAX + 10];
333 333 int kind, err;
334 334 struct stat st;
335 335 struct dirent *ent;
336 336 DIR *dir;
337 337 #ifdef AT_SYMLINK_NOFOLLOW
338 338 int dfd = -1;
339 339 #endif
340 340
341 341 if (pathlen >= PATH_MAX) {
342 342 errno = ENAMETOOLONG;
343 343 PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
344 344 goto error_value;
345 345 }
346 346 strncpy(fullpath, path, PATH_MAX);
347 347 fullpath[pathlen] = '/';
348 348
349 349 #ifdef AT_SYMLINK_NOFOLLOW
350 350 dfd = open(path, O_RDONLY);
351 351 if (dfd == -1) {
352 352 PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
353 353 goto error_value;
354 354 }
355 355 dir = fdopendir(dfd);
356 356 #else
357 357 dir = opendir(path);
358 358 #endif
359 359 if (!dir) {
360 360 PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
361 361 goto error_dir;
362 362 }
363 363
364 364 list = PyList_New(0);
365 365 if (!list)
366 366 goto error_list;
367 367
368 368 while ((ent = readdir(dir))) {
369 369 if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, ".."))
370 370 continue;
371 371
372 372 kind = entkind(ent);
373 373 if (kind == -1 || keepstat) {
374 374 #ifdef AT_SYMLINK_NOFOLLOW
375 375 err = fstatat(dfd, ent->d_name, &st,
376 376 AT_SYMLINK_NOFOLLOW);
377 377 #else
378 378 strncpy(fullpath + pathlen + 1, ent->d_name,
379 379 PATH_MAX - pathlen);
380 380 fullpath[PATH_MAX] = '\0';
381 381 err = lstat(fullpath, &st);
382 382 #endif
383 383 if (err == -1) {
384 384 /* race with file deletion? */
385 385 if (errno == ENOENT)
386 386 continue;
387 387 strncpy(fullpath + pathlen + 1, ent->d_name,
388 388 PATH_MAX - pathlen);
389 389 fullpath[PATH_MAX] = 0;
390 390 PyErr_SetFromErrnoWithFilename(PyExc_OSError,
391 391 fullpath);
392 392 goto error;
393 393 }
394 394 kind = st.st_mode & S_IFMT;
395 395 }
396 396
397 397 /* quit early? */
398 398 if (skip && kind == S_IFDIR && !strcmp(ent->d_name, skip)) {
399 399 ret = PyList_New(0);
400 400 goto error;
401 401 }
402 402
403 403 if (keepstat) {
404 404 PyObject *stat = makestat(&st);
405 405 if (!stat)
406 406 goto error;
407 elem = Py_BuildValue(PY23("siN", "yiN"), ent->d_name,
407 elem = Py_BuildValue("yiN", ent->d_name,
408 408 kind, stat);
409 409 } else
410 elem = Py_BuildValue(PY23("si", "yi"), ent->d_name,
410 elem = Py_BuildValue("yi", ent->d_name,
411 411 kind);
412 412 if (!elem)
413 413 goto error;
414 414
415 415 PyList_Append(list, elem);
416 416 Py_DECREF(elem);
417 417 }
418 418
419 419 ret = list;
420 420 Py_INCREF(ret);
421 421
422 422 error:
423 423 Py_DECREF(list);
424 424 error_list:
425 425 closedir(dir);
426 426 /* closedir also closes its dirfd */
427 427 goto error_value;
428 428 error_dir:
429 429 #ifdef AT_SYMLINK_NOFOLLOW
430 430 close(dfd);
431 431 #endif
432 432 error_value:
433 433 return ret;
434 434 }
435 435
436 436 #ifdef __APPLE__
437 437
438 438 typedef struct {
439 439 u_int32_t length;
440 440 attrreference_t name;
441 441 fsobj_type_t obj_type;
442 442 struct timespec mtime;
443 443 #if __LITTLE_ENDIAN__
444 444 mode_t access_mask;
445 445 uint16_t padding;
446 446 #else
447 447 uint16_t padding;
448 448 mode_t access_mask;
449 449 #endif
450 450 off_t size;
451 451 } __attribute__((packed)) attrbuf_entry;
452 452
453 453 int attrkind(attrbuf_entry *entry)
454 454 {
455 455 switch (entry->obj_type) {
456 456 case VREG: return S_IFREG;
457 457 case VDIR: return S_IFDIR;
458 458 case VLNK: return S_IFLNK;
459 459 case VBLK: return S_IFBLK;
460 460 case VCHR: return S_IFCHR;
461 461 case VFIFO: return S_IFIFO;
462 462 case VSOCK: return S_IFSOCK;
463 463 }
464 464 return -1;
465 465 }
466 466
467 467 /* get these many entries at a time */
468 468 #define LISTDIR_BATCH_SIZE 50
469 469
470 470 static PyObject *_listdir_batch(char *path, int pathlen, int keepstat,
471 471 char *skip, bool *fallback)
472 472 {
473 473 PyObject *list, *elem, *ret = NULL;
474 474 int kind, err;
475 475 unsigned long index;
476 476 unsigned int count, old_state, new_state;
477 477 bool state_seen = false;
478 478 attrbuf_entry *entry;
479 479 /* from the getattrlist(2) man page: a path can be no longer than
480 480 (NAME_MAX * 3 + 1) bytes. Also, "The getattrlist() function will
481 481 silently truncate attribute data if attrBufSize is too small." So
482 482 pass in a buffer big enough for the worst case. */
483 483 char attrbuf[LISTDIR_BATCH_SIZE * (sizeof(attrbuf_entry) + NAME_MAX * 3 + 1)];
484 484 unsigned int basep_unused;
485 485
486 486 struct stat st;
487 487 int dfd = -1;
488 488
489 489 /* these must match the attrbuf_entry struct, otherwise you'll end up
490 490 with garbage */
491 491 struct attrlist requested_attr = {0};
492 492 requested_attr.bitmapcount = ATTR_BIT_MAP_COUNT;
493 493 requested_attr.commonattr = (ATTR_CMN_NAME | ATTR_CMN_OBJTYPE |
494 494 ATTR_CMN_MODTIME | ATTR_CMN_ACCESSMASK);
495 495 requested_attr.fileattr = ATTR_FILE_DATALENGTH;
496 496
497 497 *fallback = false;
498 498
499 499 if (pathlen >= PATH_MAX) {
500 500 errno = ENAMETOOLONG;
501 501 PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
502 502 goto error_value;
503 503 }
504 504
505 505 dfd = open(path, O_RDONLY);
506 506 if (dfd == -1) {
507 507 PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
508 508 goto error_value;
509 509 }
510 510
511 511 list = PyList_New(0);
512 512 if (!list)
513 513 goto error_dir;
514 514
515 515 do {
516 516 count = LISTDIR_BATCH_SIZE;
517 517 err = getdirentriesattr(dfd, &requested_attr, &attrbuf,
518 518 sizeof(attrbuf), &count, &basep_unused,
519 519 &new_state, 0);
520 520 if (err < 0) {
521 521 if (errno == ENOTSUP) {
522 522 /* We're on a filesystem that doesn't support
523 523 getdirentriesattr. Fall back to the
524 524 stat-based implementation. */
525 525 *fallback = true;
526 526 } else
527 527 PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
528 528 goto error;
529 529 }
530 530
531 531 if (!state_seen) {
532 532 old_state = new_state;
533 533 state_seen = true;
534 534 } else if (old_state != new_state) {
535 535 /* There's an edge case with getdirentriesattr. Consider
536 536 the following initial list of files:
537 537
538 538 a
539 539 b
540 540 <--
541 541 c
542 542 d
543 543
544 544 If the iteration is paused at the arrow, and b is
545 545 deleted before it is resumed, getdirentriesattr will
546 546 not return d at all! Ordinarily we're expected to
547 547 restart the iteration from the beginning. To avoid
548 548 getting stuck in a retry loop here, fall back to
549 549 stat. */
550 550 *fallback = true;
551 551 goto error;
552 552 }
553 553
554 554 entry = (attrbuf_entry *)attrbuf;
555 555
556 556 for (index = 0; index < count; index++) {
557 557 char *filename = ((char *)&entry->name) +
558 558 entry->name.attr_dataoffset;
559 559
560 560 if (!strcmp(filename, ".") || !strcmp(filename, ".."))
561 561 continue;
562 562
563 563 kind = attrkind(entry);
564 564 if (kind == -1) {
565 565 PyErr_Format(PyExc_OSError,
566 566 "unknown object type %u for file "
567 567 "%s%s!",
568 568 entry->obj_type, path, filename);
569 569 goto error;
570 570 }
571 571
572 572 /* quit early? */
573 573 if (skip && kind == S_IFDIR && !strcmp(filename, skip)) {
574 574 ret = PyList_New(0);
575 575 goto error;
576 576 }
577 577
578 578 if (keepstat) {
579 579 PyObject *stat = NULL;
580 580 /* from the getattrlist(2) man page: "Only the
581 581 permission bits ... are valid". */
582 582 st.st_mode = (entry->access_mask & ~S_IFMT) | kind;
583 583 st.st_mtime = entry->mtime.tv_sec;
584 584 st.st_size = entry->size;
585 585 stat = makestat(&st);
586 586 if (!stat)
587 587 goto error;
588 elem = Py_BuildValue(PY23("siN", "yiN"),
588 elem = Py_BuildValue("yiN",
589 589 filename, kind, stat);
590 590 } else
591 elem = Py_BuildValue(PY23("si", "yi"),
591 elem = Py_BuildValue("yi",
592 592 filename, kind);
593 593 if (!elem)
594 594 goto error;
595 595
596 596 PyList_Append(list, elem);
597 597 Py_DECREF(elem);
598 598
599 599 entry = (attrbuf_entry *)((char *)entry + entry->length);
600 600 }
601 601 } while (err == 0);
602 602
603 603 ret = list;
604 604 Py_INCREF(ret);
605 605
606 606 error:
607 607 Py_DECREF(list);
608 608 error_dir:
609 609 close(dfd);
610 610 error_value:
611 611 return ret;
612 612 }
613 613
614 614 #endif /* __APPLE__ */
615 615
616 616 static PyObject *_listdir(char *path, int pathlen, int keepstat, char *skip)
617 617 {
618 618 #ifdef __APPLE__
619 619 PyObject *ret;
620 620 bool fallback = false;
621 621
622 622 ret = _listdir_batch(path, pathlen, keepstat, skip, &fallback);
623 623 if (ret != NULL || !fallback)
624 624 return ret;
625 625 #endif
626 626 return _listdir_stat(path, pathlen, keepstat, skip);
627 627 }
628 628
629 629 static PyObject *statfiles(PyObject *self, PyObject *args)
630 630 {
631 631 PyObject *names, *stats;
632 632 Py_ssize_t i, count;
633 633
634 634 if (!PyArg_ParseTuple(args, "O:statfiles", &names))
635 635 return NULL;
636 636
637 637 count = PySequence_Length(names);
638 638 if (count == -1) {
639 639 PyErr_SetString(PyExc_TypeError, "not a sequence");
640 640 return NULL;
641 641 }
642 642
643 643 stats = PyList_New(count);
644 644 if (stats == NULL)
645 645 return NULL;
646 646
647 647 for (i = 0; i < count; i++) {
648 648 PyObject *stat, *pypath;
649 649 struct stat st;
650 650 int ret, kind;
651 651 char *path;
652 652
653 653 /* With a large file count or on a slow filesystem,
654 654 don't block signals for long (issue4878). */
655 655 if ((i % 1000) == 999 && PyErr_CheckSignals() == -1)
656 656 goto bail;
657 657
658 658 pypath = PySequence_GetItem(names, i);
659 659 if (!pypath)
660 660 goto bail;
661 661 path = PyBytes_AsString(pypath);
662 662 if (path == NULL) {
663 663 Py_DECREF(pypath);
664 664 PyErr_SetString(PyExc_TypeError, "not a string");
665 665 goto bail;
666 666 }
667 667 ret = lstat(path, &st);
668 668 Py_DECREF(pypath);
669 669 kind = st.st_mode & S_IFMT;
670 670 if (ret != -1 && (kind == S_IFREG || kind == S_IFLNK)) {
671 671 stat = makestat(&st);
672 672 if (stat == NULL)
673 673 goto bail;
674 674 PyList_SET_ITEM(stats, i, stat);
675 675 } else {
676 676 Py_INCREF(Py_None);
677 677 PyList_SET_ITEM(stats, i, Py_None);
678 678 }
679 679 }
680 680
681 681 return stats;
682 682
683 683 bail:
684 684 Py_DECREF(stats);
685 685 return NULL;
686 686 }
687 687
688 688 /*
689 689 * recvfds() simply does not release GIL during blocking io operation because
690 690 * command server is known to be single-threaded.
691 691 *
692 692 * Old systems such as Solaris don't provide CMSG_LEN, msg_control, etc.
693 693 * Currently, recvfds() is not supported on these platforms.
694 694 */
695 695 #ifdef CMSG_LEN
696 696
/*
 * Receive one message on `sockfd` and locate the file descriptors passed
 * with it as SCM_RIGHTS ancillary data.
 *
 * `cbuf`/`cbufsize` is the caller's control-message buffer. On success
 * *rfds points at the descriptor array inside that buffer and the number
 * of descriptors is returned (0, with *rfds = cbuf, if none were sent).
 * Returns -1 if recvmsg() fails.
 */
static ssize_t recvfdstobuf(int sockfd, int **rfds, void *cbuf, size_t cbufsize)
{
	char dummy[1];
	struct iovec iov = {dummy, sizeof(dummy)};
	struct msghdr msgh = {0};
	struct cmsghdr *cmsg;

	msgh.msg_iov = &iov;
	msgh.msg_iovlen = 1;
	msgh.msg_control = cbuf;
	msgh.msg_controllen = (socklen_t)cbufsize;
	if (recvmsg(sockfd, &msgh, 0) < 0) {
		return -1;
	}

	/* the first SOL_SOCKET/SCM_RIGHTS header wins */
	cmsg = CMSG_FIRSTHDR(&msgh);
	while (cmsg) {
		if (cmsg->cmsg_level == SOL_SOCKET &&
		    cmsg->cmsg_type == SCM_RIGHTS) {
			*rfds = (int *)CMSG_DATA(cmsg);
			return (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
		}
		cmsg = CMSG_NXTHDR(&msgh, cmsg);
	}

	*rfds = cbuf;
	return 0;
}
723 723
724 724 static PyObject *recvfds(PyObject *self, PyObject *args)
725 725 {
726 726 int sockfd;
727 727 int *rfds = NULL;
728 728 ssize_t rfdscount, i;
729 729 char cbuf[256];
730 730 PyObject *rfdslist = NULL;
731 731
732 732 if (!PyArg_ParseTuple(args, "i", &sockfd))
733 733 return NULL;
734 734
735 735 rfdscount = recvfdstobuf(sockfd, &rfds, cbuf, sizeof(cbuf));
736 736 if (rfdscount < 0)
737 737 return PyErr_SetFromErrno(PyExc_OSError);
738 738
739 739 rfdslist = PyList_New(rfdscount);
740 740 if (!rfdslist)
741 741 goto bail;
742 742 for (i = 0; i < rfdscount; i++) {
743 743 PyObject *obj = PyLong_FromLong(rfds[i]);
744 744 if (!obj)
745 745 goto bail;
746 746 PyList_SET_ITEM(rfdslist, i, obj);
747 747 }
748 748 return rfdslist;
749 749
750 750 bail:
751 751 Py_XDECREF(rfdslist);
752 752 return NULL;
753 753 }
754 754
755 755 #endif /* CMSG_LEN */
756 756
/*
 * Pick the setprocname() backend. Defining SETPROCNAME_USE_NONE on the
 * compiler command line disables the feature altogether.
 */
#ifndef SETPROCNAME_USE_NONE
#ifdef HAVE_SETPROCTITLE
/* setproctitle is the first choice - available in FreeBSD */
#define SETPROCNAME_USE_SETPROCTITLE
#elif (defined(__linux__) || defined(__APPLE__)) && PY_MAJOR_VERSION == 2
/* rewrite the argv buffer in place - works in Linux and OS X. Py_GetArgcArgv
 * in Python 3 returns the copied wchar_t **argv, thus unsupported. */
#define SETPROCNAME_USE_ARGVREWRITE
#else
/* no usable backend on this platform */
#define SETPROCNAME_USE_NONE
#endif
#endif /* ndef SETPROCNAME_USE_NONE */
770 770
771 771 #ifndef SETPROCNAME_USE_NONE
772 772 static PyObject *setprocname(PyObject *self, PyObject *args)
773 773 {
774 774 const char *name = NULL;
775 if (!PyArg_ParseTuple(args, PY23("s", "y"), &name))
775 if (!PyArg_ParseTuple(args, "y", &name))
776 776 return NULL;
777 777
778 778 #if defined(SETPROCNAME_USE_SETPROCTITLE)
779 779 setproctitle("%s", name);
780 780 #elif defined(SETPROCNAME_USE_ARGVREWRITE)
781 781 {
782 782 static char *argvstart = NULL;
783 783 static size_t argvsize = 0;
784 784 if (argvstart == NULL) {
785 785 int argc = 0, i;
786 786 char **argv = NULL;
787 787 char *argvend;
788 788 extern void Py_GetArgcArgv(int *argc, char ***argv);
789 789 Py_GetArgcArgv(&argc, &argv);
790 790 /* Py_GetArgcArgv may not do much if a custom python
791 791 * launcher is used that doesn't record the information
792 792 * it needs. Let's handle this gracefully instead of
793 793 * segfaulting. */
794 794 if (argv != NULL)
795 795 argvend = argvstart = argv[0];
796 796 else
797 797 argvend = argvstart = NULL;
798 798
799 799 /* Check the memory we can use. Typically, argv[i] and
800 800 * argv[i + 1] are continuous. */
801 801 for (i = 0; i < argc; ++i) {
802 802 size_t len;
803 803 if (argv[i] > argvend || argv[i] < argvstart)
804 804 break; /* not continuous */
805 805 len = strlen(argv[i]);
806 806 argvend = argv[i] + len + 1 /* '\0' */;
807 807 }
808 808 if (argvend > argvstart) /* sanity check */
809 809 argvsize = argvend - argvstart;
810 810 }
811 811
812 812 if (argvstart && argvsize > 1) {
813 813 int n = snprintf(argvstart, argvsize, "%s", name);
814 814 if (n >= 0 && (size_t)n < argvsize)
815 815 memset(argvstart + n, 0, argvsize - n);
816 816 }
817 817 }
818 818 #endif
819 819
820 820 Py_RETURN_NONE;
821 821 }
822 822 #endif /* ndef SETPROCNAME_USE_NONE */
823 823
824 824 #if defined(HAVE_BSD_STATFS)
825 825 static const char *describefstype(const struct statfs *pbuf)
826 826 {
827 827 /* BSD or OSX provides a f_fstypename field */
828 828 return pbuf->f_fstypename;
829 829 }
830 830 #elif defined(HAVE_LINUX_STATFS)
/*
 * Map the f_type magic number of a statfs result to a filesystem name.
 *
 * Each comparison is compiled in only when the corresponding *_MAGIC macro
 * is defined. The tests run in the order written and the first match
 * returns, so when two macros carry the same value the earlier name is
 * reported. Returns a pointer to a string literal, or NULL when no
 * compiled-in magic number matches.
 */
831 831 static const char *describefstype(const struct statfs *pbuf)
832 832 {
833 833 /* Begin of Linux filesystems */
834 834 #ifdef ADFS_SUPER_MAGIC
835 835 if (pbuf->f_type == ADFS_SUPER_MAGIC)
836 836 return "adfs";
837 837 #endif
838 838 #ifdef AFFS_SUPER_MAGIC
839 839 if (pbuf->f_type == AFFS_SUPER_MAGIC)
840 840 return "affs";
841 841 #endif
842 842 #ifdef AUTOFS_SUPER_MAGIC
843 843 if (pbuf->f_type == AUTOFS_SUPER_MAGIC)
844 844 return "autofs";
845 845 #endif
846 846 #ifdef BDEVFS_MAGIC
847 847 if (pbuf->f_type == BDEVFS_MAGIC)
848 848 return "bdevfs";
849 849 #endif
850 850 #ifdef BEFS_SUPER_MAGIC
851 851 if (pbuf->f_type == BEFS_SUPER_MAGIC)
852 852 return "befs";
853 853 #endif
854 854 #ifdef BFS_MAGIC
855 855 if (pbuf->f_type == BFS_MAGIC)
856 856 return "bfs";
857 857 #endif
858 858 #ifdef BINFMTFS_MAGIC
859 859 if (pbuf->f_type == BINFMTFS_MAGIC)
860 860 return "binfmtfs";
861 861 #endif
862 862 #ifdef BTRFS_SUPER_MAGIC
863 863 if (pbuf->f_type == BTRFS_SUPER_MAGIC)
864 864 return "btrfs";
865 865 #endif
866 866 #ifdef CGROUP_SUPER_MAGIC
867 867 if (pbuf->f_type == CGROUP_SUPER_MAGIC)
868 868 return "cgroup";
869 869 #endif
870 870 #ifdef CIFS_MAGIC_NUMBER
871 871 if (pbuf->f_type == CIFS_MAGIC_NUMBER)
872 872 return "cifs";
873 873 #endif
874 874 #ifdef CODA_SUPER_MAGIC
875 875 if (pbuf->f_type == CODA_SUPER_MAGIC)
876 876 return "coda";
877 877 #endif
878 878 #ifdef COH_SUPER_MAGIC
879 879 if (pbuf->f_type == COH_SUPER_MAGIC)
880 880 return "coh";
881 881 #endif
882 882 #ifdef CRAMFS_MAGIC
883 883 if (pbuf->f_type == CRAMFS_MAGIC)
884 884 return "cramfs";
885 885 #endif
886 886 #ifdef DEBUGFS_MAGIC
887 887 if (pbuf->f_type == DEBUGFS_MAGIC)
888 888 return "debugfs";
889 889 #endif
890 890 #ifdef DEVFS_SUPER_MAGIC
891 891 if (pbuf->f_type == DEVFS_SUPER_MAGIC)
892 892 return "devfs";
893 893 #endif
894 894 #ifdef DEVPTS_SUPER_MAGIC
895 895 if (pbuf->f_type == DEVPTS_SUPER_MAGIC)
896 896 return "devpts";
897 897 #endif
898 898 #ifdef EFIVARFS_MAGIC
899 899 if (pbuf->f_type == EFIVARFS_MAGIC)
900 900 return "efivarfs";
901 901 #endif
902 902 #ifdef EFS_SUPER_MAGIC
903 903 if (pbuf->f_type == EFS_SUPER_MAGIC)
904 904 return "efs";
905 905 #endif
906 906 #ifdef EXT_SUPER_MAGIC
907 907 if (pbuf->f_type == EXT_SUPER_MAGIC)
908 908 return "ext";
909 909 #endif
910 910 #ifdef EXT2_OLD_SUPER_MAGIC
911 911 if (pbuf->f_type == EXT2_OLD_SUPER_MAGIC)
912 912 return "ext2";
913 913 #endif
914 914 #ifdef EXT2_SUPER_MAGIC
915 915 if (pbuf->f_type == EXT2_SUPER_MAGIC)
916 916 return "ext2";
917 917 #endif
918 918 #ifdef EXT3_SUPER_MAGIC
919 919 if (pbuf->f_type == EXT3_SUPER_MAGIC)
920 920 return "ext3";
921 921 #endif
922 922 #ifdef EXT4_SUPER_MAGIC
923 923 if (pbuf->f_type == EXT4_SUPER_MAGIC)
924 924 return "ext4";
925 925 #endif
926 926 #ifdef F2FS_SUPER_MAGIC
927 927 if (pbuf->f_type == F2FS_SUPER_MAGIC)
928 928 return "f2fs";
929 929 #endif
930 930 #ifdef FUSE_SUPER_MAGIC
931 931 if (pbuf->f_type == FUSE_SUPER_MAGIC)
932 932 return "fuse";
933 933 #endif
934 934 #ifdef FUTEXFS_SUPER_MAGIC
935 935 if (pbuf->f_type == FUTEXFS_SUPER_MAGIC)
936 936 return "futexfs";
937 937 #endif
938 938 #ifdef HFS_SUPER_MAGIC
939 939 if (pbuf->f_type == HFS_SUPER_MAGIC)
940 940 return "hfs";
941 941 #endif
942 942 #ifdef HOSTFS_SUPER_MAGIC
943 943 if (pbuf->f_type == HOSTFS_SUPER_MAGIC)
944 944 return "hostfs";
945 945 #endif
946 946 #ifdef HPFS_SUPER_MAGIC
947 947 if (pbuf->f_type == HPFS_SUPER_MAGIC)
948 948 return "hpfs";
949 949 #endif
950 950 #ifdef HUGETLBFS_MAGIC
951 951 if (pbuf->f_type == HUGETLBFS_MAGIC)
952 952 return "hugetlbfs";
953 953 #endif
954 954 #ifdef ISOFS_SUPER_MAGIC
955 955 if (pbuf->f_type == ISOFS_SUPER_MAGIC)
956 956 return "isofs";
957 957 #endif
958 958 #ifdef JFFS2_SUPER_MAGIC
959 959 if (pbuf->f_type == JFFS2_SUPER_MAGIC)
960 960 return "jffs2";
961 961 #endif
962 962 #ifdef JFS_SUPER_MAGIC
963 963 if (pbuf->f_type == JFS_SUPER_MAGIC)
964 964 return "jfs";
965 965 #endif
966 966 #ifdef MINIX_SUPER_MAGIC
967 967 if (pbuf->f_type == MINIX_SUPER_MAGIC)
968 968 return "minix";
969 969 #endif
970 970 #ifdef MINIX2_SUPER_MAGIC
971 971 if (pbuf->f_type == MINIX2_SUPER_MAGIC)
972 972 return "minix2";
973 973 #endif
974 974 #ifdef MINIX3_SUPER_MAGIC
975 975 if (pbuf->f_type == MINIX3_SUPER_MAGIC)
976 976 return "minix3";
977 977 #endif
978 978 #ifdef MQUEUE_MAGIC
979 979 if (pbuf->f_type == MQUEUE_MAGIC)
980 980 return "mqueue";
981 981 #endif
982 982 #ifdef MSDOS_SUPER_MAGIC
983 983 if (pbuf->f_type == MSDOS_SUPER_MAGIC)
984 984 return "msdos";
985 985 #endif
986 986 #ifdef NCP_SUPER_MAGIC
987 987 if (pbuf->f_type == NCP_SUPER_MAGIC)
988 988 return "ncp";
989 989 #endif
990 990 #ifdef NFS_SUPER_MAGIC
991 991 if (pbuf->f_type == NFS_SUPER_MAGIC)
992 992 return "nfs";
993 993 #endif
994 994 #ifdef NILFS_SUPER_MAGIC
995 995 if (pbuf->f_type == NILFS_SUPER_MAGIC)
996 996 return "nilfs";
997 997 #endif
998 998 #ifdef NTFS_SB_MAGIC
999 999 if (pbuf->f_type == NTFS_SB_MAGIC)
1000 1000 return "ntfs-sb";
1001 1001 #endif
1002 1002 #ifdef OCFS2_SUPER_MAGIC
1003 1003 if (pbuf->f_type == OCFS2_SUPER_MAGIC)
1004 1004 return "ocfs2";
1005 1005 #endif
1006 1006 #ifdef OPENPROM_SUPER_MAGIC
1007 1007 if (pbuf->f_type == OPENPROM_SUPER_MAGIC)
1008 1008 return "openprom";
1009 1009 #endif
1010 1010 #ifdef OVERLAYFS_SUPER_MAGIC
1011 1011 if (pbuf->f_type == OVERLAYFS_SUPER_MAGIC)
1012 1012 return "overlay";
1013 1013 #endif
1014 1014 #ifdef PIPEFS_MAGIC
1015 1015 if (pbuf->f_type == PIPEFS_MAGIC)
1016 1016 return "pipefs";
1017 1017 #endif
1018 1018 #ifdef PROC_SUPER_MAGIC
1019 1019 if (pbuf->f_type == PROC_SUPER_MAGIC)
1020 1020 return "proc";
1021 1021 #endif
1022 1022 #ifdef PSTOREFS_MAGIC
1023 1023 if (pbuf->f_type == PSTOREFS_MAGIC)
1024 1024 return "pstorefs";
1025 1025 #endif
1026 1026 #ifdef QNX4_SUPER_MAGIC
1027 1027 if (pbuf->f_type == QNX4_SUPER_MAGIC)
1028 1028 return "qnx4";
1029 1029 #endif
1030 1030 #ifdef QNX6_SUPER_MAGIC
1031 1031 if (pbuf->f_type == QNX6_SUPER_MAGIC)
1032 1032 return "qnx6";
1033 1033 #endif
1034 1034 #ifdef RAMFS_MAGIC
1035 1035 if (pbuf->f_type == RAMFS_MAGIC)
1036 1036 return "ramfs";
1037 1037 #endif
1038 1038 #ifdef REISERFS_SUPER_MAGIC
1039 1039 if (pbuf->f_type == REISERFS_SUPER_MAGIC)
1040 1040 return "reiserfs";
1041 1041 #endif
1042 1042 #ifdef ROMFS_MAGIC
1043 1043 if (pbuf->f_type == ROMFS_MAGIC)
1044 1044 return "romfs";
1045 1045 #endif
1046 1046 #ifdef SECURITYFS_MAGIC
1047 1047 if (pbuf->f_type == SECURITYFS_MAGIC)
1048 1048 return "securityfs";
1049 1049 #endif
1050 1050 #ifdef SELINUX_MAGIC
1051 1051 if (pbuf->f_type == SELINUX_MAGIC)
1052 1052 return "selinux";
1053 1053 #endif
1054 1054 #ifdef SMACK_MAGIC
1055 1055 if (pbuf->f_type == SMACK_MAGIC)
1056 1056 return "smack";
1057 1057 #endif
1058 1058 #ifdef SMB_SUPER_MAGIC
1059 1059 if (pbuf->f_type == SMB_SUPER_MAGIC)
1060 1060 return "smb";
1061 1061 #endif
1062 1062 #ifdef SOCKFS_MAGIC
1063 1063 if (pbuf->f_type == SOCKFS_MAGIC)
1064 1064 return "sockfs";
1065 1065 #endif
1066 1066 #ifdef SQUASHFS_MAGIC
1067 1067 if (pbuf->f_type == SQUASHFS_MAGIC)
1068 1068 return "squashfs";
1069 1069 #endif
1070 1070 #ifdef SYSFS_MAGIC
1071 1071 if (pbuf->f_type == SYSFS_MAGIC)
1072 1072 return "sysfs";
1073 1073 #endif
1074 1074 #ifdef SYSV2_SUPER_MAGIC
1075 1075 if (pbuf->f_type == SYSV2_SUPER_MAGIC)
1076 1076 return "sysv2";
1077 1077 #endif
1078 1078 #ifdef SYSV4_SUPER_MAGIC
1079 1079 if (pbuf->f_type == SYSV4_SUPER_MAGIC)
1080 1080 return "sysv4";
1081 1081 #endif
1082 1082 #ifdef TMPFS_MAGIC
1083 1083 if (pbuf->f_type == TMPFS_MAGIC)
1084 1084 return "tmpfs";
1085 1085 #endif
1086 1086 #ifdef UDF_SUPER_MAGIC
1087 1087 if (pbuf->f_type == UDF_SUPER_MAGIC)
1088 1088 return "udf";
1089 1089 #endif
1090 1090 #ifdef UFS_MAGIC
1091 1091 if (pbuf->f_type == UFS_MAGIC)
1092 1092 return "ufs";
1093 1093 #endif
1094 1094 #ifdef USBDEVICE_SUPER_MAGIC
1095 1095 if (pbuf->f_type == USBDEVICE_SUPER_MAGIC)
1096 1096 return "usbdevice";
1097 1097 #endif
1098 1098 #ifdef V9FS_MAGIC
1099 1099 if (pbuf->f_type == V9FS_MAGIC)
1100 1100 return "v9fs";
1101 1101 #endif
1102 1102 #ifdef VXFS_SUPER_MAGIC
1103 1103 if (pbuf->f_type == VXFS_SUPER_MAGIC)
1104 1104 return "vxfs";
1105 1105 #endif
1106 1106 #ifdef XENFS_SUPER_MAGIC
1107 1107 if (pbuf->f_type == XENFS_SUPER_MAGIC)
1108 1108 return "xenfs";
1109 1109 #endif
1110 1110 #ifdef XENIX_SUPER_MAGIC
1111 1111 if (pbuf->f_type == XENIX_SUPER_MAGIC)
1112 1112 return "xenix";
1113 1113 #endif
1114 1114 #ifdef XFS_SUPER_MAGIC
1115 1115 if (pbuf->f_type == XFS_SUPER_MAGIC)
1116 1116 return "xfs";
1117 1117 #endif
1118 1118 /* End of Linux filesystems */
/* no compiled-in magic number matched: the type is unknown */
1119 1119 return NULL;
1120 1120 }
1121 1121 #endif /* def HAVE_LINUX_STATFS */
1122 1122
1123 1123 #if defined(HAVE_BSD_STATFS) || defined(HAVE_LINUX_STATFS)
1124 1124 /* given a directory path, return filesystem type name (best-effort) */
1125 1125 static PyObject *getfstype(PyObject *self, PyObject *args)
1126 1126 {
1127 1127 const char *path = NULL;
1128 1128 struct statfs buf;
1129 1129 int r;
1130 if (!PyArg_ParseTuple(args, PY23("s", "y"), &path))
1130 if (!PyArg_ParseTuple(args, "y", &path))
1131 1131 return NULL;
1132 1132
1133 1133 memset(&buf, 0, sizeof(buf));
1134 1134 r = statfs(path, &buf);
1135 1135 if (r != 0)
1136 1136 return PyErr_SetFromErrno(PyExc_OSError);
1137 return Py_BuildValue(PY23("s", "y"), describefstype(&buf));
1137 return Py_BuildValue("y", describefstype(&buf));
1138 1138 }
1139 1139 #endif /* defined(HAVE_LINUX_STATFS) || defined(HAVE_BSD_STATFS) */
1140 1140
1141 1141 #if defined(HAVE_BSD_STATFS)
1142 1142 /* given a directory path, return filesystem mount point (best-effort) */
1143 1143 static PyObject *getfsmountpoint(PyObject *self, PyObject *args)
1144 1144 {
1145 1145 const char *path = NULL;
1146 1146 struct statfs buf;
1147 1147 int r;
1148 if (!PyArg_ParseTuple(args, PY23("s", "y"), &path))
1148 if (!PyArg_ParseTuple(args, "y", &path))
1149 1149 return NULL;
1150 1150
1151 1151 memset(&buf, 0, sizeof(buf));
1152 1152 r = statfs(path, &buf);
1153 1153 if (r != 0)
1154 1154 return PyErr_SetFromErrno(PyExc_OSError);
1155 return Py_BuildValue(PY23("s", "y"), buf.f_mntonname);
1155 return Py_BuildValue("y", buf.f_mntonname);
1156 1156 }
1157 1157 #endif /* defined(HAVE_BSD_STATFS) */
1158 1158
1159 1159 static PyObject *unblocksignal(PyObject *self, PyObject *args)
1160 1160 {
1161 1161 int sig = 0;
1162 1162 sigset_t set;
1163 1163 int r;
1164 1164 if (!PyArg_ParseTuple(args, "i", &sig))
1165 1165 return NULL;
1166 1166 r = sigemptyset(&set);
1167 1167 if (r != 0)
1168 1168 return PyErr_SetFromErrno(PyExc_OSError);
1169 1169 r = sigaddset(&set, sig);
1170 1170 if (r != 0)
1171 1171 return PyErr_SetFromErrno(PyExc_OSError);
1172 1172 r = sigprocmask(SIG_UNBLOCK, &set, NULL);
1173 1173 if (r != 0)
1174 1174 return PyErr_SetFromErrno(PyExc_OSError);
1175 1175 Py_RETURN_NONE;
1176 1176 }
1177 1177
1178 1178 #endif /* ndef _WIN32 */
1179 1179
1180 1180 static PyObject *listdir(PyObject *self, PyObject *args, PyObject *kwargs)
1181 1181 {
1182 1182 PyObject *statobj = NULL; /* initialize - optional arg */
1183 1183 PyObject *skipobj = NULL; /* initialize - optional arg */
1184 1184 char *path, *skip = NULL;
1185 1185 Py_ssize_t plen;
1186 1186 int wantstat;
1187 1187
1188 1188 static char *kwlist[] = {"path", "stat", "skip", NULL};
1189 1189
1190 if (!PyArg_ParseTupleAndKeywords(args, kwargs, PY23("s#|OO:listdir",
1191 "y#|OO:listdir"),
1190 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|OO:listdir",
1192 1191 kwlist, &path, &plen, &statobj, &skipobj))
1193 1192 return NULL;
1194 1193
1195 1194 wantstat = statobj && PyObject_IsTrue(statobj);
1196 1195
1197 1196 if (skipobj && skipobj != Py_None) {
1198 1197 skip = PyBytes_AsString(skipobj);
1199 1198 if (!skip)
1200 1199 return NULL;
1201 1200 }
1202 1201
1203 1202 return _listdir(path, plen, wantstat, skip);
1204 1203 }
1205 1204
1206 1205 #ifdef _WIN32
/*
 * posixfile(name, mode="rb", buffering=-1)
 *
 * Open a file through CreateFile() with read, write and delete sharing all
 * enabled, then wrap the handle in a C runtime descriptor, a FILE stream and
 * finally a Python file object.
 *
 * Returns the file object, or NULL with an exception set. Every path leaves
 * through "bail" so the encoded name buffer is always released.
 */
1207 1206 static PyObject *posixfile(PyObject *self, PyObject *args, PyObject *kwds)
1208 1207 {
1209 1208 static char *kwlist[] = {"name", "mode", "buffering", NULL};
1210 1209 PyObject *file_obj = NULL;
1211 1210 char *name = NULL;
1212 1211 char *mode = "rb";
1213 1212 DWORD access = 0;
1214 1213 DWORD creation;
1215 1214 HANDLE handle;
1216 1215 int fd, flags = 0;
1217 1216 int bufsize = -1;
1218 1217 char m0, m1, m2;
1219 1218 char fpmode[4];
1220 1219 int fppos = 0;
1221 1220 int plus;
1222 1221 FILE *fp;
1223 1222
/* NOTE(review): the PY23(...) form below looks like the pre-change side of
 * the diff this text was taken from; the plain "et|yi:posixfile" call that
 * follows it is the replacement - confirm against the real file. */
1224 if (!PyArg_ParseTupleAndKeywords(args, kwds, PY23("et|si:posixfile",
1225 "et|yi:posixfile"),
1223 if (!PyArg_ParseTupleAndKeywords(args, kwds, "et|yi:posixfile",
1226 1224 kwlist,
1227 1225 Py_FileSystemDefaultEncoding,
1228 1226 &name, &mode, &bufsize))
1229 1227 return NULL;
1230 1228
/* pick apart up to three mode characters without reading past the NUL */
1231 1229 m0 = mode[0];
1232 1230 m1 = m0 ? mode[1] : '\0';
1233 1231 m2 = m1 ? mode[2] : '\0';
1234 1232 plus = m1 == '+' || m2 == '+';
1235 1233
/* fpmode is the mode string later handed to _fdopen() */
1236 1234 fpmode[fppos++] = m0;
1237 1235 if (m1 == 'b' || m2 == 'b') {
1238 1236 flags = _O_BINARY;
1239 1237 fpmode[fppos++] = 'b';
1240 1238 }
1241 1239 else
1242 1240 flags = _O_TEXT;
1243 1241 if (m0 == 'r' && !plus) {
1244 1242 flags |= _O_RDONLY;
1245 1243 access = GENERIC_READ;
1246 1244 } else {
1247 1245 /*
1248 1246 work around http://support.microsoft.com/kb/899149 and
1249 1247 set _O_RDWR for 'w' and 'a', even if mode has no '+'
1250 1248 */
1251 1249 flags |= _O_RDWR;
1252 1250 access = GENERIC_READ | GENERIC_WRITE;
1253 1251 fpmode[fppos++] = '+';
1254 1252 }
1255 1253 fpmode[fppos++] = '\0';
1256 1254
/* the first mode character selects the CreateFile() disposition */
1257 1255 switch (m0) {
1258 1256 case 'r':
1259 1257 creation = OPEN_EXISTING;
1260 1258 break;
1261 1259 case 'w':
1262 1260 creation = CREATE_ALWAYS;
1263 1261 break;
1264 1262 case 'a':
1265 1263 creation = OPEN_ALWAYS;
1266 1264 flags |= _O_APPEND;
1267 1265 break;
1268 1266 default:
1269 1267 PyErr_Format(PyExc_ValueError,
1270 1268 "mode string must begin with one of 'r', 'w', "
1271 1269 "or 'a', not '%c'", m0);
1272 1270 goto bail;
1273 1271 }
1274 1272
1275 1273 handle = CreateFile(name, access,
1276 1274 FILE_SHARE_READ | FILE_SHARE_WRITE |
1277 1275 FILE_SHARE_DELETE,
1278 1276 NULL,
1279 1277 creation,
1280 1278 FILE_ATTRIBUTE_NORMAL,
1281 1279 0);
1282 1280
1283 1281 if (handle == INVALID_HANDLE_VALUE) {
1284 1282 PyErr_SetFromWindowsErrWithFilename(GetLastError(), name);
1285 1283 goto bail;
1286 1284 }
1287 1285
/* from here on each failure closes only the most recently created wrapper
 * (handle, then fd, then fp) */
1288 1286 fd = _open_osfhandle((intptr_t)handle, flags);
1289 1287
1290 1288 if (fd == -1) {
1291 1289 CloseHandle(handle);
1292 1290 PyErr_SetFromErrnoWithFilename(PyExc_IOError, name);
1293 1291 goto bail;
1294 1292 }
1295 1293 fp = _fdopen(fd, fpmode);
1296 1294 if (fp == NULL) {
1297 1295 _close(fd);
1298 1296 PyErr_SetFromErrnoWithFilename(PyExc_IOError, name);
1299 1297 goto bail;
1300 1298 }
1301 1299
1302 1300 file_obj = PyFile_FromFile(fp, name, mode, fclose);
1303 1301 if (file_obj == NULL) {
1304 1302 fclose(fp);
1305 1303 goto bail;
1306 1304 }
1307 1305
1308 1306 PyFile_SetBufSize(file_obj, bufsize);
1309 1307 bail:
/* name was allocated by the "et" format unit and must be freed here;
 * file_obj is still NULL on every error path */
1310 1308 PyMem_Free(name);
1311 1309 return file_obj;
1312 1310 }
1313 1311 #endif
1314 1312
1315 1313 #ifdef __APPLE__
1316 1314 #include <ApplicationServices/ApplicationServices.h>
1317 1315
1318 1316 static PyObject *isgui(PyObject *self)
1319 1317 {
1320 1318 CFDictionaryRef dict = CGSessionCopyCurrentDictionary();
1321 1319
1322 1320 if (dict != NULL) {
1323 1321 CFRelease(dict);
1324 1322 Py_RETURN_TRUE;
1325 1323 } else {
1326 1324 Py_RETURN_FALSE;
1327 1325 }
1328 1326 }
1329 1327 #endif
1330 1328
/* module docstring, referenced from osutil_module */
1331 1329 static char osutil_doc[] = "Native operating system services.";
1332 1330
/*
 * Method table of the osutil module. Which entries exist depends on the
 * platform: posixfile only on Windows, the remaining POSIX helpers only
 * elsewhere and further gated on feature macros, isgui only on Apple
 * platforms. The table ends with a {NULL, NULL} sentinel.
 */
1333 1331 static PyMethodDef methods[] = {
1334 1332 {"listdir", (PyCFunction)listdir, METH_VARARGS | METH_KEYWORDS,
1335 1333 "list a directory\n"},
1336 1334 #ifdef _WIN32
1337 1335 {"posixfile", (PyCFunction)posixfile, METH_VARARGS | METH_KEYWORDS,
1338 1336 "Open a file with POSIX-like semantics.\n"
1339 1337 "On error, this function may raise either a WindowsError or an IOError."},
1340 1338 #else
1341 1339 {"statfiles", (PyCFunction)statfiles, METH_VARARGS | METH_KEYWORDS,
1342 1340 "stat a series of files or symlinks\n"
1343 1341 "Returns None for non-existent entries and entries of other types.\n"},
1344 1342 #ifdef CMSG_LEN
1345 1343 {"recvfds", (PyCFunction)recvfds, METH_VARARGS,
1346 1344 "receive list of file descriptors via socket\n"},
1347 1345 #endif
1348 1346 #ifndef SETPROCNAME_USE_NONE
1349 1347 {"setprocname", (PyCFunction)setprocname, METH_VARARGS,
1350 1348 "set process title (best-effort)\n"},
1351 1349 #endif
1352 1350 #if defined(HAVE_BSD_STATFS) || defined(HAVE_LINUX_STATFS)
1353 1351 {"getfstype", (PyCFunction)getfstype, METH_VARARGS,
1354 1352 "get filesystem type (best-effort)\n"},
1355 1353 #endif
1356 1354 #if defined(HAVE_BSD_STATFS)
1357 1355 {"getfsmountpoint", (PyCFunction)getfsmountpoint, METH_VARARGS,
1358 1356 "get filesystem mount point (best-effort)\n"},
1359 1357 #endif
1360 1358 {"unblocksignal", (PyCFunction)unblocksignal, METH_VARARGS,
1361 1359 "change signal mask to unblock a given signal\n"},
1362 1360 #endif /* ndef _WIN32 */
1363 1361 #ifdef __APPLE__
1364 1362 {
1365 1363 "isgui", (PyCFunction)isgui, METH_NOARGS,
1366 1364 "Is a CoreGraphics session available?"
1367 1365 },
1368 1366 #endif
1369 1367 {NULL, NULL}
1370 1368 };
1371 1369
/* exported to Python as the integer constant "version" by PyInit_osutil() */
1372 1370 static const int version = 4;
1373 1371
/* Module definition: name, docstring, a per-module state size of -1, and
 * the method table. */
1374 1372 static struct PyModuleDef osutil_module = {
1375 1373 PyModuleDef_HEAD_INIT,
1376 1374 "osutil",
1377 1375 osutil_doc,
1378 1376 -1,
1379 1377 methods
1380 1378 };
1381 1379
1382 1380 PyMODINIT_FUNC PyInit_osutil(void)
1383 1381 {
1384 1382 PyObject *m;
1385 1383 if (PyType_Ready(&listdir_stat_type) < 0)
1386 1384 return NULL;
1387 1385
1388 1386 m = PyModule_Create(&osutil_module);
1389 1387 PyModule_AddIntConstant(m, "version", version);
1390 1388 return m;
1391 1389 }
@@ -1,1306 +1,1304 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12 #include <ctype.h>
13 13 #include <stddef.h>
14 14 #include <string.h>
15 15
16 16 #include "bitmanipulation.h"
17 17 #include "charencode.h"
18 18 #include "util.h"
19 19
/* NOTE(review): presumably reported when the extension is loaded by a
 * mismatching interpreter; the use site is not in this part of the file */
20 20 static const char *const versionerrortext = "Python minor version mismatch";
21 21
/* Sentinel values of the dirstate-v1 "size" field: -2 stands for "from p2"
 * and -1 for "non-normal" (see dirstate_item_c_v1_size() and
 * dirstate_item_from_v1_data()). */
22 22 static const int dirstate_v1_from_p2 = -2;
23 23 static const int dirstate_v1_nonnormal = -1;
/* dirstate-v1 "mtime" value used when no reliable mtime is recorded */
24 24 static const int ambiguous_time = -1;
25 25
26 26 static PyObject *dict_new_presized(PyObject *self, PyObject *args)
27 27 {
28 28 Py_ssize_t expected_size;
29 29
30 30 if (!PyArg_ParseTuple(args, "n:make_presized_dict", &expected_size)) {
31 31 return NULL;
32 32 }
33 33
34 34 return _dict_new_presized(expected_size);
35 35 }
36 36
37 37 static PyObject *dirstate_item_new(PyTypeObject *subtype, PyObject *args,
38 38 PyObject *kwds)
39 39 {
40 40 /* We do all the initialization here and not a tp_init function because
41 41 * dirstate_item is immutable. */
42 42 dirstateItemObject *t;
43 43 int wc_tracked;
44 44 int p1_tracked;
45 45 int p2_info;
46 46 int has_meaningful_data;
47 47 int has_meaningful_mtime;
48 48 int mtime_second_ambiguous;
49 49 int mode;
50 50 int size;
51 51 int mtime_s;
52 52 int mtime_ns;
53 53 PyObject *parentfiledata;
54 54 PyObject *mtime;
55 55 PyObject *fallback_exec;
56 56 PyObject *fallback_symlink;
57 57 static char *keywords_name[] = {
58 58 "wc_tracked", "p1_tracked", "p2_info",
59 59 "has_meaningful_data", "has_meaningful_mtime", "parentfiledata",
60 60 "fallback_exec", "fallback_symlink", NULL,
61 61 };
62 62 wc_tracked = 0;
63 63 p1_tracked = 0;
64 64 p2_info = 0;
65 65 has_meaningful_mtime = 1;
66 66 has_meaningful_data = 1;
67 67 mtime_second_ambiguous = 0;
68 68 parentfiledata = Py_None;
69 69 fallback_exec = Py_None;
70 70 fallback_symlink = Py_None;
71 71 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|iiiiiOOO", keywords_name,
72 72 &wc_tracked, &p1_tracked, &p2_info,
73 73 &has_meaningful_data,
74 74 &has_meaningful_mtime, &parentfiledata,
75 75 &fallback_exec, &fallback_symlink)) {
76 76 return NULL;
77 77 }
78 78 t = (dirstateItemObject *)subtype->tp_alloc(subtype, 1);
79 79 if (!t) {
80 80 return NULL;
81 81 }
82 82
83 83 t->flags = 0;
84 84 if (wc_tracked) {
85 85 t->flags |= dirstate_flag_wc_tracked;
86 86 }
87 87 if (p1_tracked) {
88 88 t->flags |= dirstate_flag_p1_tracked;
89 89 }
90 90 if (p2_info) {
91 91 t->flags |= dirstate_flag_p2_info;
92 92 }
93 93
94 94 if (fallback_exec != Py_None) {
95 95 t->flags |= dirstate_flag_has_fallback_exec;
96 96 if (PyObject_IsTrue(fallback_exec)) {
97 97 t->flags |= dirstate_flag_fallback_exec;
98 98 }
99 99 }
100 100 if (fallback_symlink != Py_None) {
101 101 t->flags |= dirstate_flag_has_fallback_symlink;
102 102 if (PyObject_IsTrue(fallback_symlink)) {
103 103 t->flags |= dirstate_flag_fallback_symlink;
104 104 }
105 105 }
106 106
107 107 if (parentfiledata != Py_None) {
108 108 if (!PyArg_ParseTuple(parentfiledata, "iiO", &mode, &size,
109 109 &mtime)) {
110 110 return NULL;
111 111 }
112 112 if (mtime != Py_None) {
113 113 if (!PyArg_ParseTuple(mtime, "iii", &mtime_s, &mtime_ns,
114 114 &mtime_second_ambiguous)) {
115 115 return NULL;
116 116 }
117 117 } else {
118 118 has_meaningful_mtime = 0;
119 119 }
120 120 } else {
121 121 has_meaningful_data = 0;
122 122 has_meaningful_mtime = 0;
123 123 }
124 124 if (has_meaningful_data) {
125 125 t->flags |= dirstate_flag_has_meaningful_data;
126 126 t->mode = mode;
127 127 t->size = size;
128 128 if (mtime_second_ambiguous) {
129 129 t->flags |= dirstate_flag_mtime_second_ambiguous;
130 130 }
131 131 } else {
132 132 t->mode = 0;
133 133 t->size = 0;
134 134 }
135 135 if (has_meaningful_mtime) {
136 136 t->flags |= dirstate_flag_has_mtime;
137 137 t->mtime_s = mtime_s;
138 138 t->mtime_ns = mtime_ns;
139 139 } else {
140 140 t->mtime_s = 0;
141 141 t->mtime_ns = 0;
142 142 }
143 143 return (PyObject *)t;
144 144 }
145 145
146 146 static void dirstate_item_dealloc(PyObject *o)
147 147 {
148 148 PyObject_Del(o);
149 149 }
150 150
151 151 static inline bool dirstate_item_c_tracked(dirstateItemObject *self)
152 152 {
153 153 return (self->flags & dirstate_flag_wc_tracked);
154 154 }
155 155
156 156 static inline bool dirstate_item_c_any_tracked(dirstateItemObject *self)
157 157 {
158 158 const int mask = dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
159 159 dirstate_flag_p2_info;
160 160 return (self->flags & mask);
161 161 }
162 162
163 163 static inline bool dirstate_item_c_added(dirstateItemObject *self)
164 164 {
165 165 const int mask = (dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
166 166 dirstate_flag_p2_info);
167 167 const int target = dirstate_flag_wc_tracked;
168 168 return (self->flags & mask) == target;
169 169 }
170 170
171 171 static inline bool dirstate_item_c_removed(dirstateItemObject *self)
172 172 {
173 173 if (self->flags & dirstate_flag_wc_tracked) {
174 174 return false;
175 175 }
176 176 return (self->flags &
177 177 (dirstate_flag_p1_tracked | dirstate_flag_p2_info));
178 178 }
179 179
180 180 static inline bool dirstate_item_c_merged(dirstateItemObject *self)
181 181 {
182 182 return ((self->flags & dirstate_flag_wc_tracked) &&
183 183 (self->flags & dirstate_flag_p1_tracked) &&
184 184 (self->flags & dirstate_flag_p2_info));
185 185 }
186 186
187 187 static inline bool dirstate_item_c_from_p2(dirstateItemObject *self)
188 188 {
189 189 return ((self->flags & dirstate_flag_wc_tracked) &&
190 190 !(self->flags & dirstate_flag_p1_tracked) &&
191 191 (self->flags & dirstate_flag_p2_info));
192 192 }
193 193
194 194 static inline char dirstate_item_c_v1_state(dirstateItemObject *self)
195 195 {
196 196 if (dirstate_item_c_removed(self)) {
197 197 return 'r';
198 198 } else if (dirstate_item_c_merged(self)) {
199 199 return 'm';
200 200 } else if (dirstate_item_c_added(self)) {
201 201 return 'a';
202 202 } else {
203 203 return 'n';
204 204 }
205 205 }
206 206
207 207 static inline bool dirstate_item_c_has_fallback_exec(dirstateItemObject *self)
208 208 {
209 209 return (bool)self->flags & dirstate_flag_has_fallback_exec;
210 210 }
211 211
212 212 static inline bool
213 213 dirstate_item_c_has_fallback_symlink(dirstateItemObject *self)
214 214 {
215 215 return (bool)self->flags & dirstate_flag_has_fallback_symlink;
216 216 }
217 217
218 218 static inline int dirstate_item_c_v1_mode(dirstateItemObject *self)
219 219 {
220 220 if (self->flags & dirstate_flag_has_meaningful_data) {
221 221 return self->mode;
222 222 } else {
223 223 return 0;
224 224 }
225 225 }
226 226
227 227 static inline int dirstate_item_c_v1_size(dirstateItemObject *self)
228 228 {
229 229 if (!(self->flags & dirstate_flag_wc_tracked) &&
230 230 (self->flags & dirstate_flag_p2_info)) {
231 231 if (self->flags & dirstate_flag_p1_tracked) {
232 232 return dirstate_v1_nonnormal;
233 233 } else {
234 234 return dirstate_v1_from_p2;
235 235 }
236 236 } else if (dirstate_item_c_removed(self)) {
237 237 return 0;
238 238 } else if (self->flags & dirstate_flag_p2_info) {
239 239 return dirstate_v1_from_p2;
240 240 } else if (dirstate_item_c_added(self)) {
241 241 return dirstate_v1_nonnormal;
242 242 } else if (self->flags & dirstate_flag_has_meaningful_data) {
243 243 return self->size;
244 244 } else {
245 245 return dirstate_v1_nonnormal;
246 246 }
247 247 }
248 248
249 249 static inline int dirstate_item_c_v1_mtime(dirstateItemObject *self)
250 250 {
251 251 if (dirstate_item_c_removed(self)) {
252 252 return 0;
253 253 } else if (!(self->flags & dirstate_flag_has_mtime) ||
254 254 !(self->flags & dirstate_flag_p1_tracked) ||
255 255 !(self->flags & dirstate_flag_wc_tracked) ||
256 256 (self->flags & dirstate_flag_p2_info) ||
257 257 (self->flags & dirstate_flag_mtime_second_ambiguous)) {
258 258 return ambiguous_time;
259 259 } else {
260 260 return self->mtime_s;
261 261 }
262 262 }
263 263
264 264 static PyObject *dirstate_item_v2_data(dirstateItemObject *self)
265 265 {
266 266 int flags = self->flags;
267 267 int mode = dirstate_item_c_v1_mode(self);
268 268 #ifdef S_IXUSR
269 269 /* This is for platforms with an exec bit */
270 270 if ((mode & S_IXUSR) != 0) {
271 271 flags |= dirstate_flag_mode_exec_perm;
272 272 } else {
273 273 flags &= ~dirstate_flag_mode_exec_perm;
274 274 }
275 275 #else
276 276 flags &= ~dirstate_flag_mode_exec_perm;
277 277 #endif
278 278 #ifdef S_ISLNK
279 279 /* This is for platforms with support for symlinks */
280 280 if (S_ISLNK(mode)) {
281 281 flags |= dirstate_flag_mode_is_symlink;
282 282 } else {
283 283 flags &= ~dirstate_flag_mode_is_symlink;
284 284 }
285 285 #else
286 286 flags &= ~dirstate_flag_mode_is_symlink;
287 287 #endif
288 288 return Py_BuildValue("iiii", flags, self->size, self->mtime_s,
289 289 self->mtime_ns);
290 290 };
291 291
292 292 static PyObject *dirstate_item_v1_state(dirstateItemObject *self)
293 293 {
294 294 char state = dirstate_item_c_v1_state(self);
295 295 return PyBytes_FromStringAndSize(&state, 1);
296 296 };
297 297
298 298 static PyObject *dirstate_item_v1_mode(dirstateItemObject *self)
299 299 {
300 300 return PyLong_FromLong(dirstate_item_c_v1_mode(self));
301 301 };
302 302
303 303 static PyObject *dirstate_item_v1_size(dirstateItemObject *self)
304 304 {
305 305 return PyLong_FromLong(dirstate_item_c_v1_size(self));
306 306 };
307 307
308 308 static PyObject *dirstate_item_v1_mtime(dirstateItemObject *self)
309 309 {
310 310 return PyLong_FromLong(dirstate_item_c_v1_mtime(self));
311 311 };
312 312
313 313 static PyObject *dirstate_item_mtime_likely_equal_to(dirstateItemObject *self,
314 314 PyObject *other)
315 315 {
316 316 int other_s;
317 317 int other_ns;
318 318 int other_second_ambiguous;
319 319 if (!PyArg_ParseTuple(other, "iii", &other_s, &other_ns,
320 320 &other_second_ambiguous)) {
321 321 return NULL;
322 322 }
323 323 if (!(self->flags & dirstate_flag_has_mtime)) {
324 324 Py_RETURN_FALSE;
325 325 }
326 326 if (self->mtime_s != other_s) {
327 327 Py_RETURN_FALSE;
328 328 }
329 329 if (self->mtime_ns == 0 || other_ns == 0) {
330 330 if (self->flags & dirstate_flag_mtime_second_ambiguous) {
331 331 Py_RETURN_FALSE;
332 332 } else {
333 333 Py_RETURN_TRUE;
334 334 }
335 335 }
336 336 if (self->mtime_ns == other_ns) {
337 337 Py_RETURN_TRUE;
338 338 } else {
339 339 Py_RETURN_FALSE;
340 340 }
341 341 };
342 342
343 343 /* This will never change since it's bound to V1
344 344 */
345 345 static inline dirstateItemObject *
346 346 dirstate_item_from_v1_data(char state, int mode, int size, int mtime)
347 347 {
348 348 dirstateItemObject *t =
349 349 PyObject_New(dirstateItemObject, &dirstateItemType);
350 350 if (!t) {
351 351 return NULL;
352 352 }
353 353 t->flags = 0;
354 354 t->mode = 0;
355 355 t->size = 0;
356 356 t->mtime_s = 0;
357 357 t->mtime_ns = 0;
358 358
359 359 if (state == 'm') {
360 360 t->flags = (dirstate_flag_wc_tracked |
361 361 dirstate_flag_p1_tracked | dirstate_flag_p2_info);
362 362 } else if (state == 'a') {
363 363 t->flags = dirstate_flag_wc_tracked;
364 364 } else if (state == 'r') {
365 365 if (size == dirstate_v1_nonnormal) {
366 366 t->flags =
367 367 dirstate_flag_p1_tracked | dirstate_flag_p2_info;
368 368 } else if (size == dirstate_v1_from_p2) {
369 369 t->flags = dirstate_flag_p2_info;
370 370 } else {
371 371 t->flags = dirstate_flag_p1_tracked;
372 372 }
373 373 } else if (state == 'n') {
374 374 if (size == dirstate_v1_from_p2) {
375 375 t->flags =
376 376 dirstate_flag_wc_tracked | dirstate_flag_p2_info;
377 377 } else if (size == dirstate_v1_nonnormal) {
378 378 t->flags =
379 379 dirstate_flag_wc_tracked | dirstate_flag_p1_tracked;
380 380 } else if (mtime == ambiguous_time) {
381 381 t->flags = (dirstate_flag_wc_tracked |
382 382 dirstate_flag_p1_tracked |
383 383 dirstate_flag_has_meaningful_data);
384 384 t->mode = mode;
385 385 t->size = size;
386 386 } else {
387 387 t->flags = (dirstate_flag_wc_tracked |
388 388 dirstate_flag_p1_tracked |
389 389 dirstate_flag_has_meaningful_data |
390 390 dirstate_flag_has_mtime);
391 391 t->mode = mode;
392 392 t->size = size;
393 393 t->mtime_s = mtime;
394 394 }
395 395 } else {
396 396 PyErr_Format(PyExc_RuntimeError,
397 397 "unknown state: `%c` (%d, %d, %d)", state, mode,
398 398 size, mtime, NULL);
399 399 Py_DECREF(t);
400 400 return NULL;
401 401 }
402 402
403 403 return t;
404 404 }
405 405
406 406 /* This will never change since it's bound to V1, unlike `dirstate_item_new` */
407 407 static PyObject *dirstate_item_from_v1_meth(PyTypeObject *subtype,
408 408 PyObject *args)
409 409 {
410 410 /* We do all the initialization here and not a tp_init function because
411 411 * dirstate_item is immutable. */
412 412 char state;
413 413 int size, mode, mtime;
414 414 if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime)) {
415 415 return NULL;
416 416 }
417 417 return (PyObject *)dirstate_item_from_v1_data(state, mode, size, mtime);
418 418 };
419 419
420 420 static PyObject *dirstate_item_from_v2_meth(PyTypeObject *subtype,
421 421 PyObject *args)
422 422 {
423 423 dirstateItemObject *t =
424 424 PyObject_New(dirstateItemObject, &dirstateItemType);
425 425 if (!t) {
426 426 return NULL;
427 427 }
428 428 if (!PyArg_ParseTuple(args, "iiii", &t->flags, &t->size, &t->mtime_s,
429 429 &t->mtime_ns)) {
430 430 return NULL;
431 431 }
432 432 if (t->flags & dirstate_flag_expected_state_is_modified) {
433 433 t->flags &= ~(dirstate_flag_expected_state_is_modified |
434 434 dirstate_flag_has_meaningful_data |
435 435 dirstate_flag_has_mtime);
436 436 }
437 437 t->mode = 0;
438 438 if (t->flags & dirstate_flag_has_meaningful_data) {
439 439 if (t->flags & dirstate_flag_mode_exec_perm) {
440 440 t->mode = 0755;
441 441 } else {
442 442 t->mode = 0644;
443 443 }
444 444 if (t->flags & dirstate_flag_mode_is_symlink) {
445 445 t->mode |= S_IFLNK;
446 446 } else {
447 447 t->mode |= S_IFREG;
448 448 }
449 449 }
450 450 return (PyObject *)t;
451 451 };
452 452
453 453 /* This means the next status call will have to actually check its content
454 454 to make sure it is correct. */
455 455 static PyObject *dirstate_item_set_possibly_dirty(dirstateItemObject *self)
456 456 {
457 457 self->flags &= ~dirstate_flag_has_mtime;
458 458 Py_RETURN_NONE;
459 459 }
460 460
461 461 /* See docstring of the python implementation for details */
462 462 static PyObject *dirstate_item_set_clean(dirstateItemObject *self,
463 463 PyObject *args)
464 464 {
465 465 int size, mode, mtime_s, mtime_ns, mtime_second_ambiguous;
466 466 PyObject *mtime;
467 467 mtime_s = 0;
468 468 mtime_ns = 0;
469 469 mtime_second_ambiguous = 0;
470 470 if (!PyArg_ParseTuple(args, "iiO", &mode, &size, &mtime)) {
471 471 return NULL;
472 472 }
473 473 if (mtime != Py_None) {
474 474 if (!PyArg_ParseTuple(mtime, "iii", &mtime_s, &mtime_ns,
475 475 &mtime_second_ambiguous)) {
476 476 return NULL;
477 477 }
478 478 } else {
479 479 self->flags &= ~dirstate_flag_has_mtime;
480 480 }
481 481 self->flags = dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
482 482 dirstate_flag_has_meaningful_data |
483 483 dirstate_flag_has_mtime;
484 484 if (mtime_second_ambiguous) {
485 485 self->flags |= dirstate_flag_mtime_second_ambiguous;
486 486 }
487 487 self->mode = mode;
488 488 self->size = size;
489 489 self->mtime_s = mtime_s;
490 490 self->mtime_ns = mtime_ns;
491 491 Py_RETURN_NONE;
492 492 }
493 493
494 494 static PyObject *dirstate_item_set_tracked(dirstateItemObject *self)
495 495 {
496 496 self->flags |= dirstate_flag_wc_tracked;
497 497 self->flags &= ~dirstate_flag_has_mtime;
498 498 Py_RETURN_NONE;
499 499 }
500 500
501 501 static PyObject *dirstate_item_set_untracked(dirstateItemObject *self)
502 502 {
503 503 self->flags &= ~dirstate_flag_wc_tracked;
504 504 self->mode = 0;
505 505 self->size = 0;
506 506 self->mtime_s = 0;
507 507 self->mtime_ns = 0;
508 508 Py_RETURN_NONE;
509 509 }
510 510
511 511 static PyObject *dirstate_item_drop_merge_data(dirstateItemObject *self)
512 512 {
513 513 if (self->flags & dirstate_flag_p2_info) {
514 514 self->flags &= ~(dirstate_flag_p2_info |
515 515 dirstate_flag_has_meaningful_data |
516 516 dirstate_flag_has_mtime);
517 517 self->mode = 0;
518 518 self->size = 0;
519 519 self->mtime_s = 0;
520 520 self->mtime_ns = 0;
521 521 }
522 522 Py_RETURN_NONE;
523 523 }
/*
 * Method table of the dirstate item type (referenced from the tp_methods
 * slot of dirstateItemType). from_v1_data and from_v2_data are class
 * methods (METH_CLASS); every other entry is an instance method. The table
 * is terminated by a {NULL} sentinel entry.
 */
524 524 static PyMethodDef dirstate_item_methods[] = {
525 525 {"v2_data", (PyCFunction)dirstate_item_v2_data, METH_NOARGS,
526 526 "return data suitable for v2 serialization"},
527 527 {"v1_state", (PyCFunction)dirstate_item_v1_state, METH_NOARGS,
528 528 "return a \"state\" suitable for v1 serialization"},
529 529 {"v1_mode", (PyCFunction)dirstate_item_v1_mode, METH_NOARGS,
530 530 "return a \"mode\" suitable for v1 serialization"},
531 531 {"v1_size", (PyCFunction)dirstate_item_v1_size, METH_NOARGS,
532 532 "return a \"size\" suitable for v1 serialization"},
533 533 {"v1_mtime", (PyCFunction)dirstate_item_v1_mtime, METH_NOARGS,
534 534 "return a \"mtime\" suitable for v1 serialization"},
535 535 {"mtime_likely_equal_to", (PyCFunction)dirstate_item_mtime_likely_equal_to,
536 536 METH_O, "True if the stored mtime is likely equal to the given mtime"},
537 537 {"from_v1_data", (PyCFunction)dirstate_item_from_v1_meth,
538 538 METH_VARARGS | METH_CLASS, "build a new DirstateItem object from V1 data"},
539 539 {"from_v2_data", (PyCFunction)dirstate_item_from_v2_meth,
540 540 METH_VARARGS | METH_CLASS, "build a new DirstateItem object from V2 data"},
541 541 {"set_possibly_dirty", (PyCFunction)dirstate_item_set_possibly_dirty,
542 542 METH_NOARGS, "mark a file as \"possibly dirty\""},
543 543 {"set_clean", (PyCFunction)dirstate_item_set_clean, METH_VARARGS,
544 544 "mark a file as \"clean\""},
545 545 {"set_tracked", (PyCFunction)dirstate_item_set_tracked, METH_NOARGS,
546 546 "mark a file as \"tracked\""},
547 547 {"set_untracked", (PyCFunction)dirstate_item_set_untracked, METH_NOARGS,
548 548 "mark a file as \"untracked\""},
549 549 {"drop_merge_data", (PyCFunction)dirstate_item_drop_merge_data, METH_NOARGS,
550 550 "remove all \"merge-only\" from a DirstateItem"},
551 551 {NULL} /* Sentinel */
552 552 };
553 553
554 554 static PyObject *dirstate_item_get_mode(dirstateItemObject *self)
555 555 {
556 556 return PyLong_FromLong(dirstate_item_c_v1_mode(self));
557 557 };
558 558
559 559 static PyObject *dirstate_item_get_size(dirstateItemObject *self)
560 560 {
561 561 return PyLong_FromLong(dirstate_item_c_v1_size(self));
562 562 };
563 563
564 564 static PyObject *dirstate_item_get_mtime(dirstateItemObject *self)
565 565 {
566 566 return PyLong_FromLong(dirstate_item_c_v1_mtime(self));
567 567 };
568 568
569 569 static PyObject *dirstate_item_get_state(dirstateItemObject *self)
570 570 {
571 571 char state = dirstate_item_c_v1_state(self);
572 572 return PyBytes_FromStringAndSize(&state, 1);
573 573 };
574 574
575 575 static PyObject *dirstate_item_get_has_fallback_exec(dirstateItemObject *self)
576 576 {
577 577 if (dirstate_item_c_has_fallback_exec(self)) {
578 578 Py_RETURN_TRUE;
579 579 } else {
580 580 Py_RETURN_FALSE;
581 581 }
582 582 };
583 583
584 584 static PyObject *dirstate_item_get_fallback_exec(dirstateItemObject *self)
585 585 {
586 586 if (dirstate_item_c_has_fallback_exec(self)) {
587 587 if (self->flags & dirstate_flag_fallback_exec) {
588 588 Py_RETURN_TRUE;
589 589 } else {
590 590 Py_RETURN_FALSE;
591 591 }
592 592 } else {
593 593 Py_RETURN_NONE;
594 594 }
595 595 };
596 596
597 597 static int dirstate_item_set_fallback_exec(dirstateItemObject *self,
598 598 PyObject *value)
599 599 {
600 600 if ((value == Py_None) || (value == NULL)) {
601 601 self->flags &= ~dirstate_flag_has_fallback_exec;
602 602 } else {
603 603 self->flags |= dirstate_flag_has_fallback_exec;
604 604 if (PyObject_IsTrue(value)) {
605 605 self->flags |= dirstate_flag_fallback_exec;
606 606 } else {
607 607 self->flags &= ~dirstate_flag_fallback_exec;
608 608 }
609 609 }
610 610 return 0;
611 611 };
612 612
613 613 static PyObject *
614 614 dirstate_item_get_has_fallback_symlink(dirstateItemObject *self)
615 615 {
616 616 if (dirstate_item_c_has_fallback_symlink(self)) {
617 617 Py_RETURN_TRUE;
618 618 } else {
619 619 Py_RETURN_FALSE;
620 620 }
621 621 };
622 622
623 623 static PyObject *dirstate_item_get_fallback_symlink(dirstateItemObject *self)
624 624 {
625 625 if (dirstate_item_c_has_fallback_symlink(self)) {
626 626 if (self->flags & dirstate_flag_fallback_symlink) {
627 627 Py_RETURN_TRUE;
628 628 } else {
629 629 Py_RETURN_FALSE;
630 630 }
631 631 } else {
632 632 Py_RETURN_NONE;
633 633 }
634 634 };
635 635
636 636 static int dirstate_item_set_fallback_symlink(dirstateItemObject *self,
637 637 PyObject *value)
638 638 {
639 639 if ((value == Py_None) || (value == NULL)) {
640 640 self->flags &= ~dirstate_flag_has_fallback_symlink;
641 641 } else {
642 642 self->flags |= dirstate_flag_has_fallback_symlink;
643 643 if (PyObject_IsTrue(value)) {
644 644 self->flags |= dirstate_flag_fallback_symlink;
645 645 } else {
646 646 self->flags &= ~dirstate_flag_fallback_symlink;
647 647 }
648 648 }
649 649 return 0;
650 650 };
651 651
652 652 static PyObject *dirstate_item_get_tracked(dirstateItemObject *self)
653 653 {
654 654 if (dirstate_item_c_tracked(self)) {
655 655 Py_RETURN_TRUE;
656 656 } else {
657 657 Py_RETURN_FALSE;
658 658 }
659 659 };
660 660 static PyObject *dirstate_item_get_p1_tracked(dirstateItemObject *self)
661 661 {
662 662 if (self->flags & dirstate_flag_p1_tracked) {
663 663 Py_RETURN_TRUE;
664 664 } else {
665 665 Py_RETURN_FALSE;
666 666 }
667 667 };
668 668
669 669 static PyObject *dirstate_item_get_added(dirstateItemObject *self)
670 670 {
671 671 if (dirstate_item_c_added(self)) {
672 672 Py_RETURN_TRUE;
673 673 } else {
674 674 Py_RETURN_FALSE;
675 675 }
676 676 };
677 677
678 678 static PyObject *dirstate_item_get_p2_info(dirstateItemObject *self)
679 679 {
680 680 if (self->flags & dirstate_flag_wc_tracked &&
681 681 self->flags & dirstate_flag_p2_info) {
682 682 Py_RETURN_TRUE;
683 683 } else {
684 684 Py_RETURN_FALSE;
685 685 }
686 686 };
687 687
688 688 static PyObject *dirstate_item_get_merged(dirstateItemObject *self)
689 689 {
690 690 if (dirstate_item_c_merged(self)) {
691 691 Py_RETURN_TRUE;
692 692 } else {
693 693 Py_RETURN_FALSE;
694 694 }
695 695 };
696 696
697 697 static PyObject *dirstate_item_get_from_p2(dirstateItemObject *self)
698 698 {
699 699 if (dirstate_item_c_from_p2(self)) {
700 700 Py_RETURN_TRUE;
701 701 } else {
702 702 Py_RETURN_FALSE;
703 703 }
704 704 };
705 705
706 706 static PyObject *dirstate_item_get_maybe_clean(dirstateItemObject *self)
707 707 {
708 708 if (!(self->flags & dirstate_flag_wc_tracked)) {
709 709 Py_RETURN_FALSE;
710 710 } else if (!(self->flags & dirstate_flag_p1_tracked)) {
711 711 Py_RETURN_FALSE;
712 712 } else if (self->flags & dirstate_flag_p2_info) {
713 713 Py_RETURN_FALSE;
714 714 } else {
715 715 Py_RETURN_TRUE;
716 716 }
717 717 };
718 718
719 719 static PyObject *dirstate_item_get_any_tracked(dirstateItemObject *self)
720 720 {
721 721 if (dirstate_item_c_any_tracked(self)) {
722 722 Py_RETURN_TRUE;
723 723 } else {
724 724 Py_RETURN_FALSE;
725 725 }
726 726 };
727 727
728 728 static PyObject *dirstate_item_get_removed(dirstateItemObject *self)
729 729 {
730 730 if (dirstate_item_c_removed(self)) {
731 731 Py_RETURN_TRUE;
732 732 } else {
733 733 Py_RETURN_FALSE;
734 734 }
735 735 };
736 736
/*
 * Attribute (getter/setter) table of the dirstate item type, referenced
 * from the tp_getset slot of dirstateItemType. Only "fallback_exec" and
 * "fallback_symlink" have a setter; every other attribute has a NULL
 * setter. Each entry's doc string repeats the attribute name and the
 * closure pointer is unused (NULL). Terminated by a {NULL} sentinel.
 */
737 737 static PyGetSetDef dirstate_item_getset[] = {
738 738 {"mode", (getter)dirstate_item_get_mode, NULL, "mode", NULL},
739 739 {"size", (getter)dirstate_item_get_size, NULL, "size", NULL},
740 740 {"mtime", (getter)dirstate_item_get_mtime, NULL, "mtime", NULL},
741 741 {"state", (getter)dirstate_item_get_state, NULL, "state", NULL},
742 742 {"has_fallback_exec", (getter)dirstate_item_get_has_fallback_exec, NULL,
743 743 "has_fallback_exec", NULL},
744 744 {"fallback_exec", (getter)dirstate_item_get_fallback_exec,
745 745 (setter)dirstate_item_set_fallback_exec, "fallback_exec", NULL},
746 746 {"has_fallback_symlink", (getter)dirstate_item_get_has_fallback_symlink,
747 747 NULL, "has_fallback_symlink", NULL},
748 748 {"fallback_symlink", (getter)dirstate_item_get_fallback_symlink,
749 749 (setter)dirstate_item_set_fallback_symlink, "fallback_symlink", NULL},
750 750 {"tracked", (getter)dirstate_item_get_tracked, NULL, "tracked", NULL},
751 751 {"p1_tracked", (getter)dirstate_item_get_p1_tracked, NULL, "p1_tracked",
752 752 NULL},
753 753 {"added", (getter)dirstate_item_get_added, NULL, "added", NULL},
754 754 {"p2_info", (getter)dirstate_item_get_p2_info, NULL, "p2_info", NULL},
755 755 {"merged", (getter)dirstate_item_get_merged, NULL, "merged", NULL},
756 756 {"from_p2", (getter)dirstate_item_get_from_p2, NULL, "from_p2", NULL},
757 757 {"maybe_clean", (getter)dirstate_item_get_maybe_clean, NULL, "maybe_clean",
758 758 NULL},
759 759 {"any_tracked", (getter)dirstate_item_get_any_tracked, NULL, "any_tracked",
760 760 NULL},
761 761 {"removed", (getter)dirstate_item_get_removed, NULL, "removed", NULL},
762 762 {NULL} /* Sentinel */
763 763 };
764 764
/*
 * Type object for dirstate items. It is initialized positionally, so the
 * order of the entries below must match the PyTypeObject field layout.
 * Only the deallocator, flags, doc string, method table, getset table and
 * tp_new are populated; every other slot is left at 0.
 *
 * The tp_name is still "dirstate_tuple" although module_init() exposes the
 * type under the module attribute "DirstateItem".
 *
 * NOTE(review): the per-slot labels (tp_print, tp_compare) look like
 * Python 2 era names; confirm them against the PyTypeObject layout of the
 * Python versions actually targeted.
 */
765 765 PyTypeObject dirstateItemType = {
766 766 PyVarObject_HEAD_INIT(NULL, 0) /* header */
767 767 "dirstate_tuple", /* tp_name */
768 768 sizeof(dirstateItemObject), /* tp_basicsize */
769 769 0, /* tp_itemsize */
770 770 (destructor)dirstate_item_dealloc, /* tp_dealloc */
771 771 0, /* tp_print */
772 772 0, /* tp_getattr */
773 773 0, /* tp_setattr */
774 774 0, /* tp_compare */
775 775 0, /* tp_repr */
776 776 0, /* tp_as_number */
777 777 0, /* tp_as_sequence */
778 778 0, /* tp_as_mapping */
779 779 0, /* tp_hash */
780 780 0, /* tp_call */
781 781 0, /* tp_str */
782 782 0, /* tp_getattro */
783 783 0, /* tp_setattro */
784 784 0, /* tp_as_buffer */
785 785 Py_TPFLAGS_DEFAULT, /* tp_flags */
786 786 "dirstate tuple", /* tp_doc */
787 787 0, /* tp_traverse */
788 788 0, /* tp_clear */
789 789 0, /* tp_richcompare */
790 790 0, /* tp_weaklistoffset */
791 791 0, /* tp_iter */
792 792 0, /* tp_iternext */
793 793 dirstate_item_methods, /* tp_methods */
794 794 0, /* tp_members */
795 795 dirstate_item_getset, /* tp_getset */
796 796 0, /* tp_base */
797 797 0, /* tp_dict */
798 798 0, /* tp_descr_get */
799 799 0, /* tp_descr_set */
800 800 0, /* tp_dictoffset */
801 801 0, /* tp_init */
802 802 0, /* tp_alloc */
803 803 dirstate_item_new, /* tp_new */
804 804 };
805 805
806 806 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
807 807 {
808 808 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
809 809 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
810 810 char state, *cur, *str, *cpos;
811 811 int mode, size, mtime;
812 812 unsigned int flen, pos = 40;
813 813 Py_ssize_t len = 40;
814 814 Py_ssize_t readlen;
815 815
816 if (!PyArg_ParseTuple(
817 args, PY23("O!O!s#:parse_dirstate", "O!O!y#:parse_dirstate"),
818 &PyDict_Type, &dmap, &PyDict_Type, &cmap, &str, &readlen)) {
816 if (!PyArg_ParseTuple(args, "O!O!y#:parse_dirstate", &PyDict_Type,
817 &dmap, &PyDict_Type, &cmap, &str, &readlen)) {
819 818 goto quit;
820 819 }
821 820
822 821 len = readlen;
823 822
824 823 /* read parents */
825 824 if (len < 40) {
826 825 PyErr_SetString(PyExc_ValueError,
827 826 "too little data for parents");
828 827 goto quit;
829 828 }
830 829
831 parents = Py_BuildValue(PY23("s#s#", "y#y#"), str, (Py_ssize_t)20,
832 str + 20, (Py_ssize_t)20);
830 parents = Py_BuildValue("y#y#", str, (Py_ssize_t)20, str + 20,
831 (Py_ssize_t)20);
833 832 if (!parents) {
834 833 goto quit;
835 834 }
836 835
837 836 /* read filenames */
838 837 while (pos >= 40 && pos < len) {
839 838 if (pos + 17 > len) {
840 839 PyErr_SetString(PyExc_ValueError,
841 840 "overflow in dirstate");
842 841 goto quit;
843 842 }
844 843 cur = str + pos;
845 844 /* unpack header */
846 845 state = *cur;
847 846 mode = getbe32(cur + 1);
848 847 size = getbe32(cur + 5);
849 848 mtime = getbe32(cur + 9);
850 849 flen = getbe32(cur + 13);
851 850 pos += 17;
852 851 cur += 17;
853 852 if (flen > len - pos) {
854 853 PyErr_SetString(PyExc_ValueError,
855 854 "overflow in dirstate");
856 855 goto quit;
857 856 }
858 857
859 858 entry = (PyObject *)dirstate_item_from_v1_data(state, mode,
860 859 size, mtime);
861 860 if (!entry)
862 861 goto quit;
863 862 cpos = memchr(cur, 0, flen);
864 863 if (cpos) {
865 864 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
866 865 cname = PyBytes_FromStringAndSize(
867 866 cpos + 1, flen - (cpos - cur) - 1);
868 867 if (!fname || !cname ||
869 868 PyDict_SetItem(cmap, fname, cname) == -1 ||
870 869 PyDict_SetItem(dmap, fname, entry) == -1) {
871 870 goto quit;
872 871 }
873 872 Py_DECREF(cname);
874 873 } else {
875 874 fname = PyBytes_FromStringAndSize(cur, flen);
876 875 if (!fname ||
877 876 PyDict_SetItem(dmap, fname, entry) == -1) {
878 877 goto quit;
879 878 }
880 879 }
881 880 Py_DECREF(fname);
882 881 Py_DECREF(entry);
883 882 fname = cname = entry = NULL;
884 883 pos += flen;
885 884 }
886 885
887 886 ret = parents;
888 887 Py_INCREF(ret);
889 888 quit:
890 889 Py_XDECREF(fname);
891 890 Py_XDECREF(cname);
892 891 Py_XDECREF(entry);
893 892 Py_XDECREF(parents);
894 893 return ret;
895 894 }
896 895
897 896 /*
898 897 * Efficiently pack a dirstate object into its on-disk format.
899 898 */
900 899 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
901 900 {
902 901 PyObject *packobj = NULL;
903 902 PyObject *map, *copymap, *pl, *mtime_unset = NULL;
904 903 Py_ssize_t nbytes, pos, l;
905 904 PyObject *k, *v = NULL, *pn;
906 905 char *p, *s;
907 906
908 907 if (!PyArg_ParseTuple(args, "O!O!O!:pack_dirstate", &PyDict_Type, &map,
909 908 &PyDict_Type, &copymap, &PyTuple_Type, &pl)) {
910 909 return NULL;
911 910 }
912 911
913 912 if (PyTuple_Size(pl) != 2) {
914 913 PyErr_SetString(PyExc_TypeError, "expected 2-element tuple");
915 914 return NULL;
916 915 }
917 916
918 917 /* Figure out how much we need to allocate. */
919 918 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
920 919 PyObject *c;
921 920 if (!PyBytes_Check(k)) {
922 921 PyErr_SetString(PyExc_TypeError, "expected string key");
923 922 goto bail;
924 923 }
925 924 nbytes += PyBytes_GET_SIZE(k) + 17;
926 925 c = PyDict_GetItem(copymap, k);
927 926 if (c) {
928 927 if (!PyBytes_Check(c)) {
929 928 PyErr_SetString(PyExc_TypeError,
930 929 "expected string key");
931 930 goto bail;
932 931 }
933 932 nbytes += PyBytes_GET_SIZE(c) + 1;
934 933 }
935 934 }
936 935
937 936 packobj = PyBytes_FromStringAndSize(NULL, nbytes);
938 937 if (packobj == NULL) {
939 938 goto bail;
940 939 }
941 940
942 941 p = PyBytes_AS_STRING(packobj);
943 942
944 943 pn = PyTuple_GET_ITEM(pl, 0);
945 944 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
946 945 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
947 946 goto bail;
948 947 }
949 948 memcpy(p, s, l);
950 949 p += 20;
951 950 pn = PyTuple_GET_ITEM(pl, 1);
952 951 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
953 952 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
954 953 goto bail;
955 954 }
956 955 memcpy(p, s, l);
957 956 p += 20;
958 957
959 958 for (pos = 0; PyDict_Next(map, &pos, &k, &v);) {
960 959 dirstateItemObject *tuple;
961 960 char state;
962 961 int mode, size, mtime;
963 962 Py_ssize_t len, l;
964 963 PyObject *o;
965 964 char *t;
966 965
967 966 if (!dirstate_tuple_check(v)) {
968 967 PyErr_SetString(PyExc_TypeError,
969 968 "expected a dirstate tuple");
970 969 goto bail;
971 970 }
972 971 tuple = (dirstateItemObject *)v;
973 972
974 973 state = dirstate_item_c_v1_state(tuple);
975 974 mode = dirstate_item_c_v1_mode(tuple);
976 975 size = dirstate_item_c_v1_size(tuple);
977 976 mtime = dirstate_item_c_v1_mtime(tuple);
978 977 *p++ = state;
979 978 putbe32((uint32_t)mode, p);
980 979 putbe32((uint32_t)size, p + 4);
981 980 putbe32((uint32_t)mtime, p + 8);
982 981 t = p + 12;
983 982 p += 16;
984 983 len = PyBytes_GET_SIZE(k);
985 984 memcpy(p, PyBytes_AS_STRING(k), len);
986 985 p += len;
987 986 o = PyDict_GetItem(copymap, k);
988 987 if (o) {
989 988 *p++ = '\0';
990 989 l = PyBytes_GET_SIZE(o);
991 990 memcpy(p, PyBytes_AS_STRING(o), l);
992 991 p += l;
993 992 len += l + 1;
994 993 }
995 994 putbe32((uint32_t)len, t);
996 995 }
997 996
998 997 pos = p - PyBytes_AS_STRING(packobj);
999 998 if (pos != nbytes) {
1000 999 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
1001 1000 (long)pos, (long)nbytes);
1002 1001 goto bail;
1003 1002 }
1004 1003
1005 1004 return packobj;
1006 1005 bail:
1007 1006 Py_XDECREF(mtime_unset);
1008 1007 Py_XDECREF(packobj);
1009 1008 Py_XDECREF(v);
1010 1009 return NULL;
1011 1010 }
1012 1011
/*
 * Constants for the obsolescence marker parsing code.
 *
 * USING_SHA_256 is a bit tested against a marker's flags field: when it is
 * set, fm1readmarker() reads 32-byte hashes instead of 20-byte ones.
 * FM1_HEADER_SIZE is the size of the fixed marker header as consumed by
 * fm1readmarker(): msize (4) + mtime (8) + tz (2) + flags (2) +
 * nsuccs (1) + nparents (1) + nmetadata (1).
 * NOTE(review): BUMPED_FIX is not referenced in the visible code;
 * presumably another marker flag bit - confirm.
 */
1013 1012 #define BUMPED_FIX 1
1014 1013 #define USING_SHA_256 2
1015 1014 #define FM1_HEADER_SIZE (4 + 8 + 2 + 2 + 1 + 1 + 1)
1016 1015
1017 1016 static PyObject *readshas(const char *source, unsigned char num,
1018 1017 Py_ssize_t hashwidth)
1019 1018 {
1020 1019 int i;
1021 1020 PyObject *list = PyTuple_New(num);
1022 1021 if (list == NULL) {
1023 1022 return NULL;
1024 1023 }
1025 1024 for (i = 0; i < num; i++) {
1026 1025 PyObject *hash = PyBytes_FromStringAndSize(source, hashwidth);
1027 1026 if (hash == NULL) {
1028 1027 Py_DECREF(list);
1029 1028 return NULL;
1030 1029 }
1031 1030 PyTuple_SET_ITEM(list, i, hash);
1032 1031 source += hashwidth;
1033 1032 }
1034 1033 return list;
1035 1034 }
1036 1035
1037 1036 static PyObject *fm1readmarker(const char *databegin, const char *dataend,
1038 1037 uint32_t *msize)
1039 1038 {
1040 1039 const char *data = databegin;
1041 1040 const char *meta;
1042 1041
1043 1042 double mtime;
1044 1043 int16_t tz;
1045 1044 uint16_t flags;
1046 1045 unsigned char nsuccs, nparents, nmetadata;
1047 1046 Py_ssize_t hashwidth = 20;
1048 1047
1049 1048 PyObject *prec = NULL, *parents = NULL, *succs = NULL;
1050 1049 PyObject *metadata = NULL, *ret = NULL;
1051 1050 int i;
1052 1051
1053 1052 if (data + FM1_HEADER_SIZE > dataend) {
1054 1053 goto overflow;
1055 1054 }
1056 1055
1057 1056 *msize = getbe32(data);
1058 1057 data += 4;
1059 1058 mtime = getbefloat64(data);
1060 1059 data += 8;
1061 1060 tz = getbeint16(data);
1062 1061 data += 2;
1063 1062 flags = getbeuint16(data);
1064 1063 data += 2;
1065 1064
1066 1065 if (flags & USING_SHA_256) {
1067 1066 hashwidth = 32;
1068 1067 }
1069 1068
1070 1069 nsuccs = (unsigned char)(*data++);
1071 1070 nparents = (unsigned char)(*data++);
1072 1071 nmetadata = (unsigned char)(*data++);
1073 1072
1074 1073 if (databegin + *msize > dataend) {
1075 1074 goto overflow;
1076 1075 }
1077 1076 dataend = databegin + *msize; /* narrow down to marker size */
1078 1077
1079 1078 if (data + hashwidth > dataend) {
1080 1079 goto overflow;
1081 1080 }
1082 1081 prec = PyBytes_FromStringAndSize(data, hashwidth);
1083 1082 data += hashwidth;
1084 1083 if (prec == NULL) {
1085 1084 goto bail;
1086 1085 }
1087 1086
1088 1087 if (data + nsuccs * hashwidth > dataend) {
1089 1088 goto overflow;
1090 1089 }
1091 1090 succs = readshas(data, nsuccs, hashwidth);
1092 1091 if (succs == NULL) {
1093 1092 goto bail;
1094 1093 }
1095 1094 data += nsuccs * hashwidth;
1096 1095
1097 1096 if (nparents == 1 || nparents == 2) {
1098 1097 if (data + nparents * hashwidth > dataend) {
1099 1098 goto overflow;
1100 1099 }
1101 1100 parents = readshas(data, nparents, hashwidth);
1102 1101 if (parents == NULL) {
1103 1102 goto bail;
1104 1103 }
1105 1104 data += nparents * hashwidth;
1106 1105 } else {
1107 1106 parents = Py_None;
1108 1107 Py_INCREF(parents);
1109 1108 }
1110 1109
1111 1110 if (data + 2 * nmetadata > dataend) {
1112 1111 goto overflow;
1113 1112 }
1114 1113 meta = data + (2 * nmetadata);
1115 1114 metadata = PyTuple_New(nmetadata);
1116 1115 if (metadata == NULL) {
1117 1116 goto bail;
1118 1117 }
1119 1118 for (i = 0; i < nmetadata; i++) {
1120 1119 PyObject *tmp, *left = NULL, *right = NULL;
1121 1120 Py_ssize_t leftsize = (unsigned char)(*data++);
1122 1121 Py_ssize_t rightsize = (unsigned char)(*data++);
1123 1122 if (meta + leftsize + rightsize > dataend) {
1124 1123 goto overflow;
1125 1124 }
1126 1125 left = PyBytes_FromStringAndSize(meta, leftsize);
1127 1126 meta += leftsize;
1128 1127 right = PyBytes_FromStringAndSize(meta, rightsize);
1129 1128 meta += rightsize;
1130 1129 tmp = PyTuple_New(2);
1131 1130 if (!left || !right || !tmp) {
1132 1131 Py_XDECREF(left);
1133 1132 Py_XDECREF(right);
1134 1133 Py_XDECREF(tmp);
1135 1134 goto bail;
1136 1135 }
1137 1136 PyTuple_SET_ITEM(tmp, 0, left);
1138 1137 PyTuple_SET_ITEM(tmp, 1, right);
1139 1138 PyTuple_SET_ITEM(metadata, i, tmp);
1140 1139 }
1141 1140 ret = Py_BuildValue("(OOHO(di)O)", prec, succs, flags, metadata, mtime,
1142 1141 (int)tz * 60, parents);
1143 1142 goto bail; /* return successfully */
1144 1143
1145 1144 overflow:
1146 1145 PyErr_SetString(PyExc_ValueError, "overflow in obsstore");
1147 1146 bail:
1148 1147 Py_XDECREF(prec);
1149 1148 Py_XDECREF(succs);
1150 1149 Py_XDECREF(metadata);
1151 1150 Py_XDECREF(parents);
1152 1151 return ret;
1153 1152 }
1154 1153
1155 1154 static PyObject *fm1readmarkers(PyObject *self, PyObject *args)
1156 1155 {
1157 1156 const char *data, *dataend;
1158 1157 Py_ssize_t datalen, offset, stop;
1159 1158 PyObject *markers = NULL;
1160 1159
1161 if (!PyArg_ParseTuple(args, PY23("s#nn", "y#nn"), &data, &datalen,
1162 &offset, &stop)) {
1160 if (!PyArg_ParseTuple(args, "y#nn", &data, &datalen, &offset, &stop)) {
1163 1161 return NULL;
1164 1162 }
1165 1163 if (offset < 0) {
1166 1164 PyErr_SetString(PyExc_ValueError,
1167 1165 "invalid negative offset in fm1readmarkers");
1168 1166 return NULL;
1169 1167 }
1170 1168 if (stop > datalen) {
1171 1169 PyErr_SetString(
1172 1170 PyExc_ValueError,
1173 1171 "stop longer than data length in fm1readmarkers");
1174 1172 return NULL;
1175 1173 }
1176 1174 dataend = data + datalen;
1177 1175 data += offset;
1178 1176 markers = PyList_New(0);
1179 1177 if (!markers) {
1180 1178 return NULL;
1181 1179 }
1182 1180 while (offset < stop) {
1183 1181 uint32_t msize;
1184 1182 int error;
1185 1183 PyObject *record = fm1readmarker(data, dataend, &msize);
1186 1184 if (!record) {
1187 1185 goto bail;
1188 1186 }
1189 1187 error = PyList_Append(markers, record);
1190 1188 Py_DECREF(record);
1191 1189 if (error) {
1192 1190 goto bail;
1193 1191 }
1194 1192 data += msize;
1195 1193 offset += msize;
1196 1194 }
1197 1195 return markers;
1198 1196 bail:
1199 1197 Py_DECREF(markers);
1200 1198 return NULL;
1201 1199 }
1202 1200
/* Module doc string, used by the parsers_module definition. */
1203 1201 static char parsers_doc[] = "Efficient content parsing.";
1204 1202
/*
 * Functions that are not defined in the visible part of this file but are
 * exported through the methods table of this module.
 */
1205 1203 PyObject *encodedir(PyObject *self, PyObject *args);
1206 1204 PyObject *pathencode(PyObject *self, PyObject *args);
1207 1205 PyObject *lowerencode(PyObject *self, PyObject *args);
1208 1206 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs);
1209 1207
/*
 * Module-level function table of the "parsers" module. Every entry takes
 * positional arguments only (METH_VARARGS) except parse_index2, which also
 * accepts keyword arguments. Terminated by a {NULL, NULL} sentinel.
 */
1210 1208 static PyMethodDef methods[] = {
1211 1209 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
1212 1210 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1213 1211 {"parse_index2", (PyCFunction)parse_index2, METH_VARARGS | METH_KEYWORDS,
1214 1212 "parse a revlog index\n"},
1215 1213 {"isasciistr", isasciistr, METH_VARARGS, "check if an ASCII string\n"},
1216 1214 {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
1217 1215 {"asciiupper", asciiupper, METH_VARARGS, "uppercase an ASCII string\n"},
1218 1216 {"dict_new_presized", dict_new_presized, METH_VARARGS,
1219 1217 "construct a dict with an expected size\n"},
1220 1218 {"make_file_foldmap", make_file_foldmap, METH_VARARGS,
1221 1219 "make file foldmap\n"},
1222 1220 {"jsonescapeu8fast", jsonescapeu8fast, METH_VARARGS,
1223 1221 "escape a UTF-8 byte string to JSON (fast path)\n"},
1224 1222 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
1225 1223 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
1226 1224 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
1227 1225 {"fm1readmarkers", fm1readmarkers, METH_VARARGS,
1228 1226 "parse v1 obsolete markers\n"},
1229 1227 {NULL, NULL}};
1230 1228
/*
 * Initializers of the sub-components that module_init() calls with the
 * "parsers" module object; they are not defined in the visible part of
 * this file.
 */
1231 1229 void dirs_module_init(PyObject *mod);
1232 1230 void manifest_module_init(PyObject *mod);
1233 1231 void revlog_module_init(PyObject *mod);
1234 1232
/* Exposed by module_init() as the module-level integer constant "version". */
1235 1233 static const int version = 20;
1236 1234
1237 1235 static void module_init(PyObject *mod)
1238 1236 {
1239 1237 PyModule_AddIntConstant(mod, "version", version);
1240 1238
1241 1239 /* This module constant has two purposes. First, it lets us unit test
1242 1240 * the ImportError raised without hard-coding any error text. This
1243 1241 * means we can change the text in the future without breaking tests,
1244 1242 * even across changesets without a recompile. Second, its presence
1245 1243 * can be used to determine whether the version-checking logic is
1246 1244 * present, which also helps in testing across changesets without a
1247 1245 * recompile. Note that this means the pure-Python version of parsers
1248 1246 * should not have this module constant. */
1249 1247 PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
1250 1248
1251 1249 dirs_module_init(mod);
1252 1250 manifest_module_init(mod);
1253 1251 revlog_module_init(mod);
1254 1252
1255 1253 if (PyType_Ready(&dirstateItemType) < 0) {
1256 1254 return;
1257 1255 }
1258 1256 Py_INCREF(&dirstateItemType);
1259 1257 PyModule_AddObject(mod, "DirstateItem", (PyObject *)&dirstateItemType);
1260 1258 }
1261 1259
1262 1260 static int check_python_version(void)
1263 1261 {
1264 1262 PyObject *sys = PyImport_ImportModule("sys"), *ver;
1265 1263 long hexversion;
1266 1264 if (!sys) {
1267 1265 return -1;
1268 1266 }
1269 1267 ver = PyObject_GetAttrString(sys, "hexversion");
1270 1268 Py_DECREF(sys);
1271 1269 if (!ver) {
1272 1270 return -1;
1273 1271 }
1274 1272 hexversion = PyLong_AsLong(ver);
1275 1273 Py_DECREF(ver);
1276 1274 /* sys.hexversion is a 32-bit number by default, so the -1 case
1277 1275 * should only occur in unusual circumstances (e.g. if sys.hexversion
1278 1276 * is manually set to an invalid value). */
1279 1277 if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
1280 1278 PyErr_Format(PyExc_ImportError,
1281 1279 "%s: The Mercurial extension "
1282 1280 "modules were compiled with Python " PY_VERSION
1283 1281 ", but "
1284 1282 "Mercurial is currently using Python with "
1285 1283 "sys.hexversion=%ld: "
1286 1284 "Python %s\n at: %s",
1287 1285 versionerrortext, hexversion, Py_GetVersion(),
1288 1286 Py_GetProgramFullPath());
1289 1287 return -1;
1290 1288 }
1291 1289 return 0;
1292 1290 }
1293 1291
/*
 * Definition of the "parsers" extension module: name, doc string, a
 * per-module state size of -1, and the module-level function table.
 */
1294 1292 static struct PyModuleDef parsers_module = {PyModuleDef_HEAD_INIT, "parsers",
1295 1293 parsers_doc, -1, methods};
1296 1294
1297 1295 PyMODINIT_FUNC PyInit_parsers(void)
1298 1296 {
1299 1297 PyObject *mod;
1300 1298
1301 1299 if (check_python_version() == -1)
1302 1300 return NULL;
1303 1301 mod = PyModule_Create(&parsers_module);
1304 1302 module_init(mod);
1305 1303 return mod;
1306 1304 }
@@ -1,803 +1,802 b''
1 1 /*
2 2 pathencode.c - efficient path name encoding
3 3
4 4 Copyright 2012 Facebook
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 /*
11 11 * An implementation of the name encoding scheme used by the fncache
12 12 * store. The common case is of a path < 120 bytes long, which is
13 13 * handled either in a single pass with no allocations or two passes
14 14 * with a single allocation. For longer paths, multiple passes are
15 15 * required.
16 16 */
17 17
18 18 #define PY_SSIZE_T_CLEAN
19 19 #include <Python.h>
20 20 #include <assert.h>
21 21 #include <ctype.h>
22 22 #include <stdlib.h>
23 23 #include <string.h>
24 24 #include "pythoncapi_compat.h"
25 25
26 26 #include "util.h"
27 27
28 28 /* state machine for the fast path */
/*
 * States of the single-pass encoder in _encode(). The letter states track
 * how much of a reserved name ("aux", "con", "comN", "lptN", "nul", "prn")
 * has been matched at the start of a path component.
 */
enum path_state {
	/* first byte of a path component */
	START,
	/* "AUX" */
	A,
	AU,
	/* third of a 3-byte sequence, e.g. "AUX", "NUL" */
	THIRD,
	/* "CON" or "COMn" */
	C,
	CO,
	/* "COM" or "LPT" */
	COMLPT,
	COMLPTn,
	L,
	LP,
	N,
	NU,
	/* "PRN" */
	P,
	PR,
	/* leading '.' */
	LDOT,
	/* '.' in a non-leading position */
	DOT,
	/* ".h" */
	H,
	/* ".hg", ".d", or ".i" */
	HGDI,
	SPACE,
	/* byte of a path component after the first */
	DEFAULT,
};
51 51
52 52 /* state machine for dir-encoding */
/* States of the directory-suffix scanner in _encodedir(). */
enum dir_state {
	DDOT,     /* just consumed a '.' */
	DH,       /* consumed ".h" */
	DHGDI,    /* consumed ".hg", ".d" or ".i" */
	DDEFAULT, /* anything else */
};
59 59
/*
 * Test whether byte `c` is a member of a 256-bit set stored as eight
 * 32-bit words (bit N of the set is bit N % 32 of word N / 32).
 *
 * Returns 1 if the bit is set and 0 otherwise. The mask is built from an
 * unsigned constant because shifting a signed 1 into bit 31 is undefined.
 */
static inline int inset(const uint32_t bitset[], char c)
{
	const uint8_t byte = (uint8_t)c;
	const uint32_t mask = UINT32_C(1) << (byte & 31);

	return (bitset[byte >> 5] & mask) != 0;
}
64 64
65 65 static inline void charcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
66 66 char c)
67 67 {
68 68 if (dest) {
69 69 assert(*destlen < destsize);
70 70 dest[*destlen] = c;
71 71 }
72 72 (*destlen)++;
73 73 }
74 74
75 75 static inline void memcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
76 76 const void *src, Py_ssize_t len)
77 77 {
78 78 if (dest) {
79 79 assert(*destlen + len < destsize);
80 80 memcpy((void *)&dest[*destlen], src, len);
81 81 }
82 82 *destlen += len;
83 83 }
84 84
85 85 static inline void hexencode(char *dest, Py_ssize_t *destlen, size_t destsize,
86 86 uint8_t c)
87 87 {
88 88 static const char hexdigit[] = "0123456789abcdef";
89 89
90 90 charcopy(dest, destlen, destsize, hexdigit[c >> 4]);
91 91 charcopy(dest, destlen, destsize, hexdigit[c & 15]);
92 92 }
93 93
94 94 /* 3-byte escape: tilde followed by two hex digits */
95 95 static inline void escape3(char *dest, Py_ssize_t *destlen, size_t destsize,
96 96 char c)
97 97 {
98 98 charcopy(dest, destlen, destsize, '~');
99 99 hexencode(dest, destlen, destsize, c);
100 100 }
101 101
102 102 static Py_ssize_t _encodedir(char *dest, size_t destsize, const char *src,
103 103 Py_ssize_t len)
104 104 {
105 105 enum dir_state state = DDEFAULT;
106 106 Py_ssize_t i = 0, destlen = 0;
107 107
108 108 while (i < len) {
109 109 switch (state) {
110 110 case DDOT:
111 111 switch (src[i]) {
112 112 case 'd':
113 113 case 'i':
114 114 state = DHGDI;
115 115 charcopy(dest, &destlen, destsize, src[i++]);
116 116 break;
117 117 case 'h':
118 118 state = DH;
119 119 charcopy(dest, &destlen, destsize, src[i++]);
120 120 break;
121 121 default:
122 122 state = DDEFAULT;
123 123 break;
124 124 }
125 125 break;
126 126 case DH:
127 127 if (src[i] == 'g') {
128 128 state = DHGDI;
129 129 charcopy(dest, &destlen, destsize, src[i++]);
130 130 } else {
131 131 state = DDEFAULT;
132 132 }
133 133 break;
134 134 case DHGDI:
135 135 if (src[i] == '/') {
136 136 memcopy(dest, &destlen, destsize, ".hg", 3);
137 137 charcopy(dest, &destlen, destsize, src[i++]);
138 138 }
139 139 state = DDEFAULT;
140 140 break;
141 141 case DDEFAULT:
142 142 if (src[i] == '.') {
143 143 state = DDOT;
144 144 }
145 145 charcopy(dest, &destlen, destsize, src[i++]);
146 146 break;
147 147 }
148 148 }
149 149
150 150 return destlen;
151 151 }
152 152
153 153 PyObject *encodedir(PyObject *self, PyObject *args)
154 154 {
155 155 Py_ssize_t len, newlen;
156 156 PyObject *pathobj, *newobj;
157 157 char *path;
158 158
159 159 if (!PyArg_ParseTuple(args, "O:encodedir", &pathobj)) {
160 160 return NULL;
161 161 }
162 162
163 163 if (PyBytes_AsStringAndSize(pathobj, &path, &len) == -1) {
164 164 PyErr_SetString(PyExc_TypeError, "expected a string");
165 165 return NULL;
166 166 }
167 167
168 168 newlen = len ? _encodedir(NULL, 0, path, len + 1) : 1;
169 169
170 170 if (newlen == len + 1) {
171 171 Py_INCREF(pathobj);
172 172 return pathobj;
173 173 }
174 174
175 175 newobj = PyBytes_FromStringAndSize(NULL, newlen);
176 176
177 177 if (newobj) {
178 178 assert(PyBytes_Check(newobj));
179 179 Py_SET_SIZE(newobj, Py_SIZE(newobj) - 1);
180 180 _encodedir(PyBytes_AS_STRING(newobj), newlen, path, len + 1);
181 181 }
182 182
183 183 return newobj;
184 184 }
185 185
186 186 static Py_ssize_t _encode(const uint32_t twobytes[8], const uint32_t onebyte[8],
187 187 char *dest, Py_ssize_t destlen, size_t destsize,
188 188 const char *src, Py_ssize_t len, int encodedir)
189 189 {
190 190 enum path_state state = START;
191 191 Py_ssize_t i = 0;
192 192
193 193 /*
194 194 * Python strings end with a zero byte, which we use as a
195 195 * terminal token as they are not valid inside path names.
196 196 */
197 197
198 198 while (i < len) {
199 199 switch (state) {
200 200 case START:
201 201 switch (src[i]) {
202 202 case '/':
203 203 charcopy(dest, &destlen, destsize, src[i++]);
204 204 break;
205 205 case '.':
206 206 state = LDOT;
207 207 escape3(dest, &destlen, destsize, src[i++]);
208 208 break;
209 209 case ' ':
210 210 state = DEFAULT;
211 211 escape3(dest, &destlen, destsize, src[i++]);
212 212 break;
213 213 case 'a':
214 214 state = A;
215 215 charcopy(dest, &destlen, destsize, src[i++]);
216 216 break;
217 217 case 'c':
218 218 state = C;
219 219 charcopy(dest, &destlen, destsize, src[i++]);
220 220 break;
221 221 case 'l':
222 222 state = L;
223 223 charcopy(dest, &destlen, destsize, src[i++]);
224 224 break;
225 225 case 'n':
226 226 state = N;
227 227 charcopy(dest, &destlen, destsize, src[i++]);
228 228 break;
229 229 case 'p':
230 230 state = P;
231 231 charcopy(dest, &destlen, destsize, src[i++]);
232 232 break;
233 233 default:
234 234 state = DEFAULT;
235 235 break;
236 236 }
237 237 break;
238 238 case A:
239 239 if (src[i] == 'u') {
240 240 state = AU;
241 241 charcopy(dest, &destlen, destsize, src[i++]);
242 242 } else {
243 243 state = DEFAULT;
244 244 }
245 245 break;
246 246 case AU:
247 247 if (src[i] == 'x') {
248 248 state = THIRD;
249 249 i++;
250 250 } else {
251 251 state = DEFAULT;
252 252 }
253 253 break;
254 254 case THIRD:
255 255 state = DEFAULT;
256 256 switch (src[i]) {
257 257 case '.':
258 258 case '/':
259 259 case '\0':
260 260 escape3(dest, &destlen, destsize, src[i - 1]);
261 261 break;
262 262 default:
263 263 i--;
264 264 break;
265 265 }
266 266 break;
267 267 case C:
268 268 if (src[i] == 'o') {
269 269 state = CO;
270 270 charcopy(dest, &destlen, destsize, src[i++]);
271 271 } else {
272 272 state = DEFAULT;
273 273 }
274 274 break;
275 275 case CO:
276 276 if (src[i] == 'm') {
277 277 state = COMLPT;
278 278 i++;
279 279 } else if (src[i] == 'n') {
280 280 state = THIRD;
281 281 i++;
282 282 } else {
283 283 state = DEFAULT;
284 284 }
285 285 break;
286 286 case COMLPT:
287 287 switch (src[i]) {
288 288 case '1':
289 289 case '2':
290 290 case '3':
291 291 case '4':
292 292 case '5':
293 293 case '6':
294 294 case '7':
295 295 case '8':
296 296 case '9':
297 297 state = COMLPTn;
298 298 i++;
299 299 break;
300 300 default:
301 301 state = DEFAULT;
302 302 charcopy(dest, &destlen, destsize, src[i - 1]);
303 303 break;
304 304 }
305 305 break;
306 306 case COMLPTn:
307 307 state = DEFAULT;
308 308 switch (src[i]) {
309 309 case '.':
310 310 case '/':
311 311 case '\0':
312 312 escape3(dest, &destlen, destsize, src[i - 2]);
313 313 charcopy(dest, &destlen, destsize, src[i - 1]);
314 314 break;
315 315 default:
316 316 memcopy(dest, &destlen, destsize, &src[i - 2],
317 317 2);
318 318 break;
319 319 }
320 320 break;
321 321 case L:
322 322 if (src[i] == 'p') {
323 323 state = LP;
324 324 charcopy(dest, &destlen, destsize, src[i++]);
325 325 } else {
326 326 state = DEFAULT;
327 327 }
328 328 break;
329 329 case LP:
330 330 if (src[i] == 't') {
331 331 state = COMLPT;
332 332 i++;
333 333 } else {
334 334 state = DEFAULT;
335 335 }
336 336 break;
337 337 case N:
338 338 if (src[i] == 'u') {
339 339 state = NU;
340 340 charcopy(dest, &destlen, destsize, src[i++]);
341 341 } else {
342 342 state = DEFAULT;
343 343 }
344 344 break;
345 345 case NU:
346 346 if (src[i] == 'l') {
347 347 state = THIRD;
348 348 i++;
349 349 } else {
350 350 state = DEFAULT;
351 351 }
352 352 break;
353 353 case P:
354 354 if (src[i] == 'r') {
355 355 state = PR;
356 356 charcopy(dest, &destlen, destsize, src[i++]);
357 357 } else {
358 358 state = DEFAULT;
359 359 }
360 360 break;
361 361 case PR:
362 362 if (src[i] == 'n') {
363 363 state = THIRD;
364 364 i++;
365 365 } else {
366 366 state = DEFAULT;
367 367 }
368 368 break;
369 369 case LDOT:
370 370 switch (src[i]) {
371 371 case 'd':
372 372 case 'i':
373 373 state = HGDI;
374 374 charcopy(dest, &destlen, destsize, src[i++]);
375 375 break;
376 376 case 'h':
377 377 state = H;
378 378 charcopy(dest, &destlen, destsize, src[i++]);
379 379 break;
380 380 default:
381 381 state = DEFAULT;
382 382 break;
383 383 }
384 384 break;
385 385 case DOT:
386 386 switch (src[i]) {
387 387 case '/':
388 388 case '\0':
389 389 state = START;
390 390 memcopy(dest, &destlen, destsize, "~2e", 3);
391 391 charcopy(dest, &destlen, destsize, src[i++]);
392 392 break;
393 393 case 'd':
394 394 case 'i':
395 395 state = HGDI;
396 396 charcopy(dest, &destlen, destsize, '.');
397 397 charcopy(dest, &destlen, destsize, src[i++]);
398 398 break;
399 399 case 'h':
400 400 state = H;
401 401 memcopy(dest, &destlen, destsize, ".h", 2);
402 402 i++;
403 403 break;
404 404 default:
405 405 state = DEFAULT;
406 406 charcopy(dest, &destlen, destsize, '.');
407 407 break;
408 408 }
409 409 break;
410 410 case H:
411 411 if (src[i] == 'g') {
412 412 state = HGDI;
413 413 charcopy(dest, &destlen, destsize, src[i++]);
414 414 } else {
415 415 state = DEFAULT;
416 416 }
417 417 break;
418 418 case HGDI:
419 419 if (src[i] == '/') {
420 420 state = START;
421 421 if (encodedir) {
422 422 memcopy(dest, &destlen, destsize, ".hg",
423 423 3);
424 424 }
425 425 charcopy(dest, &destlen, destsize, src[i++]);
426 426 } else {
427 427 state = DEFAULT;
428 428 }
429 429 break;
430 430 case SPACE:
431 431 switch (src[i]) {
432 432 case '/':
433 433 case '\0':
434 434 state = START;
435 435 memcopy(dest, &destlen, destsize, "~20", 3);
436 436 charcopy(dest, &destlen, destsize, src[i++]);
437 437 break;
438 438 default:
439 439 state = DEFAULT;
440 440 charcopy(dest, &destlen, destsize, ' ');
441 441 break;
442 442 }
443 443 break;
444 444 case DEFAULT:
445 445 while (inset(onebyte, src[i])) {
446 446 charcopy(dest, &destlen, destsize, src[i++]);
447 447 if (i == len) {
448 448 goto done;
449 449 }
450 450 }
451 451 switch (src[i]) {
452 452 case '.':
453 453 state = DOT;
454 454 i++;
455 455 break;
456 456 case ' ':
457 457 state = SPACE;
458 458 i++;
459 459 break;
460 460 case '/':
461 461 state = START;
462 462 charcopy(dest, &destlen, destsize, '/');
463 463 i++;
464 464 break;
465 465 default:
466 466 if (inset(onebyte, src[i])) {
467 467 do {
468 468 charcopy(dest, &destlen,
469 469 destsize, src[i++]);
470 470 } while (i < len &&
471 471 inset(onebyte, src[i]));
472 472 } else if (inset(twobytes, src[i])) {
473 473 char c = src[i++];
474 474 charcopy(dest, &destlen, destsize, '_');
475 475 charcopy(dest, &destlen, destsize,
476 476 c == '_' ? '_' : c + 32);
477 477 } else {
478 478 escape3(dest, &destlen, destsize,
479 479 src[i++]);
480 480 }
481 481 break;
482 482 }
483 483 break;
484 484 }
485 485 }
486 486 done:
487 487 return destlen;
488 488 }
489 489
490 490 static Py_ssize_t basicencode(char *dest, size_t destsize, const char *src,
491 491 Py_ssize_t len)
492 492 {
493 493 static const uint32_t twobytes[8] = {0, 0, 0x87fffffe};
494 494
495 495 static const uint32_t onebyte[8] = {
496 496 1,
497 497 0x2bff3bfa,
498 498 0x68000001,
499 499 0x2fffffff,
500 500 };
501 501
502 502 Py_ssize_t destlen = 0;
503 503
504 504 return _encode(twobytes, onebyte, dest, destlen, destsize, src, len, 1);
505 505 }
506 506
507 507 static const Py_ssize_t maxstorepathlen = 120;
508 508
509 509 static Py_ssize_t _lowerencode(char *dest, size_t destsize, const char *src,
510 510 Py_ssize_t len)
511 511 {
512 512 static const uint32_t onebyte[8] = {1, 0x2bfffbfb, 0xe8000001,
513 513 0x2fffffff};
514 514
515 515 static const uint32_t lower[8] = {0, 0, 0x7fffffe};
516 516
517 517 Py_ssize_t i, destlen = 0;
518 518
519 519 for (i = 0; i < len; i++) {
520 520 if (inset(onebyte, src[i])) {
521 521 charcopy(dest, &destlen, destsize, src[i]);
522 522 } else if (inset(lower, src[i])) {
523 523 charcopy(dest, &destlen, destsize, src[i] + 32);
524 524 } else {
525 525 escape3(dest, &destlen, destsize, src[i]);
526 526 }
527 527 }
528 528
529 529 return destlen;
530 530 }
531 531
532 532 PyObject *lowerencode(PyObject *self, PyObject *args)
533 533 {
534 534 char *path;
535 535 Py_ssize_t len, newlen;
536 536 PyObject *ret;
537 537
538 if (!PyArg_ParseTuple(args, PY23("s#:lowerencode", "y#:lowerencode"),
539 &path, &len)) {
538 if (!PyArg_ParseTuple(args, "y#:lowerencode", &path, &len)) {
540 539 return NULL;
541 540 }
542 541
543 542 newlen = _lowerencode(NULL, 0, path, len);
544 543 ret = PyBytes_FromStringAndSize(NULL, newlen);
545 544 if (ret) {
546 545 _lowerencode(PyBytes_AS_STRING(ret), newlen, path, len);
547 546 }
548 547
549 548 return ret;
550 549 }
551 550
552 551 /* See store.py:_auxencode for a description. */
553 552 static Py_ssize_t auxencode(char *dest, size_t destsize, const char *src,
554 553 Py_ssize_t len)
555 554 {
556 555 static const uint32_t twobytes[8];
557 556
558 557 static const uint32_t onebyte[8] = {
559 558 ~0U, 0xffff3ffe, ~0U, ~0U, ~0U, ~0U, ~0U, ~0U,
560 559 };
561 560
562 561 return _encode(twobytes, onebyte, dest, 0, destsize, src, len, 0);
563 562 }
564 563
565 564 static PyObject *hashmangle(const char *src, Py_ssize_t len, const char sha[20])
566 565 {
567 566 static const Py_ssize_t dirprefixlen = 8;
568 567 static const Py_ssize_t maxshortdirslen = 68;
569 568 char *dest;
570 569 PyObject *ret;
571 570
572 571 Py_ssize_t i, d, p, lastslash = len - 1, lastdot = -1;
573 572 Py_ssize_t destsize, destlen = 0, slop, used;
574 573
575 574 while (lastslash >= 0 && src[lastslash] != '/') {
576 575 if (src[lastslash] == '.' && lastdot == -1) {
577 576 lastdot = lastslash;
578 577 }
579 578 lastslash--;
580 579 }
581 580
582 581 #if 0
583 582 /* All paths should end in a suffix of ".i" or ".d".
584 583 Unfortunately, the file names in test-hybridencode.py
585 584 violate this rule. */
586 585 if (lastdot != len - 3) {
587 586 PyErr_SetString(PyExc_ValueError,
588 587 "suffix missing or wrong length");
589 588 return NULL;
590 589 }
591 590 #endif
592 591
593 592 /* If src contains a suffix, we will append it to the end of
594 593 the new string, so make room. */
595 594 destsize = 120;
596 595 if (lastdot >= 0) {
597 596 destsize += len - lastdot - 1;
598 597 }
599 598
600 599 ret = PyBytes_FromStringAndSize(NULL, destsize);
601 600 if (ret == NULL) {
602 601 return NULL;
603 602 }
604 603
605 604 dest = PyBytes_AS_STRING(ret);
606 605 memcopy(dest, &destlen, destsize, "dh/", 3);
607 606
608 607 /* Copy up to dirprefixlen bytes of each path component, up to
609 608 a limit of maxshortdirslen bytes. */
610 609 for (i = d = p = 0; i < lastslash; i++, p++) {
611 610 if (src[i] == '/') {
612 611 char d = dest[destlen - 1];
613 612 /* After truncation, a directory name may end
614 613 in a space or dot, which are unportable. */
615 614 if (d == '.' || d == ' ') {
616 615 dest[destlen - 1] = '_';
617 616 /* The + 3 is to account for "dh/" in the
618 617 * beginning */
619 618 }
620 619 if (destlen > maxshortdirslen + 3) {
621 620 break;
622 621 }
623 622 charcopy(dest, &destlen, destsize, src[i]);
624 623 p = -1;
625 624 } else if (p < dirprefixlen) {
626 625 charcopy(dest, &destlen, destsize, src[i]);
627 626 }
628 627 }
629 628
630 629 /* Rewind to just before the last slash copied. */
631 630 if (destlen > maxshortdirslen + 3) {
632 631 do {
633 632 destlen--;
634 633 } while (destlen > 0 && dest[destlen] != '/');
635 634 }
636 635
637 636 if (destlen > 3) {
638 637 if (lastslash > 0) {
639 638 char d = dest[destlen - 1];
640 639 /* The last directory component may be
641 640 truncated, so make it safe. */
642 641 if (d == '.' || d == ' ') {
643 642 dest[destlen - 1] = '_';
644 643 }
645 644 }
646 645
647 646 charcopy(dest, &destlen, destsize, '/');
648 647 }
649 648
650 649 /* Add a prefix of the original file's name. Its length
651 650 depends on the number of bytes left after accounting for
652 651 hash and suffix. */
653 652 used = destlen + 40;
654 653 if (lastdot >= 0) {
655 654 used += len - lastdot - 1;
656 655 }
657 656 slop = maxstorepathlen - used;
658 657 if (slop > 0) {
659 658 Py_ssize_t basenamelen =
660 659 lastslash >= 0 ? len - lastslash - 2 : len - 1;
661 660
662 661 if (basenamelen > slop) {
663 662 basenamelen = slop;
664 663 }
665 664 if (basenamelen > 0) {
666 665 memcopy(dest, &destlen, destsize, &src[lastslash + 1],
667 666 basenamelen);
668 667 }
669 668 }
670 669
671 670 /* Add hash and suffix. */
672 671 for (i = 0; i < 20; i++) {
673 672 hexencode(dest, &destlen, destsize, sha[i]);
674 673 }
675 674
676 675 if (lastdot >= 0) {
677 676 memcopy(dest, &destlen, destsize, &src[lastdot],
678 677 len - lastdot - 1);
679 678 }
680 679
681 680 assert(PyBytes_Check(ret));
682 681 Py_SET_SIZE(ret, destlen);
683 682
684 683 return ret;
685 684 }
686 685
687 686 /*
688 687 * Avoiding a trip through Python would improve performance by 50%,
689 688 * but we don't encounter enough long names to be worth the code.
690 689 */
691 690 static int sha1hash(char hash[20], const char *str, Py_ssize_t len)
692 691 {
693 692 static PyObject *shafunc;
694 693 PyObject *shaobj, *hashobj;
695 694
696 695 if (shafunc == NULL) {
697 696 PyObject *hashlib = PyImport_ImportModule("hashlib");
698 697 if (hashlib == NULL) {
699 698 PyErr_SetString(PyExc_ImportError,
700 699 "pathencode failed to find hashlib");
701 700 return -1;
702 701 }
703 702 shafunc = PyObject_GetAttrString(hashlib, "sha1");
704 703 Py_DECREF(hashlib);
705 704
706 705 if (shafunc == NULL) {
707 706 PyErr_SetString(PyExc_AttributeError,
708 707 "module 'hashlib' has no "
709 708 "attribute 'sha1' in pathencode");
710 709 return -1;
711 710 }
712 711 }
713 712
714 shaobj = PyObject_CallFunction(shafunc, PY23("s#", "y#"), str, len);
713 shaobj = PyObject_CallFunction(shafunc, "y#", str, len);
715 714
716 715 if (shaobj == NULL) {
717 716 return -1;
718 717 }
719 718
720 719 hashobj = PyObject_CallMethod(shaobj, "digest", "");
721 720 Py_DECREF(shaobj);
722 721 if (hashobj == NULL) {
723 722 return -1;
724 723 }
725 724
726 725 if (!PyBytes_Check(hashobj) || PyBytes_GET_SIZE(hashobj) != 20) {
727 726 PyErr_SetString(PyExc_TypeError,
728 727 "result of digest is not a 20-byte hash");
729 728 Py_DECREF(hashobj);
730 729 return -1;
731 730 }
732 731
733 732 memcpy(hash, PyBytes_AS_STRING(hashobj), 20);
734 733 Py_DECREF(hashobj);
735 734 return 0;
736 735 }
737 736
738 737 #define MAXENCODE 4096 * 4
739 738
740 739 static PyObject *hashencode(const char *src, Py_ssize_t len)
741 740 {
742 741 char dired[MAXENCODE];
743 742 char lowered[MAXENCODE];
744 743 char auxed[MAXENCODE];
745 744 Py_ssize_t dirlen, lowerlen, auxlen, baselen;
746 745 char sha[20];
747 746
748 747 baselen = (len - 5) * 3;
749 748 if (baselen >= MAXENCODE) {
750 749 PyErr_SetString(PyExc_ValueError, "string too long");
751 750 return NULL;
752 751 }
753 752
754 753 dirlen = _encodedir(dired, baselen, src, len);
755 754 if (sha1hash(sha, dired, dirlen - 1) == -1) {
756 755 return NULL;
757 756 }
758 757 lowerlen = _lowerencode(lowered, baselen, dired + 5, dirlen - 5);
759 758 auxlen = auxencode(auxed, baselen, lowered, lowerlen);
760 759 return hashmangle(auxed, auxlen, sha);
761 760 }
762 761
763 762 PyObject *pathencode(PyObject *self, PyObject *args)
764 763 {
765 764 Py_ssize_t len, newlen;
766 765 PyObject *pathobj, *newobj;
767 766 char *path;
768 767
769 768 if (!PyArg_ParseTuple(args, "O:pathencode", &pathobj)) {
770 769 return NULL;
771 770 }
772 771
773 772 if (PyBytes_AsStringAndSize(pathobj, &path, &len) == -1) {
774 773 PyErr_SetString(PyExc_TypeError, "expected a string");
775 774 return NULL;
776 775 }
777 776
778 777 if (len > maxstorepathlen) {
779 778 newlen = maxstorepathlen + 2;
780 779 } else {
781 780 newlen = len ? basicencode(NULL, 0, path, len + 1) : 1;
782 781 }
783 782
784 783 if (newlen <= maxstorepathlen + 1) {
785 784 if (newlen == len + 1) {
786 785 Py_INCREF(pathobj);
787 786 return pathobj;
788 787 }
789 788
790 789 newobj = PyBytes_FromStringAndSize(NULL, newlen);
791 790
792 791 if (newobj) {
793 792 assert(PyBytes_Check(newobj));
794 793 Py_SET_SIZE(newobj, Py_SIZE(newobj) - 1);
795 794 basicencode(PyBytes_AS_STRING(newobj), newlen, path,
796 795 len + 1);
797 796 }
798 797 } else {
799 798 newobj = hashencode(path, len + 1);
800 799 }
801 800
802 801 return newobj;
803 802 }
@@ -1,3278 +1,3275 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12 #include <assert.h>
13 13 #include <ctype.h>
14 14 #include <limits.h>
15 15 #include <stddef.h>
16 16 #include <stdlib.h>
17 17 #include <string.h>
18 18 #include <structmember.h>
19 19
20 20 #include "bitmanipulation.h"
21 21 #include "charencode.h"
22 22 #include "compat.h"
23 23 #include "revlog.h"
24 24 #include "util.h"
25 25
26 26 typedef struct indexObjectStruct indexObject;
27 27
/* One node of the base-16 trie: a slot for each possible nybble. */
typedef struct {
	int children[16];
} nodetreenode;
31 31
32 32 typedef struct {
33 33 int abi_version;
34 34 Py_ssize_t (*index_length)(const indexObject *);
35 35 const char *(*index_node)(indexObject *, Py_ssize_t);
36 36 int (*index_parents)(PyObject *, int, int *);
37 37 } Revlog_CAPI;
38 38
39 39 /*
40 40 * A base-16 trie for fast node->rev mapping.
41 41 *
42 42 * Positive value is index of the next node in the trie
43 43 * Negative value is a leaf: -(rev + 2)
44 44 * Zero is empty
45 45 */
46 46 typedef struct {
47 47 indexObject *index;
48 48 nodetreenode *nodes;
49 49 Py_ssize_t nodelen;
50 50 size_t length; /* # nodes in use */
51 51 size_t capacity; /* # nodes allocated */
52 52 int depth; /* maximum depth of tree */
53 53 int splits; /* # splits performed */
54 54 } nodetree;
55 55
56 56 typedef struct {
57 57 PyObject_HEAD /* ; */
58 58 nodetree nt;
59 59 } nodetreeObject;
60 60
61 61 /*
62 62 * This class has two behaviors.
63 63 *
64 64 * When used in a list-like way (with integer keys), we decode an
65 65 * entry in a RevlogNG index file on demand. We have limited support for
66 66 * integer-keyed insert and delete, only at elements right before the
67 67 * end.
68 68 *
69 69 * With string keys, we lazily perform a reverse mapping from node to
70 70 * rev, using a base-16 trie.
71 71 */
72 72 struct indexObjectStruct {
73 73 PyObject_HEAD
74 74 /* Type-specific fields go here. */
75 75 PyObject *data; /* raw bytes of index */
76 76 Py_ssize_t nodelen; /* digest size of the hash, 20 for SHA-1 */
77 77 PyObject *nullentry; /* fast path for references to null */
78 78 Py_buffer buf; /* buffer of data */
79 79 const char **offsets; /* populated on demand */
80 80 Py_ssize_t length; /* current on-disk number of elements */
81 81 unsigned new_length; /* number of added elements */
82 82 unsigned added_length; /* space reserved for added elements */
83 83 char *added; /* populated on demand */
84 84 PyObject *headrevs; /* cache, invalidated on changes */
85 85 PyObject *filteredrevs; /* filtered revs set */
86 86 nodetree nt; /* base-16 trie */
87 87 int ntinitialized; /* 0 or 1 */
88 88 int ntrev; /* last rev scanned */
89 89 int ntlookups; /* # lookups */
90 90 int ntmisses; /* # lookups that miss the cache */
91 91 int inlined;
92 92 long entry_size; /* size of index headers. Differs in v1 v.s. v2 format
93 93 */
94 94 long rust_ext_compat; /* compatibility with being used in rust
95 95 extensions */
96 96 long format_version; /* format version selector (format_*) */
97 97 };
98 98
99 99 static Py_ssize_t index_length(const indexObject *self)
100 100 {
101 101 return self->length + self->new_length;
102 102 }
103 103
104 104 static const char nullid[32] = {0};
105 105 static const Py_ssize_t nullrev = -1;
106 106
107 107 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
108 108
109 109 static int index_find_node(indexObject *self, const char *node);
110 110
/*
 * Format string for an index entry tuple. The first and ninth fields need
 * an unsigned 64-bit code: 'K' (unsigned long long) where long is 32 bits,
 * 'k' (unsigned long) otherwise.
 */
#if LONG_MAX == 0x7fffffffL
static const char *const tuple_format = "Kiiiiiiy#KiBBi";
#else
static const char *const tuple_format = "kiiiiiiy#kiBBi";
#endif
118 116
119 117 /* A RevlogNG v1 index entry is 64 bytes long. */
120 118 static const long v1_entry_size = 64;
121 119
122 120 /* A Revlogv2 index entry is 96 bytes long. */
123 121 static const long v2_entry_size = 96;
124 122
125 123 /* A Changelogv2 index entry is 96 bytes long. */
126 124 static const long cl2_entry_size = 96;
127 125
128 126 /* Internal format version.
129 127 * Must match their counterparts in revlogutils/constants.py */
130 128 static const long format_v1 = 1; /* constants.py: REVLOGV1 */
131 129 static const long format_v2 = 0xDEAD; /* constants.py: REVLOGV2 */
132 130 static const long format_cl2 = 0xD34D; /* constants.py: CHANGELOGV2 */
133 131
134 132 static const long entry_v1_offset_high = 0;
135 133 static const long entry_v1_offset_offset_flags = 4;
136 134 static const long entry_v1_offset_comp_len = 8;
137 135 static const long entry_v1_offset_uncomp_len = 12;
138 136 static const long entry_v1_offset_base_rev = 16;
139 137 static const long entry_v1_offset_link_rev = 20;
140 138 static const long entry_v1_offset_parent_1 = 24;
141 139 static const long entry_v1_offset_parent_2 = 28;
142 140 static const long entry_v1_offset_node_id = 32;
143 141
144 142 static const long entry_v2_offset_high = 0;
145 143 static const long entry_v2_offset_offset_flags = 4;
146 144 static const long entry_v2_offset_comp_len = 8;
147 145 static const long entry_v2_offset_uncomp_len = 12;
148 146 static const long entry_v2_offset_base_rev = 16;
149 147 static const long entry_v2_offset_link_rev = 20;
150 148 static const long entry_v2_offset_parent_1 = 24;
151 149 static const long entry_v2_offset_parent_2 = 28;
152 150 static const long entry_v2_offset_node_id = 32;
153 151 static const long entry_v2_offset_sidedata_offset = 64;
154 152 static const long entry_v2_offset_sidedata_comp_len = 72;
155 153 static const long entry_v2_offset_all_comp_mode = 76;
156 154 /* next free offset: 77 */
157 155
158 156 static const long entry_cl2_offset_high = 0;
159 157 static const long entry_cl2_offset_offset_flags = 4;
160 158 static const long entry_cl2_offset_comp_len = 8;
161 159 static const long entry_cl2_offset_uncomp_len = 12;
162 160 static const long entry_cl2_offset_parent_1 = 16;
163 161 static const long entry_cl2_offset_parent_2 = 20;
164 162 static const long entry_cl2_offset_node_id = 24;
165 163 static const long entry_cl2_offset_sidedata_offset = 56;
166 164 static const long entry_cl2_offset_sidedata_comp_len = 64;
167 165 static const long entry_cl2_offset_all_comp_mode = 68;
168 166 static const long entry_cl2_offset_rank = 69;
169 167 /* next free offset: 73 */
170 168
171 169 static const char comp_mode_inline = 2;
172 170 static const char rank_unknown = -1;
173 171
174 172 static void raise_revlog_error(void)
175 173 {
176 174 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
177 175
178 176 mod = PyImport_ImportModule("mercurial.error");
179 177 if (mod == NULL) {
180 178 goto cleanup;
181 179 }
182 180
183 181 dict = PyModule_GetDict(mod);
184 182 if (dict == NULL) {
185 183 goto cleanup;
186 184 }
187 185 Py_INCREF(dict);
188 186
189 187 errclass = PyDict_GetItemString(dict, "RevlogError");
190 188 if (errclass == NULL) {
191 189 PyErr_SetString(PyExc_SystemError,
192 190 "could not find RevlogError");
193 191 goto cleanup;
194 192 }
195 193
196 194 /* value of exception is ignored by callers */
197 195 PyErr_SetString(errclass, "RevlogError");
198 196
199 197 cleanup:
200 198 Py_XDECREF(dict);
201 199 Py_XDECREF(mod);
202 200 }
203 201
204 202 /*
205 203 * Return a pointer to the beginning of a RevlogNG record.
206 204 */
207 205 static const char *index_deref(indexObject *self, Py_ssize_t pos)
208 206 {
209 207 if (pos >= self->length)
210 208 return self->added + (pos - self->length) * self->entry_size;
211 209
212 210 if (self->inlined && pos > 0) {
213 211 if (self->offsets == NULL) {
214 212 Py_ssize_t ret;
215 213 self->offsets =
216 214 PyMem_Malloc(self->length * sizeof(*self->offsets));
217 215 if (self->offsets == NULL)
218 216 return (const char *)PyErr_NoMemory();
219 217 ret = inline_scan(self, self->offsets);
220 218 if (ret == -1) {
221 219 return NULL;
222 220 };
223 221 }
224 222 return self->offsets[pos];
225 223 }
226 224
227 225 return (const char *)(self->buf.buf) + pos * self->entry_size;
228 226 }
229 227
230 228 /*
231 229 * Get parents of the given rev.
232 230 *
233 231 * The specified rev must be valid and must not be nullrev. A returned
234 232 * parent revision may be nullrev, but is guaranteed to be in valid range.
235 233 */
236 234 static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
237 235 int maxrev)
238 236 {
239 237 const char *data = index_deref(self, rev);
240 238
241 239 if (self->format_version == format_v1) {
242 240 ps[0] = getbe32(data + entry_v1_offset_parent_1);
243 241 ps[1] = getbe32(data + entry_v1_offset_parent_2);
244 242 } else if (self->format_version == format_v2) {
245 243 ps[0] = getbe32(data + entry_v2_offset_parent_1);
246 244 ps[1] = getbe32(data + entry_v2_offset_parent_2);
247 245 } else if (self->format_version == format_cl2) {
248 246 ps[0] = getbe32(data + entry_cl2_offset_parent_1);
249 247 ps[1] = getbe32(data + entry_cl2_offset_parent_2);
250 248 } else {
251 249 raise_revlog_error();
252 250 return -1;
253 251 }
254 252
255 253 /* If index file is corrupted, ps[] may point to invalid revisions. So
256 254 * there is a risk of buffer overflow to trust them unconditionally. */
257 255 if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
258 256 PyErr_SetString(PyExc_ValueError, "parent out of range");
259 257 return -1;
260 258 }
261 259 return 0;
262 260 }
263 261
264 262 /*
265 263 * Get parents of the given rev.
266 264 *
267 265 * If the specified rev is out of range, IndexError will be raised. If the
268 266 * revlog entry is corrupted, ValueError may be raised.
269 267 *
270 268 * Returns 0 on success or -1 on failure.
271 269 */
272 270 static int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
273 271 {
274 272 int tiprev;
275 273 if (!op || !HgRevlogIndex_Check(op) || !ps) {
276 274 PyErr_BadInternalCall();
277 275 return -1;
278 276 }
279 277 tiprev = (int)index_length((indexObject *)op) - 1;
280 278 if (rev < -1 || rev > tiprev) {
281 279 PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
282 280 return -1;
283 281 } else if (rev == -1) {
284 282 ps[0] = ps[1] = -1;
285 283 return 0;
286 284 } else {
287 285 return index_get_parents((indexObject *)op, rev, ps, tiprev);
288 286 }
289 287 }
290 288
291 289 static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
292 290 {
293 291 const char *data;
294 292 uint64_t offset;
295 293
296 294 if (rev == nullrev)
297 295 return 0;
298 296
299 297 data = index_deref(self, rev);
300 298
301 299 if (self->format_version == format_v1) {
302 300 offset = getbe32(data + entry_v1_offset_offset_flags);
303 301 if (rev == 0) {
304 302 /* mask out version number for the first entry */
305 303 offset &= 0xFFFF;
306 304 } else {
307 305 uint32_t offset_high =
308 306 getbe32(data + entry_v1_offset_high);
309 307 offset |= ((uint64_t)offset_high) << 32;
310 308 }
311 309 } else if (self->format_version == format_v2) {
312 310 offset = getbe32(data + entry_v2_offset_offset_flags);
313 311 if (rev == 0) {
314 312 /* mask out version number for the first entry */
315 313 offset &= 0xFFFF;
316 314 } else {
317 315 uint32_t offset_high =
318 316 getbe32(data + entry_v2_offset_high);
319 317 offset |= ((uint64_t)offset_high) << 32;
320 318 }
321 319 } else if (self->format_version == format_cl2) {
322 320 uint32_t offset_high = getbe32(data + entry_cl2_offset_high);
323 321 offset = getbe32(data + entry_cl2_offset_offset_flags);
324 322 offset |= ((uint64_t)offset_high) << 32;
325 323 } else {
326 324 raise_revlog_error();
327 325 return -1;
328 326 }
329 327
330 328 return (int64_t)(offset >> 16);
331 329 }
332 330
333 331 static inline int index_get_length(indexObject *self, Py_ssize_t rev)
334 332 {
335 333 const char *data;
336 334 int tmp;
337 335
338 336 if (rev == nullrev)
339 337 return 0;
340 338
341 339 data = index_deref(self, rev);
342 340
343 341 if (self->format_version == format_v1) {
344 342 tmp = (int)getbe32(data + entry_v1_offset_comp_len);
345 343 } else if (self->format_version == format_v2) {
346 344 tmp = (int)getbe32(data + entry_v2_offset_comp_len);
347 345 } else if (self->format_version == format_cl2) {
348 346 tmp = (int)getbe32(data + entry_cl2_offset_comp_len);
349 347 } else {
350 348 raise_revlog_error();
351 349 return -1;
352 350 }
353 351 if (tmp < 0) {
354 352 PyErr_Format(PyExc_OverflowError,
355 353 "revlog entry size out of bound (%d)", tmp);
356 354 return -1;
357 355 }
358 356 return tmp;
359 357 }
360 358
361 359 /*
362 360 * RevlogNG format (all in big endian, data may be inlined):
363 361 * 6 bytes: offset
364 362 * 2 bytes: flags
365 363 * 4 bytes: compressed length
366 364 * 4 bytes: uncompressed length
367 365 * 4 bytes: base revision
368 366 * 4 bytes: link revision
369 367 * 4 bytes: parent 1 revision
370 368 * 4 bytes: parent 2 revision
371 369 * 32 bytes: nodeid (only 20 bytes used with SHA-1)
372 370 */
373 371 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
374 372 {
375 373 uint64_t offset_flags, sidedata_offset;
376 374 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
377 375 sidedata_comp_len, rank = rank_unknown;
378 376 char data_comp_mode, sidedata_comp_mode;
379 377 const char *c_node_id;
380 378 const char *data;
381 379 Py_ssize_t length = index_length(self);
382 380
383 381 if (pos == nullrev) {
384 382 Py_INCREF(self->nullentry);
385 383 return self->nullentry;
386 384 }
387 385
388 386 if (pos < 0 || pos >= length) {
389 387 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
390 388 return NULL;
391 389 }
392 390
393 391 data = index_deref(self, pos);
394 392 if (data == NULL)
395 393 return NULL;
396 394
397 395 if (self->format_version == format_v1) {
398 396 offset_flags = getbe32(data + entry_v1_offset_offset_flags);
399 397 /*
400 398 * The first entry on-disk needs the version number masked out,
401 399 * but this doesn't apply if entries are added to an empty
402 400 * index.
403 401 */
404 402 if (self->length && pos == 0)
405 403 offset_flags &= 0xFFFF;
406 404 else {
407 405 uint32_t offset_high =
408 406 getbe32(data + entry_v1_offset_high);
409 407 offset_flags |= ((uint64_t)offset_high) << 32;
410 408 }
411 409
412 410 comp_len = getbe32(data + entry_v1_offset_comp_len);
413 411 uncomp_len = getbe32(data + entry_v1_offset_uncomp_len);
414 412 base_rev = getbe32(data + entry_v1_offset_base_rev);
415 413 link_rev = getbe32(data + entry_v1_offset_link_rev);
416 414 parent_1 = getbe32(data + entry_v1_offset_parent_1);
417 415 parent_2 = getbe32(data + entry_v1_offset_parent_2);
418 416 c_node_id = data + entry_v1_offset_node_id;
419 417
420 418 sidedata_offset = 0;
421 419 sidedata_comp_len = 0;
422 420 data_comp_mode = comp_mode_inline;
423 421 sidedata_comp_mode = comp_mode_inline;
424 422 } else if (self->format_version == format_v2) {
425 423 offset_flags = getbe32(data + entry_v2_offset_offset_flags);
426 424 /*
427 425 * The first entry on-disk needs the version number masked out,
428 426 * but this doesn't apply if entries are added to an empty
429 427 * index.
430 428 */
431 429 if (self->length && pos == 0)
432 430 offset_flags &= 0xFFFF;
433 431 else {
434 432 uint32_t offset_high =
435 433 getbe32(data + entry_v2_offset_high);
436 434 offset_flags |= ((uint64_t)offset_high) << 32;
437 435 }
438 436
439 437 comp_len = getbe32(data + entry_v2_offset_comp_len);
440 438 uncomp_len = getbe32(data + entry_v2_offset_uncomp_len);
441 439 base_rev = getbe32(data + entry_v2_offset_base_rev);
442 440 link_rev = getbe32(data + entry_v2_offset_link_rev);
443 441 parent_1 = getbe32(data + entry_v2_offset_parent_1);
444 442 parent_2 = getbe32(data + entry_v2_offset_parent_2);
445 443 c_node_id = data + entry_v2_offset_node_id;
446 444
447 445 sidedata_offset =
448 446 getbe64(data + entry_v2_offset_sidedata_offset);
449 447 sidedata_comp_len =
450 448 getbe32(data + entry_v2_offset_sidedata_comp_len);
451 449 data_comp_mode = data[entry_v2_offset_all_comp_mode] & 3;
452 450 sidedata_comp_mode =
453 451 ((data[entry_v2_offset_all_comp_mode] >> 2) & 3);
454 452 } else if (self->format_version == format_cl2) {
455 453 uint32_t offset_high = getbe32(data + entry_cl2_offset_high);
456 454 offset_flags = getbe32(data + entry_cl2_offset_offset_flags);
457 455 offset_flags |= ((uint64_t)offset_high) << 32;
458 456 comp_len = getbe32(data + entry_cl2_offset_comp_len);
459 457 uncomp_len = getbe32(data + entry_cl2_offset_uncomp_len);
460 458 /* base_rev and link_rev are not stored in changelogv2, but are
461 459 still used by some functions shared with the other revlogs.
462 460 They are supposed to contain links to other revisions,
463 461 but they always point to themselves in the case of a changelog.
464 462 */
465 463 base_rev = pos;
466 464 link_rev = pos;
467 465 parent_1 = getbe32(data + entry_cl2_offset_parent_1);
468 466 parent_2 = getbe32(data + entry_cl2_offset_parent_2);
469 467 c_node_id = data + entry_cl2_offset_node_id;
470 468 sidedata_offset =
471 469 getbe64(data + entry_cl2_offset_sidedata_offset);
472 470 sidedata_comp_len =
473 471 getbe32(data + entry_cl2_offset_sidedata_comp_len);
474 472 data_comp_mode = data[entry_cl2_offset_all_comp_mode] & 3;
475 473 sidedata_comp_mode =
476 474 ((data[entry_cl2_offset_all_comp_mode] >> 2) & 3);
477 475 rank = getbe32(data + entry_cl2_offset_rank);
478 476 } else {
479 477 raise_revlog_error();
480 478 return NULL;
481 479 }
482 480
483 481 return Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
484 482 base_rev, link_rev, parent_1, parent_2, c_node_id,
485 483 self->nodelen, sidedata_offset, sidedata_comp_len,
486 484 data_comp_mode, sidedata_comp_mode, rank);
487 485 }
488 486 /*
489 487 * Pack header information in binary
490 488 */
491 489 static PyObject *index_pack_header(indexObject *self, PyObject *args)
492 490 {
493 491 int header;
494 492 char out[4];
495 493 if (!PyArg_ParseTuple(args, "I", &header)) {
496 494 return NULL;
497 495 }
498 496 if (self->format_version != format_v1) {
499 497 PyErr_Format(PyExc_RuntimeError,
500 498 "version header should go in the docket, not the "
501 499 "index: %lu",
502 500 header);
503 501 return NULL;
504 502 }
505 503 putbe32(header, out);
506 504 return PyBytes_FromStringAndSize(out, 4);
507 505 }
508 506 /*
509 507 * Return the raw binary string representing a revision
510 508 */
511 509 static PyObject *index_entry_binary(indexObject *self, PyObject *value)
512 510 {
513 511 long rev;
514 512 const char *data;
515 513 Py_ssize_t length = index_length(self);
516 514
517 515 if (!pylong_to_long(value, &rev)) {
518 516 return NULL;
519 517 }
520 518 if (rev < 0 || rev >= length) {
521 519 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
522 520 rev);
523 521 return NULL;
524 522 };
525 523
526 524 data = index_deref(self, rev);
527 525 if (data == NULL)
528 526 return NULL;
529 527 if (rev == 0 && self->format_version == format_v1) {
530 528 /* the header is eating the start of the first entry */
531 529 return PyBytes_FromStringAndSize(data + 4,
532 530 self->entry_size - 4);
533 531 }
534 532 return PyBytes_FromStringAndSize(data, self->entry_size);
535 533 }
536 534
537 535 /*
538 536 * Return the hash of node corresponding to the given rev.
539 537 */
540 538 static const char *index_node(indexObject *self, Py_ssize_t pos)
541 539 {
542 540 Py_ssize_t length = index_length(self);
543 541 const char *data;
544 542 const char *node_id;
545 543
546 544 if (pos == nullrev)
547 545 return nullid;
548 546
549 547 if (pos >= length)
550 548 return NULL;
551 549
552 550 data = index_deref(self, pos);
553 551
554 552 if (self->format_version == format_v1) {
555 553 node_id = data + entry_v1_offset_node_id;
556 554 } else if (self->format_version == format_v2) {
557 555 node_id = data + entry_v2_offset_node_id;
558 556 } else if (self->format_version == format_cl2) {
559 557 node_id = data + entry_cl2_offset_node_id;
560 558 } else {
561 559 raise_revlog_error();
562 560 return NULL;
563 561 }
564 562
565 563 return data ? node_id : NULL;
566 564 }
567 565
568 566 /*
569 567 * Return the hash of the node corresponding to the given rev. The
570 568 * rev is assumed to be existing. If not, an exception is set.
571 569 */
572 570 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
573 571 {
574 572 const char *node = index_node(self, pos);
575 573 if (node == NULL) {
576 574 PyErr_Format(PyExc_IndexError, "could not access rev %d",
577 575 (int)pos);
578 576 }
579 577 return node;
580 578 }
581 579
582 580 static int nt_insert(nodetree *self, const char *node, int rev);
583 581
584 582 static int node_check(Py_ssize_t nodelen, PyObject *obj, char **node)
585 583 {
586 584 Py_ssize_t thisnodelen;
587 585 if (PyBytes_AsStringAndSize(obj, node, &thisnodelen) == -1)
588 586 return -1;
589 587 if (nodelen == thisnodelen)
590 588 return 0;
591 589 PyErr_Format(PyExc_ValueError, "node len %zd != expected node len %zd",
592 590 thisnodelen, nodelen);
593 591 return -1;
594 592 }
595 593
596 594 static PyObject *index_append(indexObject *self, PyObject *obj)
597 595 {
598 596 uint64_t offset_flags, sidedata_offset;
599 597 int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
600 598 sidedata_comp_len, rank;
601 599 char data_comp_mode, sidedata_comp_mode;
602 600 Py_ssize_t c_node_id_len;
603 601 const char *c_node_id;
604 602 char comp_field;
605 603 char *data;
606 604
607 605 if (!PyArg_ParseTuple(obj, tuple_format, &offset_flags, &comp_len,
608 606 &uncomp_len, &base_rev, &link_rev, &parent_1,
609 607 &parent_2, &c_node_id, &c_node_id_len,
610 608 &sidedata_offset, &sidedata_comp_len,
611 609 &data_comp_mode, &sidedata_comp_mode, &rank)) {
612 610 PyErr_SetString(PyExc_TypeError, "12-tuple required");
613 611 return NULL;
614 612 }
615 613
616 614 if (c_node_id_len != self->nodelen) {
617 615 PyErr_SetString(PyExc_TypeError, "invalid node");
618 616 return NULL;
619 617 }
620 618 if (self->format_version == format_v1) {
621 619
622 620 if (data_comp_mode != comp_mode_inline) {
623 621 PyErr_Format(PyExc_ValueError,
624 622 "invalid data compression mode: %i",
625 623 data_comp_mode);
626 624 return NULL;
627 625 }
628 626 if (sidedata_comp_mode != comp_mode_inline) {
629 627 PyErr_Format(PyExc_ValueError,
630 628 "invalid sidedata compression mode: %i",
631 629 sidedata_comp_mode);
632 630 return NULL;
633 631 }
634 632 }
635 633
636 634 if (self->new_length == self->added_length) {
637 635 size_t new_added_length =
638 636 self->added_length ? self->added_length * 2 : 4096;
639 637 void *new_added = PyMem_Realloc(
640 638 self->added, new_added_length * self->entry_size);
641 639 if (!new_added)
642 640 return PyErr_NoMemory();
643 641 self->added = new_added;
644 642 self->added_length = new_added_length;
645 643 }
646 644 rev = self->length + self->new_length;
647 645 data = self->added + self->entry_size * self->new_length++;
648 646
649 647 memset(data, 0, self->entry_size);
650 648
651 649 if (self->format_version == format_v1) {
652 650 putbe32(offset_flags >> 32, data + entry_v1_offset_high);
653 651 putbe32(offset_flags & 0xffffffffU,
654 652 data + entry_v1_offset_offset_flags);
655 653 putbe32(comp_len, data + entry_v1_offset_comp_len);
656 654 putbe32(uncomp_len, data + entry_v1_offset_uncomp_len);
657 655 putbe32(base_rev, data + entry_v1_offset_base_rev);
658 656 putbe32(link_rev, data + entry_v1_offset_link_rev);
659 657 putbe32(parent_1, data + entry_v1_offset_parent_1);
660 658 putbe32(parent_2, data + entry_v1_offset_parent_2);
661 659 memcpy(data + entry_v1_offset_node_id, c_node_id,
662 660 c_node_id_len);
663 661 } else if (self->format_version == format_v2) {
664 662 putbe32(offset_flags >> 32, data + entry_v2_offset_high);
665 663 putbe32(offset_flags & 0xffffffffU,
666 664 data + entry_v2_offset_offset_flags);
667 665 putbe32(comp_len, data + entry_v2_offset_comp_len);
668 666 putbe32(uncomp_len, data + entry_v2_offset_uncomp_len);
669 667 putbe32(base_rev, data + entry_v2_offset_base_rev);
670 668 putbe32(link_rev, data + entry_v2_offset_link_rev);
671 669 putbe32(parent_1, data + entry_v2_offset_parent_1);
672 670 putbe32(parent_2, data + entry_v2_offset_parent_2);
673 671 memcpy(data + entry_v2_offset_node_id, c_node_id,
674 672 c_node_id_len);
675 673 putbe64(sidedata_offset,
676 674 data + entry_v2_offset_sidedata_offset);
677 675 putbe32(sidedata_comp_len,
678 676 data + entry_v2_offset_sidedata_comp_len);
679 677 comp_field = data_comp_mode & 3;
680 678 comp_field = comp_field | (sidedata_comp_mode & 3) << 2;
681 679 data[entry_v2_offset_all_comp_mode] = comp_field;
682 680 } else if (self->format_version == format_cl2) {
683 681 putbe32(offset_flags >> 32, data + entry_cl2_offset_high);
684 682 putbe32(offset_flags & 0xffffffffU,
685 683 data + entry_cl2_offset_offset_flags);
686 684 putbe32(comp_len, data + entry_cl2_offset_comp_len);
687 685 putbe32(uncomp_len, data + entry_cl2_offset_uncomp_len);
688 686 putbe32(parent_1, data + entry_cl2_offset_parent_1);
689 687 putbe32(parent_2, data + entry_cl2_offset_parent_2);
690 688 memcpy(data + entry_cl2_offset_node_id, c_node_id,
691 689 c_node_id_len);
692 690 putbe64(sidedata_offset,
693 691 data + entry_cl2_offset_sidedata_offset);
694 692 putbe32(sidedata_comp_len,
695 693 data + entry_cl2_offset_sidedata_comp_len);
696 694 comp_field = data_comp_mode & 3;
697 695 comp_field = comp_field | (sidedata_comp_mode & 3) << 2;
698 696 data[entry_cl2_offset_all_comp_mode] = comp_field;
699 697 putbe32(rank, data + entry_cl2_offset_rank);
700 698 } else {
701 699 raise_revlog_error();
702 700 return NULL;
703 701 }
704 702
705 703 if (self->ntinitialized)
706 704 nt_insert(&self->nt, c_node_id, rev);
707 705
708 706 Py_CLEAR(self->headrevs);
709 707 Py_RETURN_NONE;
710 708 }
711 709
712 710 /* Replace an existing index entry's sidedata offset and length with new ones.
713 711 This cannot be used outside of the context of sidedata rewriting,
714 712 inside the transaction that creates the given revision. */
715 713 static PyObject *index_replace_sidedata_info(indexObject *self, PyObject *args)
716 714 {
717 715 uint64_t offset_flags, sidedata_offset;
718 716 Py_ssize_t rev;
719 717 int sidedata_comp_len;
720 718 char comp_mode;
721 719 char *data;
722 720 #if LONG_MAX == 0x7fffffffL
723 const char *const sidedata_format = PY23("nKiKB", "nKiKB");
721 const char *const sidedata_format = "nKiKB";
724 722 #else
725 const char *const sidedata_format = PY23("nkikB", "nkikB");
723 const char *const sidedata_format = "nkikB";
726 724 #endif
727 725
728 726 if (self->entry_size == v1_entry_size || self->inlined) {
729 727 /*
730 728 There is a bug in the transaction handling when going from an
731 729 inline revlog to a separate index and data file. Turn it off until
732 730 it's fixed, since v2 revlogs sometimes get rewritten on exchange.
733 731 See issue6485.
734 732 */
735 733 raise_revlog_error();
736 734 return NULL;
737 735 }
738 736
739 737 if (!PyArg_ParseTuple(args, sidedata_format, &rev, &sidedata_offset,
740 738 &sidedata_comp_len, &offset_flags, &comp_mode))
741 739 return NULL;
742 740
743 741 if (rev < 0 || rev >= index_length(self)) {
744 742 PyErr_SetString(PyExc_IndexError, "revision outside index");
745 743 return NULL;
746 744 }
747 745 if (rev < self->length) {
748 746 PyErr_SetString(
749 747 PyExc_IndexError,
750 748 "cannot rewrite entries outside of this transaction");
751 749 return NULL;
752 750 }
753 751
754 752 /* Find the newly added node, offset from the "already on-disk" length
755 753 */
756 754 data = self->added + self->entry_size * (rev - self->length);
757 755 if (self->format_version == format_v2) {
758 756 putbe64(offset_flags, data + entry_v2_offset_high);
759 757 putbe64(sidedata_offset,
760 758 data + entry_v2_offset_sidedata_offset);
761 759 putbe32(sidedata_comp_len,
762 760 data + entry_v2_offset_sidedata_comp_len);
763 761 data[entry_v2_offset_all_comp_mode] =
764 762 (data[entry_v2_offset_all_comp_mode] & ~(3 << 2)) |
765 763 ((comp_mode & 3) << 2);
766 764 } else if (self->format_version == format_cl2) {
767 765 putbe64(offset_flags, data + entry_cl2_offset_high);
768 766 putbe64(sidedata_offset,
769 767 data + entry_cl2_offset_sidedata_offset);
770 768 putbe32(sidedata_comp_len,
771 769 data + entry_cl2_offset_sidedata_comp_len);
772 770 data[entry_cl2_offset_all_comp_mode] =
773 771 (data[entry_cl2_offset_all_comp_mode] & ~(3 << 2)) |
774 772 ((comp_mode & 3) << 2);
775 773 } else {
776 774 raise_revlog_error();
777 775 return NULL;
778 776 }
779 777
780 778 Py_RETURN_NONE;
781 779 }
782 780
783 781 static PyObject *index_stats(indexObject *self)
784 782 {
785 783 PyObject *obj = PyDict_New();
786 784 PyObject *s = NULL;
787 785 PyObject *t = NULL;
788 786
789 787 if (obj == NULL)
790 788 return NULL;
791 789
792 790 #define istat(__n, __d) \
793 791 do { \
794 792 s = PyBytes_FromString(__d); \
795 793 t = PyLong_FromSsize_t(self->__n); \
796 794 if (!s || !t) \
797 795 goto bail; \
798 796 if (PyDict_SetItem(obj, s, t) == -1) \
799 797 goto bail; \
800 798 Py_CLEAR(s); \
801 799 Py_CLEAR(t); \
802 800 } while (0)
803 801
804 802 if (self->added_length)
805 803 istat(new_length, "index entries added");
806 804 istat(length, "revs in memory");
807 805 istat(ntlookups, "node trie lookups");
808 806 istat(ntmisses, "node trie misses");
809 807 istat(ntrev, "node trie last rev scanned");
810 808 if (self->ntinitialized) {
811 809 istat(nt.capacity, "node trie capacity");
812 810 istat(nt.depth, "node trie depth");
813 811 istat(nt.length, "node trie count");
814 812 istat(nt.splits, "node trie splits");
815 813 }
816 814
817 815 #undef istat
818 816
819 817 return obj;
820 818
821 819 bail:
822 820 Py_XDECREF(obj);
823 821 Py_XDECREF(s);
824 822 Py_XDECREF(t);
825 823 return NULL;
826 824 }
827 825
828 826 /*
829 827 * When we cache a list, we want to be sure the caller can't mutate
830 828 * the cached copy.
831 829 */
832 830 static PyObject *list_copy(PyObject *list)
833 831 {
834 832 Py_ssize_t len = PyList_GET_SIZE(list);
835 833 PyObject *newlist = PyList_New(len);
836 834 Py_ssize_t i;
837 835
838 836 if (newlist == NULL)
839 837 return NULL;
840 838
841 839 for (i = 0; i < len; i++) {
842 840 PyObject *obj = PyList_GET_ITEM(list, i);
843 841 Py_INCREF(obj);
844 842 PyList_SET_ITEM(newlist, i, obj);
845 843 }
846 844
847 845 return newlist;
848 846 }
849 847
850 848 static int check_filter(PyObject *filter, Py_ssize_t arg)
851 849 {
852 850 if (filter) {
853 851 PyObject *arglist, *result;
854 852 int isfiltered;
855 853
856 854 arglist = Py_BuildValue("(n)", arg);
857 855 if (!arglist) {
858 856 return -1;
859 857 }
860 858
861 859 result = PyObject_Call(filter, arglist, NULL);
862 860 Py_DECREF(arglist);
863 861 if (!result) {
864 862 return -1;
865 863 }
866 864
867 865 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
868 866 * same as this function, so we can just return it directly.*/
869 867 isfiltered = PyObject_IsTrue(result);
870 868 Py_DECREF(result);
871 869 return isfiltered;
872 870 } else {
873 871 return 0;
874 872 }
875 873 }
876 874
877 875 static inline void set_phase_from_parents(char *phases, int parent_1,
878 876 int parent_2, Py_ssize_t i)
879 877 {
880 878 if (parent_1 >= 0 && phases[parent_1] > phases[i])
881 879 phases[i] = phases[parent_1];
882 880 if (parent_2 >= 0 && phases[parent_2] > phases[i])
883 881 phases[i] = phases[parent_2];
884 882 }
885 883
886 884 static PyObject *reachableroots2(indexObject *self, PyObject *args)
887 885 {
888 886
889 887 /* Input */
890 888 long minroot;
891 889 PyObject *includepatharg = NULL;
892 890 int includepath = 0;
893 891 /* heads and roots are lists */
894 892 PyObject *heads = NULL;
895 893 PyObject *roots = NULL;
896 894 PyObject *reachable = NULL;
897 895
898 896 PyObject *val;
899 897 Py_ssize_t len = index_length(self);
900 898 long revnum;
901 899 Py_ssize_t k;
902 900 Py_ssize_t i;
903 901 Py_ssize_t l;
904 902 int r;
905 903 int parents[2];
906 904
907 905 /* Internal data structure:
908 906 * tovisit: array of length len+1 (all revs + nullrev), filled upto
909 907 * lentovisit
910 908 *
911 909 * revstates: array of length len+1 (all revs + nullrev) */
912 910 int *tovisit = NULL;
913 911 long lentovisit = 0;
914 912 enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
915 913 char *revstates = NULL;
916 914
917 915 /* Get arguments */
918 916 if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
919 917 &PyList_Type, &roots, &PyBool_Type,
920 918 &includepatharg))
921 919 goto bail;
922 920
923 921 if (includepatharg == Py_True)
924 922 includepath = 1;
925 923
926 924 /* Initialize return set */
927 925 reachable = PyList_New(0);
928 926 if (reachable == NULL)
929 927 goto bail;
930 928
931 929 /* Initialize internal datastructures */
932 930 tovisit = (int *)malloc((len + 1) * sizeof(int));
933 931 if (tovisit == NULL) {
934 932 PyErr_NoMemory();
935 933 goto bail;
936 934 }
937 935
938 936 revstates = (char *)calloc(len + 1, 1);
939 937 if (revstates == NULL) {
940 938 PyErr_NoMemory();
941 939 goto bail;
942 940 }
943 941
944 942 l = PyList_GET_SIZE(roots);
945 943 for (i = 0; i < l; i++) {
946 944 revnum = PyLong_AsLong(PyList_GET_ITEM(roots, i));
947 945 if (revnum == -1 && PyErr_Occurred())
948 946 goto bail;
949 947 /* If root is out of range, e.g. wdir(), it must be unreachable
950 948 * from heads. So we can just ignore it. */
951 949 if (revnum + 1 < 0 || revnum + 1 >= len + 1)
952 950 continue;
953 951 revstates[revnum + 1] |= RS_ROOT;
954 952 }
955 953
956 954 /* Populate tovisit with all the heads */
957 955 l = PyList_GET_SIZE(heads);
958 956 for (i = 0; i < l; i++) {
959 957 revnum = PyLong_AsLong(PyList_GET_ITEM(heads, i));
960 958 if (revnum == -1 && PyErr_Occurred())
961 959 goto bail;
962 960 if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
963 961 PyErr_SetString(PyExc_IndexError, "head out of range");
964 962 goto bail;
965 963 }
966 964 if (!(revstates[revnum + 1] & RS_SEEN)) {
967 965 tovisit[lentovisit++] = (int)revnum;
968 966 revstates[revnum + 1] |= RS_SEEN;
969 967 }
970 968 }
971 969
972 970 /* Visit the tovisit list and find the reachable roots */
973 971 k = 0;
974 972 while (k < lentovisit) {
975 973 /* Add the node to reachable if it is a root*/
976 974 revnum = tovisit[k++];
977 975 if (revstates[revnum + 1] & RS_ROOT) {
978 976 revstates[revnum + 1] |= RS_REACHABLE;
979 977 val = PyLong_FromLong(revnum);
980 978 if (val == NULL)
981 979 goto bail;
982 980 r = PyList_Append(reachable, val);
983 981 Py_DECREF(val);
984 982 if (r < 0)
985 983 goto bail;
986 984 if (includepath == 0)
987 985 continue;
988 986 }
989 987
990 988 /* Add its parents to the list of nodes to visit */
991 989 if (revnum == nullrev)
992 990 continue;
993 991 r = index_get_parents(self, revnum, parents, (int)len - 1);
994 992 if (r < 0)
995 993 goto bail;
996 994 for (i = 0; i < 2; i++) {
997 995 if (!(revstates[parents[i] + 1] & RS_SEEN) &&
998 996 parents[i] >= minroot) {
999 997 tovisit[lentovisit++] = parents[i];
1000 998 revstates[parents[i] + 1] |= RS_SEEN;
1001 999 }
1002 1000 }
1003 1001 }
1004 1002
1005 1003 /* Find all the nodes in between the roots we found and the heads
1006 1004 * and add them to the reachable set */
1007 1005 if (includepath == 1) {
1008 1006 long minidx = minroot;
1009 1007 if (minidx < 0)
1010 1008 minidx = 0;
1011 1009 for (i = minidx; i < len; i++) {
1012 1010 if (!(revstates[i + 1] & RS_SEEN))
1013 1011 continue;
1014 1012 r = index_get_parents(self, i, parents, (int)len - 1);
1015 1013 /* Corrupted index file, error is set from
1016 1014 * index_get_parents */
1017 1015 if (r < 0)
1018 1016 goto bail;
1019 1017 if (((revstates[parents[0] + 1] |
1020 1018 revstates[parents[1] + 1]) &
1021 1019 RS_REACHABLE) &&
1022 1020 !(revstates[i + 1] & RS_REACHABLE)) {
1023 1021 revstates[i + 1] |= RS_REACHABLE;
1024 1022 val = PyLong_FromSsize_t(i);
1025 1023 if (val == NULL)
1026 1024 goto bail;
1027 1025 r = PyList_Append(reachable, val);
1028 1026 Py_DECREF(val);
1029 1027 if (r < 0)
1030 1028 goto bail;
1031 1029 }
1032 1030 }
1033 1031 }
1034 1032
1035 1033 free(revstates);
1036 1034 free(tovisit);
1037 1035 return reachable;
1038 1036 bail:
1039 1037 Py_XDECREF(reachable);
1040 1038 free(revstates);
1041 1039 free(tovisit);
1042 1040 return NULL;
1043 1041 }
1044 1042
1045 1043 static int add_roots_get_min(indexObject *self, PyObject *roots, char *phases,
1046 1044 char phase)
1047 1045 {
1048 1046 Py_ssize_t len = index_length(self);
1049 1047 PyObject *item;
1050 1048 PyObject *iterator;
1051 1049 int rev, minrev = -1;
1052 1050 char *node;
1053 1051
1054 1052 if (!PySet_Check(roots)) {
1055 1053 PyErr_SetString(PyExc_TypeError,
1056 1054 "roots must be a set of nodes");
1057 1055 return -2;
1058 1056 }
1059 1057 iterator = PyObject_GetIter(roots);
1060 1058 if (iterator == NULL)
1061 1059 return -2;
1062 1060 while ((item = PyIter_Next(iterator))) {
1063 1061 if (node_check(self->nodelen, item, &node) == -1)
1064 1062 goto failed;
1065 1063 rev = index_find_node(self, node);
1066 1064 /* null is implicitly public, so negative is invalid */
1067 1065 if (rev < 0 || rev >= len)
1068 1066 goto failed;
1069 1067 phases[rev] = phase;
1070 1068 if (minrev == -1 || minrev > rev)
1071 1069 minrev = rev;
1072 1070 Py_DECREF(item);
1073 1071 }
1074 1072 Py_DECREF(iterator);
1075 1073 return minrev;
1076 1074 failed:
1077 1075 Py_DECREF(iterator);
1078 1076 Py_DECREF(item);
1079 1077 return -2;
1080 1078 }
1081 1079
1082 1080 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
1083 1081 {
1084 1082 /* 0: public (untracked), 1: draft, 2: secret, 32: archive,
1085 1083 96: internal */
1086 1084 static const char trackedphases[] = {1, 2, 32, 96};
1087 1085 PyObject *roots = Py_None;
1088 1086 PyObject *phasesetsdict = NULL;
1089 1087 PyObject *phasesets[4] = {NULL, NULL, NULL, NULL};
1090 1088 Py_ssize_t len = index_length(self);
1091 1089 char *phases = NULL;
1092 1090 int minphaserev = -1, rev, i;
1093 1091 const int numphases = (int)(sizeof(phasesets) / sizeof(phasesets[0]));
1094 1092
1095 1093 if (!PyArg_ParseTuple(args, "O", &roots))
1096 1094 return NULL;
1097 1095 if (roots == NULL || !PyDict_Check(roots)) {
1098 1096 PyErr_SetString(PyExc_TypeError, "roots must be a dictionary");
1099 1097 return NULL;
1100 1098 }
1101 1099
1102 1100 phases = calloc(len, 1);
1103 1101 if (phases == NULL) {
1104 1102 PyErr_NoMemory();
1105 1103 return NULL;
1106 1104 }
1107 1105
1108 1106 for (i = 0; i < numphases; ++i) {
1109 1107 PyObject *pyphase = PyLong_FromLong(trackedphases[i]);
1110 1108 PyObject *phaseroots = NULL;
1111 1109 if (pyphase == NULL)
1112 1110 goto release;
1113 1111 phaseroots = PyDict_GetItem(roots, pyphase);
1114 1112 Py_DECREF(pyphase);
1115 1113 if (phaseroots == NULL)
1116 1114 continue;
1117 1115 rev = add_roots_get_min(self, phaseroots, phases,
1118 1116 trackedphases[i]);
1119 1117 if (rev == -2)
1120 1118 goto release;
1121 1119 if (rev != -1 && (minphaserev == -1 || rev < minphaserev))
1122 1120 minphaserev = rev;
1123 1121 }
1124 1122
1125 1123 for (i = 0; i < numphases; ++i) {
1126 1124 phasesets[i] = PySet_New(NULL);
1127 1125 if (phasesets[i] == NULL)
1128 1126 goto release;
1129 1127 }
1130 1128
1131 1129 if (minphaserev == -1)
1132 1130 minphaserev = len;
1133 1131 for (rev = minphaserev; rev < len; ++rev) {
1134 1132 PyObject *pyphase = NULL;
1135 1133 PyObject *pyrev = NULL;
1136 1134 int parents[2];
1137 1135 /*
1138 1136 * The parent lookup could be skipped for phaseroots, but
1139 1137 * phase --force would historically not recompute them
1140 1138 * correctly, leaving descendents with a lower phase around.
1141 1139 * As such, unconditionally recompute the phase.
1142 1140 */
1143 1141 if (index_get_parents(self, rev, parents, (int)len - 1) < 0)
1144 1142 goto release;
1145 1143 set_phase_from_parents(phases, parents[0], parents[1], rev);
1146 1144 switch (phases[rev]) {
1147 1145 case 0:
1148 1146 continue;
1149 1147 case 1:
1150 1148 pyphase = phasesets[0];
1151 1149 break;
1152 1150 case 2:
1153 1151 pyphase = phasesets[1];
1154 1152 break;
1155 1153 case 32:
1156 1154 pyphase = phasesets[2];
1157 1155 break;
1158 1156 case 96:
1159 1157 pyphase = phasesets[3];
1160 1158 break;
1161 1159 default:
1162 1160 /* this should never happen since the phase number is
1163 1161 * specified by this function. */
1164 1162 PyErr_SetString(PyExc_SystemError,
1165 1163 "bad phase number in internal list");
1166 1164 goto release;
1167 1165 }
1168 1166 pyrev = PyLong_FromLong(rev);
1169 1167 if (pyrev == NULL)
1170 1168 goto release;
1171 1169 if (PySet_Add(pyphase, pyrev) == -1) {
1172 1170 Py_DECREF(pyrev);
1173 1171 goto release;
1174 1172 }
1175 1173 Py_DECREF(pyrev);
1176 1174 }
1177 1175
1178 1176 phasesetsdict = _dict_new_presized(numphases);
1179 1177 if (phasesetsdict == NULL)
1180 1178 goto release;
1181 1179 for (i = 0; i < numphases; ++i) {
1182 1180 PyObject *pyphase = PyLong_FromLong(trackedphases[i]);
1183 1181 if (pyphase == NULL)
1184 1182 goto release;
1185 1183 if (PyDict_SetItem(phasesetsdict, pyphase, phasesets[i]) ==
1186 1184 -1) {
1187 1185 Py_DECREF(pyphase);
1188 1186 goto release;
1189 1187 }
1190 1188 Py_DECREF(phasesets[i]);
1191 1189 phasesets[i] = NULL;
1192 1190 }
1193 1191
1194 1192 free(phases);
1195 1193 return Py_BuildValue("nN", len, phasesetsdict);
1196 1194
1197 1195 release:
1198 1196 for (i = 0; i < numphases; ++i)
1199 1197 Py_XDECREF(phasesets[i]);
1200 1198 Py_XDECREF(phasesetsdict);
1201 1199
1202 1200 free(phases);
1203 1201 return NULL;
1204 1202 }
1205 1203
1206 1204 static PyObject *index_headrevs(indexObject *self, PyObject *args)
1207 1205 {
1208 1206 Py_ssize_t i, j, len;
1209 1207 char *nothead = NULL;
1210 1208 PyObject *heads = NULL;
1211 1209 PyObject *filter = NULL;
1212 1210 PyObject *filteredrevs = Py_None;
1213 1211
1214 1212 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
1215 1213 return NULL;
1216 1214 }
1217 1215
1218 1216 if (self->headrevs && filteredrevs == self->filteredrevs)
1219 1217 return list_copy(self->headrevs);
1220 1218
1221 1219 Py_DECREF(self->filteredrevs);
1222 1220 self->filteredrevs = filteredrevs;
1223 1221 Py_INCREF(filteredrevs);
1224 1222
1225 1223 if (filteredrevs != Py_None) {
1226 1224 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
1227 1225 if (!filter) {
1228 1226 PyErr_SetString(
1229 1227 PyExc_TypeError,
1230 1228 "filteredrevs has no attribute __contains__");
1231 1229 goto bail;
1232 1230 }
1233 1231 }
1234 1232
1235 1233 len = index_length(self);
1236 1234 heads = PyList_New(0);
1237 1235 if (heads == NULL)
1238 1236 goto bail;
1239 1237 if (len == 0) {
1240 1238 PyObject *nullid = PyLong_FromLong(-1);
1241 1239 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
1242 1240 Py_XDECREF(nullid);
1243 1241 goto bail;
1244 1242 }
1245 1243 goto done;
1246 1244 }
1247 1245
1248 1246 nothead = calloc(len, 1);
1249 1247 if (nothead == NULL) {
1250 1248 PyErr_NoMemory();
1251 1249 goto bail;
1252 1250 }
1253 1251
1254 1252 for (i = len - 1; i >= 0; i--) {
1255 1253 int isfiltered;
1256 1254 int parents[2];
1257 1255
1258 1256 /* If nothead[i] == 1, it means we've seen an unfiltered child
1259 1257 * of this node already, and therefore this node is not
1260 1258 * filtered. So we can skip the expensive check_filter step.
1261 1259 */
1262 1260 if (nothead[i] != 1) {
1263 1261 isfiltered = check_filter(filter, i);
1264 1262 if (isfiltered == -1) {
1265 1263 PyErr_SetString(PyExc_TypeError,
1266 1264 "unable to check filter");
1267 1265 goto bail;
1268 1266 }
1269 1267
1270 1268 if (isfiltered) {
1271 1269 nothead[i] = 1;
1272 1270 continue;
1273 1271 }
1274 1272 }
1275 1273
1276 1274 if (index_get_parents(self, i, parents, (int)len - 1) < 0)
1277 1275 goto bail;
1278 1276 for (j = 0; j < 2; j++) {
1279 1277 if (parents[j] >= 0)
1280 1278 nothead[parents[j]] = 1;
1281 1279 }
1282 1280 }
1283 1281
1284 1282 for (i = 0; i < len; i++) {
1285 1283 PyObject *head;
1286 1284
1287 1285 if (nothead[i])
1288 1286 continue;
1289 1287 head = PyLong_FromSsize_t(i);
1290 1288 if (head == NULL || PyList_Append(heads, head) == -1) {
1291 1289 Py_XDECREF(head);
1292 1290 goto bail;
1293 1291 }
1294 1292 }
1295 1293
1296 1294 done:
1297 1295 self->headrevs = heads;
1298 1296 Py_XDECREF(filter);
1299 1297 free(nothead);
1300 1298 return list_copy(self->headrevs);
1301 1299 bail:
1302 1300 Py_XDECREF(filter);
1303 1301 Py_XDECREF(heads);
1304 1302 free(nothead);
1305 1303 return NULL;
1306 1304 }
1307 1305
1308 1306 /**
1309 1307 * Obtain the base revision index entry.
1310 1308 *
1311 1309 * Callers must ensure that rev >= 0 or illegal memory access may occur.
1312 1310 */
1313 1311 static inline int index_baserev(indexObject *self, int rev)
1314 1312 {
1315 1313 const char *data;
1316 1314 int result;
1317 1315
1318 1316 data = index_deref(self, rev);
1319 1317 if (data == NULL)
1320 1318 return -2;
1321 1319
1322 1320 if (self->format_version == format_v1) {
1323 1321 result = getbe32(data + entry_v1_offset_base_rev);
1324 1322 } else if (self->format_version == format_v2) {
1325 1323 result = getbe32(data + entry_v2_offset_base_rev);
1326 1324 } else if (self->format_version == format_cl2) {
1327 1325 return rev;
1328 1326 } else {
1329 1327 raise_revlog_error();
1330 1328 return -1;
1331 1329 }
1332 1330
1333 1331 if (result > rev) {
1334 1332 PyErr_Format(
1335 1333 PyExc_ValueError,
1336 1334 "corrupted revlog, revision base above revision: %d, %d",
1337 1335 rev, result);
1338 1336 return -2;
1339 1337 }
1340 1338 if (result < -1) {
1341 1339 PyErr_Format(
1342 1340 PyExc_ValueError,
1343 1341 "corrupted revlog, revision base out of range: %d, %d", rev,
1344 1342 result);
1345 1343 return -2;
1346 1344 }
1347 1345 return result;
1348 1346 }
1349 1347
1350 1348 /**
1351 1349 * Find if a revision is a snapshot or not
1352 1350 *
1353 1351 * Only relevant for sparse-revlog case.
1354 1352 * Callers must ensure that rev is in a valid range.
1355 1353 */
1356 1354 static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
1357 1355 {
1358 1356 int ps[2];
1359 1357 Py_ssize_t base;
1360 1358 while (rev >= 0) {
1361 1359 base = (Py_ssize_t)index_baserev(self, rev);
1362 1360 if (base == rev) {
1363 1361 base = -1;
1364 1362 }
1365 1363 if (base == -2) {
1366 1364 assert(PyErr_Occurred());
1367 1365 return -1;
1368 1366 }
1369 1367 if (base == -1) {
1370 1368 return 1;
1371 1369 }
1372 1370 if (index_get_parents(self, rev, ps, (int)rev) < 0) {
1373 1371 assert(PyErr_Occurred());
1374 1372 return -1;
1375 1373 };
1376 1374 if (base == ps[0] || base == ps[1]) {
1377 1375 return 0;
1378 1376 }
1379 1377 rev = base;
1380 1378 }
1381 1379 return rev == -1;
1382 1380 }
1383 1381
1384 1382 static PyObject *index_issnapshot(indexObject *self, PyObject *value)
1385 1383 {
1386 1384 long rev;
1387 1385 int issnap;
1388 1386 Py_ssize_t length = index_length(self);
1389 1387
1390 1388 if (!pylong_to_long(value, &rev)) {
1391 1389 return NULL;
1392 1390 }
1393 1391 if (rev < -1 || rev >= length) {
1394 1392 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
1395 1393 rev);
1396 1394 return NULL;
1397 1395 };
1398 1396 issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
1399 1397 if (issnap < 0) {
1400 1398 return NULL;
1401 1399 };
1402 1400 return PyBool_FromLong((long)issnap);
1403 1401 }
1404 1402
1405 1403 static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
1406 1404 {
1407 1405 Py_ssize_t start_rev;
1408 1406 PyObject *cache;
1409 1407 Py_ssize_t base;
1410 1408 Py_ssize_t rev;
1411 1409 PyObject *key = NULL;
1412 1410 PyObject *value = NULL;
1413 1411 const Py_ssize_t length = index_length(self);
1414 1412 if (!PyArg_ParseTuple(args, "O!n", &PyDict_Type, &cache, &start_rev)) {
1415 1413 return NULL;
1416 1414 }
1417 1415 for (rev = start_rev; rev < length; rev++) {
1418 1416 int issnap;
1419 1417 PyObject *allvalues = NULL;
1420 1418 issnap = index_issnapshotrev(self, rev);
1421 1419 if (issnap < 0) {
1422 1420 goto bail;
1423 1421 }
1424 1422 if (issnap == 0) {
1425 1423 continue;
1426 1424 }
1427 1425 base = (Py_ssize_t)index_baserev(self, rev);
1428 1426 if (base == rev) {
1429 1427 base = -1;
1430 1428 }
1431 1429 if (base == -2) {
1432 1430 assert(PyErr_Occurred());
1433 1431 goto bail;
1434 1432 }
1435 1433 key = PyLong_FromSsize_t(base);
1436 1434 allvalues = PyDict_GetItem(cache, key);
1437 1435 if (allvalues == NULL && PyErr_Occurred()) {
1438 1436 goto bail;
1439 1437 }
1440 1438 if (allvalues == NULL) {
1441 1439 int r;
1442 1440 allvalues = PyList_New(0);
1443 1441 if (!allvalues) {
1444 1442 goto bail;
1445 1443 }
1446 1444 r = PyDict_SetItem(cache, key, allvalues);
1447 1445 Py_DECREF(allvalues);
1448 1446 if (r < 0) {
1449 1447 goto bail;
1450 1448 }
1451 1449 }
1452 1450 value = PyLong_FromSsize_t(rev);
1453 1451 if (PyList_Append(allvalues, value)) {
1454 1452 goto bail;
1455 1453 }
1456 1454 Py_CLEAR(key);
1457 1455 Py_CLEAR(value);
1458 1456 }
1459 1457 Py_RETURN_NONE;
1460 1458 bail:
1461 1459 Py_XDECREF(key);
1462 1460 Py_XDECREF(value);
1463 1461 return NULL;
1464 1462 }
1465 1463
1466 1464 static PyObject *index_deltachain(indexObject *self, PyObject *args)
1467 1465 {
1468 1466 int rev, generaldelta;
1469 1467 PyObject *stoparg;
1470 1468 int stoprev, iterrev, baserev = -1;
1471 1469 int stopped;
1472 1470 PyObject *chain = NULL, *result = NULL;
1473 1471 const Py_ssize_t length = index_length(self);
1474 1472
1475 1473 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
1476 1474 return NULL;
1477 1475 }
1478 1476
1479 1477 if (PyLong_Check(stoparg)) {
1480 1478 stoprev = (int)PyLong_AsLong(stoparg);
1481 1479 if (stoprev == -1 && PyErr_Occurred()) {
1482 1480 return NULL;
1483 1481 }
1484 1482 } else if (stoparg == Py_None) {
1485 1483 stoprev = -2;
1486 1484 } else {
1487 1485 PyErr_SetString(PyExc_ValueError,
1488 1486 "stoprev must be integer or None");
1489 1487 return NULL;
1490 1488 }
1491 1489
1492 1490 if (rev < 0 || rev >= length) {
1493 1491 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1494 1492 return NULL;
1495 1493 }
1496 1494
1497 1495 chain = PyList_New(0);
1498 1496 if (chain == NULL) {
1499 1497 return NULL;
1500 1498 }
1501 1499
1502 1500 baserev = index_baserev(self, rev);
1503 1501
1504 1502 /* This should never happen. */
1505 1503 if (baserev <= -2) {
1506 1504 /* Error should be set by index_deref() */
1507 1505 assert(PyErr_Occurred());
1508 1506 goto bail;
1509 1507 }
1510 1508
1511 1509 iterrev = rev;
1512 1510
1513 1511 while (iterrev != baserev && iterrev != stoprev) {
1514 1512 PyObject *value = PyLong_FromLong(iterrev);
1515 1513 if (value == NULL) {
1516 1514 goto bail;
1517 1515 }
1518 1516 if (PyList_Append(chain, value)) {
1519 1517 Py_DECREF(value);
1520 1518 goto bail;
1521 1519 }
1522 1520 Py_DECREF(value);
1523 1521
1524 1522 if (generaldelta) {
1525 1523 iterrev = baserev;
1526 1524 } else {
1527 1525 iterrev--;
1528 1526 }
1529 1527
1530 1528 if (iterrev < 0) {
1531 1529 break;
1532 1530 }
1533 1531
1534 1532 if (iterrev >= length) {
1535 1533 PyErr_SetString(PyExc_IndexError,
1536 1534 "revision outside index");
1537 1535 return NULL;
1538 1536 }
1539 1537
1540 1538 baserev = index_baserev(self, iterrev);
1541 1539
1542 1540 /* This should never happen. */
1543 1541 if (baserev <= -2) {
1544 1542 /* Error should be set by index_deref() */
1545 1543 assert(PyErr_Occurred());
1546 1544 goto bail;
1547 1545 }
1548 1546 }
1549 1547
1550 1548 if (iterrev == stoprev) {
1551 1549 stopped = 1;
1552 1550 } else {
1553 1551 PyObject *value = PyLong_FromLong(iterrev);
1554 1552 if (value == NULL) {
1555 1553 goto bail;
1556 1554 }
1557 1555 if (PyList_Append(chain, value)) {
1558 1556 Py_DECREF(value);
1559 1557 goto bail;
1560 1558 }
1561 1559 Py_DECREF(value);
1562 1560
1563 1561 stopped = 0;
1564 1562 }
1565 1563
1566 1564 if (PyList_Reverse(chain)) {
1567 1565 goto bail;
1568 1566 }
1569 1567
1570 1568 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
1571 1569 Py_DECREF(chain);
1572 1570 return result;
1573 1571
1574 1572 bail:
1575 1573 Py_DECREF(chain);
1576 1574 return NULL;
1577 1575 }
1578 1576
1579 1577 static inline int64_t
1580 1578 index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
1581 1579 {
1582 1580 int64_t start_offset;
1583 1581 int64_t end_offset;
1584 1582 int end_size;
1585 1583 start_offset = index_get_start(self, start_rev);
1586 1584 if (start_offset < 0) {
1587 1585 return -1;
1588 1586 }
1589 1587 end_offset = index_get_start(self, end_rev);
1590 1588 if (end_offset < 0) {
1591 1589 return -1;
1592 1590 }
1593 1591 end_size = index_get_length(self, end_rev);
1594 1592 if (end_size < 0) {
1595 1593 return -1;
1596 1594 }
1597 1595 if (end_offset < start_offset) {
1598 1596 PyErr_Format(PyExc_ValueError,
1599 1597 "corrupted revlog index: inconsistent offset "
1600 1598 "between revisions (%zd) and (%zd)",
1601 1599 start_rev, end_rev);
1602 1600 return -1;
1603 1601 }
1604 1602 return (end_offset - start_offset) + (int64_t)end_size;
1605 1603 }
1606 1604
1607 1605 /* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
1608 1606 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1609 1607 Py_ssize_t startidx, Py_ssize_t endidx)
1610 1608 {
1611 1609 int length;
1612 1610 while (endidx > 1 && endidx > startidx) {
1613 1611 length = index_get_length(self, revs[endidx - 1]);
1614 1612 if (length < 0) {
1615 1613 return -1;
1616 1614 }
1617 1615 if (length != 0) {
1618 1616 break;
1619 1617 }
1620 1618 endidx -= 1;
1621 1619 }
1622 1620 return endidx;
1623 1621 }
1624 1622
/* A hole between two consecutive chunks of data in a delta chain. */
struct Gap {
	int64_t size;   /* number of bytes in the hole */
	Py_ssize_t idx; /* position (in the chain) of the revision after it */
};

/* qsort() comparator ordering `struct Gap` items by increasing size. */
static int gap_compare(const void *left, const void *right)
{
	const int64_t lsize = ((const struct Gap *)left)->size;
	const int64_t rsize = ((const struct Gap *)right)->size;

	return (lsize > rsize) - (lsize < rsize);
}
/* qsort() comparator ordering Py_ssize_t values increasingly. */
static int Py_ssize_t_compare(const void *left, const void *right)
{
	const Py_ssize_t lval = *(const Py_ssize_t *)left;
	const Py_ssize_t rval = *(const Py_ssize_t *)right;

	return (lval > rval) - (lval < rval);
}
1652 1650
1653 1651 static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
1654 1652 {
1655 1653 /* method arguments */
1656 1654 PyObject *list_revs = NULL; /* revisions in the chain */
1657 1655 double targetdensity = 0; /* min density to achieve */
1658 1656 Py_ssize_t mingapsize = 0; /* threshold to ignore gaps */
1659 1657
1660 1658 /* other core variables */
1661 1659 Py_ssize_t idxlen = index_length(self);
1662 1660 Py_ssize_t i; /* used for various iteration */
1663 1661 PyObject *result = NULL; /* the final return of the function */
1664 1662
1665 1663 /* generic information about the delta chain being slice */
1666 1664 Py_ssize_t num_revs = 0; /* size of the full delta chain */
1667 1665 Py_ssize_t *revs = NULL; /* native array of revision in the chain */
1668 1666 int64_t chainpayload = 0; /* sum of all delta in the chain */
1669 1667 int64_t deltachainspan = 0; /* distance from first byte to last byte */
1670 1668
1671 1669 /* variable used for slicing the delta chain */
1672 1670 int64_t readdata = 0; /* amount of data currently planned to be read */
1673 1671 double density = 0; /* ration of payload data compared to read ones */
1674 1672 int64_t previous_end;
1675 1673 struct Gap *gaps = NULL; /* array of notable gap in the chain */
1676 1674 Py_ssize_t num_gaps =
1677 1675 0; /* total number of notable gap recorded so far */
1678 1676 Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
1679 1677 Py_ssize_t num_selected = 0; /* number of gaps skipped */
1680 1678 PyObject *chunk = NULL; /* individual slice */
1681 1679 PyObject *allchunks = NULL; /* all slices */
1682 1680 Py_ssize_t previdx;
1683 1681
1684 1682 /* parsing argument */
1685 1683 if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
1686 1684 &targetdensity, &mingapsize)) {
1687 1685 goto bail;
1688 1686 }
1689 1687
1690 1688 /* If the delta chain contains a single element, we do not need slicing
1691 1689 */
1692 1690 num_revs = PyList_GET_SIZE(list_revs);
1693 1691 if (num_revs <= 1) {
1694 1692 result = PyTuple_Pack(1, list_revs);
1695 1693 goto done;
1696 1694 }
1697 1695
1698 1696 /* Turn the python list into a native integer array (for efficiency) */
1699 1697 revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
1700 1698 if (revs == NULL) {
1701 1699 PyErr_NoMemory();
1702 1700 goto bail;
1703 1701 }
1704 1702 for (i = 0; i < num_revs; i++) {
1705 1703 Py_ssize_t revnum =
1706 1704 PyLong_AsLong(PyList_GET_ITEM(list_revs, i));
1707 1705 if (revnum == -1 && PyErr_Occurred()) {
1708 1706 goto bail;
1709 1707 }
1710 1708 if (revnum < nullrev || revnum >= idxlen) {
1711 1709 PyErr_Format(PyExc_IndexError,
1712 1710 "index out of range: %zd", revnum);
1713 1711 goto bail;
1714 1712 }
1715 1713 revs[i] = revnum;
1716 1714 }
1717 1715
1718 1716 /* Compute and check various property of the unsliced delta chain */
1719 1717 deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
1720 1718 if (deltachainspan < 0) {
1721 1719 goto bail;
1722 1720 }
1723 1721
1724 1722 if (deltachainspan <= mingapsize) {
1725 1723 result = PyTuple_Pack(1, list_revs);
1726 1724 goto done;
1727 1725 }
1728 1726 chainpayload = 0;
1729 1727 for (i = 0; i < num_revs; i++) {
1730 1728 int tmp = index_get_length(self, revs[i]);
1731 1729 if (tmp < 0) {
1732 1730 goto bail;
1733 1731 }
1734 1732 chainpayload += tmp;
1735 1733 }
1736 1734
1737 1735 readdata = deltachainspan;
1738 1736 density = 1.0;
1739 1737
1740 1738 if (0 < deltachainspan) {
1741 1739 density = (double)chainpayload / (double)deltachainspan;
1742 1740 }
1743 1741
1744 1742 if (density >= targetdensity) {
1745 1743 result = PyTuple_Pack(1, list_revs);
1746 1744 goto done;
1747 1745 }
1748 1746
1749 1747 /* if chain is too sparse, look for relevant gaps */
1750 1748 gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
1751 1749 if (gaps == NULL) {
1752 1750 PyErr_NoMemory();
1753 1751 goto bail;
1754 1752 }
1755 1753
1756 1754 previous_end = -1;
1757 1755 for (i = 0; i < num_revs; i++) {
1758 1756 int64_t revstart;
1759 1757 int revsize;
1760 1758 revstart = index_get_start(self, revs[i]);
1761 1759 if (revstart < 0) {
1762 1760 goto bail;
1763 1761 };
1764 1762 revsize = index_get_length(self, revs[i]);
1765 1763 if (revsize < 0) {
1766 1764 goto bail;
1767 1765 };
1768 1766 if (revsize == 0) {
1769 1767 continue;
1770 1768 }
1771 1769 if (previous_end >= 0) {
1772 1770 int64_t gapsize = revstart - previous_end;
1773 1771 if (gapsize > mingapsize) {
1774 1772 gaps[num_gaps].size = gapsize;
1775 1773 gaps[num_gaps].idx = i;
1776 1774 num_gaps += 1;
1777 1775 }
1778 1776 }
1779 1777 previous_end = revstart + revsize;
1780 1778 }
1781 1779 if (num_gaps == 0) {
1782 1780 result = PyTuple_Pack(1, list_revs);
1783 1781 goto done;
1784 1782 }
1785 1783 qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);
1786 1784
1787 1785 /* Slice the largest gap first, they improve the density the most */
1788 1786 selected_indices =
1789 1787 (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
1790 1788 if (selected_indices == NULL) {
1791 1789 PyErr_NoMemory();
1792 1790 goto bail;
1793 1791 }
1794 1792
1795 1793 for (i = num_gaps - 1; i >= 0; i--) {
1796 1794 selected_indices[num_selected] = gaps[i].idx;
1797 1795 readdata -= gaps[i].size;
1798 1796 num_selected += 1;
1799 1797 if (readdata <= 0) {
1800 1798 density = 1.0;
1801 1799 } else {
1802 1800 density = (double)chainpayload / (double)readdata;
1803 1801 }
1804 1802 if (density >= targetdensity) {
1805 1803 break;
1806 1804 }
1807 1805 }
1808 1806 qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
1809 1807 &Py_ssize_t_compare);
1810 1808
1811 1809 /* create the resulting slice */
1812 1810 allchunks = PyList_New(0);
1813 1811 if (allchunks == NULL) {
1814 1812 goto bail;
1815 1813 }
1816 1814 previdx = 0;
1817 1815 selected_indices[num_selected] = num_revs;
1818 1816 for (i = 0; i <= num_selected; i++) {
1819 1817 Py_ssize_t idx = selected_indices[i];
1820 1818 Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
1821 1819 if (endidx < 0) {
1822 1820 goto bail;
1823 1821 }
1824 1822 if (previdx < endidx) {
1825 1823 chunk = PyList_GetSlice(list_revs, previdx, endidx);
1826 1824 if (chunk == NULL) {
1827 1825 goto bail;
1828 1826 }
1829 1827 if (PyList_Append(allchunks, chunk) == -1) {
1830 1828 goto bail;
1831 1829 }
1832 1830 Py_DECREF(chunk);
1833 1831 chunk = NULL;
1834 1832 }
1835 1833 previdx = idx;
1836 1834 }
1837 1835 result = allchunks;
1838 1836 goto done;
1839 1837
1840 1838 bail:
1841 1839 Py_XDECREF(allchunks);
1842 1840 Py_XDECREF(chunk);
1843 1841 done:
1844 1842 free(revs);
1845 1843 free(gaps);
1846 1844 free(selected_indices);
1847 1845 return result;
1848 1846 }
1849 1847
/*
 * Return the nybble found at position `level` of a binary node: even
 * levels select the high four bits of a byte, odd levels the low four.
 */
static inline int nt_level(const char *node, Py_ssize_t level)
{
	const int byte = node[level >> 1];

	return ((level & 1) ? byte : (byte >> 4)) & 0xf;
}
1857 1855
1858 1856 /*
1859 1857 * Return values:
1860 1858 *
1861 1859 * -4: match is ambiguous (multiple candidates)
1862 1860 * -2: not found
1863 1861 * rest: valid rev
1864 1862 */
1865 1863 static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
1866 1864 int hex)
1867 1865 {
1868 1866 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
1869 1867 int level, maxlevel, off;
1870 1868
1871 1869 /* If the input is binary, do a fast check for the nullid first. */
1872 1870 if (!hex && nodelen == self->nodelen && node[0] == '\0' &&
1873 1871 node[1] == '\0' && memcmp(node, nullid, self->nodelen) == 0)
1874 1872 return -1;
1875 1873
1876 1874 if (hex)
1877 1875 maxlevel = nodelen;
1878 1876 else
1879 1877 maxlevel = 2 * nodelen;
1880 1878 if (maxlevel > 2 * self->nodelen)
1881 1879 maxlevel = 2 * self->nodelen;
1882 1880
1883 1881 for (level = off = 0; level < maxlevel; level++) {
1884 1882 int k = getnybble(node, level);
1885 1883 nodetreenode *n = &self->nodes[off];
1886 1884 int v = n->children[k];
1887 1885
1888 1886 if (v < 0) {
1889 1887 const char *n;
1890 1888 Py_ssize_t i;
1891 1889
1892 1890 v = -(v + 2);
1893 1891 n = index_node(self->index, v);
1894 1892 if (n == NULL)
1895 1893 return -2;
1896 1894 for (i = level; i < maxlevel; i++)
1897 1895 if (getnybble(node, i) != nt_level(n, i))
1898 1896 return -2;
1899 1897 return v;
1900 1898 }
1901 1899 if (v == 0)
1902 1900 return -2;
1903 1901 off = v;
1904 1902 }
1905 1903 /* multiple matches against an ambiguous prefix */
1906 1904 return -4;
1907 1905 }
1908 1906
1909 1907 static int nt_new(nodetree *self)
1910 1908 {
1911 1909 if (self->length == self->capacity) {
1912 1910 size_t newcapacity;
1913 1911 nodetreenode *newnodes;
1914 1912 newcapacity = self->capacity * 2;
1915 1913 if (newcapacity >= SIZE_MAX / sizeof(nodetreenode)) {
1916 1914 PyErr_SetString(PyExc_MemoryError,
1917 1915 "overflow in nt_new");
1918 1916 return -1;
1919 1917 }
1920 1918 newnodes =
1921 1919 realloc(self->nodes, newcapacity * sizeof(nodetreenode));
1922 1920 if (newnodes == NULL) {
1923 1921 PyErr_SetString(PyExc_MemoryError, "out of memory");
1924 1922 return -1;
1925 1923 }
1926 1924 self->capacity = newcapacity;
1927 1925 self->nodes = newnodes;
1928 1926 memset(&self->nodes[self->length], 0,
1929 1927 sizeof(nodetreenode) * (self->capacity - self->length));
1930 1928 }
1931 1929 return self->length++;
1932 1930 }
1933 1931
1934 1932 static int nt_insert(nodetree *self, const char *node, int rev)
1935 1933 {
1936 1934 int level = 0;
1937 1935 int off = 0;
1938 1936
1939 1937 while (level < 2 * self->nodelen) {
1940 1938 int k = nt_level(node, level);
1941 1939 nodetreenode *n;
1942 1940 int v;
1943 1941
1944 1942 n = &self->nodes[off];
1945 1943 v = n->children[k];
1946 1944
1947 1945 if (v == 0) {
1948 1946 n->children[k] = -rev - 2;
1949 1947 return 0;
1950 1948 }
1951 1949 if (v < 0) {
1952 1950 const char *oldnode =
1953 1951 index_node_existing(self->index, -(v + 2));
1954 1952 int noff;
1955 1953
1956 1954 if (oldnode == NULL)
1957 1955 return -1;
1958 1956 if (!memcmp(oldnode, node, self->nodelen)) {
1959 1957 n->children[k] = -rev - 2;
1960 1958 return 0;
1961 1959 }
1962 1960 noff = nt_new(self);
1963 1961 if (noff == -1)
1964 1962 return -1;
1965 1963 /* self->nodes may have been changed by realloc */
1966 1964 self->nodes[off].children[k] = noff;
1967 1965 off = noff;
1968 1966 n = &self->nodes[off];
1969 1967 n->children[nt_level(oldnode, ++level)] = v;
1970 1968 if (level > self->depth)
1971 1969 self->depth = level;
1972 1970 self->splits += 1;
1973 1971 } else {
1974 1972 level += 1;
1975 1973 off = v;
1976 1974 }
1977 1975 }
1978 1976
1979 1977 return -1;
1980 1978 }
1981 1979
1982 1980 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
1983 1981 {
1984 1982 Py_ssize_t rev;
1985 1983 const char *node;
1986 1984 Py_ssize_t length;
1987 1985 if (!PyArg_ParseTuple(args, "n", &rev))
1988 1986 return NULL;
1989 1987 length = index_length(self->nt.index);
1990 1988 if (rev < 0 || rev >= length) {
1991 1989 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1992 1990 return NULL;
1993 1991 }
1994 1992 node = index_node_existing(self->nt.index, rev);
1995 1993 if (nt_insert(&self->nt, node, (int)rev) == -1)
1996 1994 return NULL;
1997 1995 Py_RETURN_NONE;
1998 1996 }
1999 1997
2000 1998 static int nt_delete_node(nodetree *self, const char *node)
2001 1999 {
2002 2000 /* rev==-2 happens to get encoded as 0, which is interpreted as not set
2003 2001 */
2004 2002 return nt_insert(self, node, -2);
2005 2003 }
2006 2004
2007 2005 static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
2008 2006 {
2009 2007 /* Initialize before overflow-checking to avoid nt_dealloc() crash. */
2010 2008 self->nodes = NULL;
2011 2009
2012 2010 self->index = index;
2013 2011 /* The input capacity is in terms of revisions, while the field is in
2014 2012 * terms of nodetree nodes. */
2015 2013 self->capacity = (capacity < 4 ? 4 : capacity / 2);
2016 2014 self->nodelen = index->nodelen;
2017 2015 self->depth = 0;
2018 2016 self->splits = 0;
2019 2017 if (self->capacity > SIZE_MAX / sizeof(nodetreenode)) {
2020 2018 PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
2021 2019 return -1;
2022 2020 }
2023 2021 self->nodes = calloc(self->capacity, sizeof(nodetreenode));
2024 2022 if (self->nodes == NULL) {
2025 2023 PyErr_NoMemory();
2026 2024 return -1;
2027 2025 }
2028 2026 self->length = 1;
2029 2027 return 0;
2030 2028 }
2031 2029
2032 2030 static int ntobj_init(nodetreeObject *self, PyObject *args)
2033 2031 {
2034 2032 PyObject *index;
2035 2033 unsigned capacity;
2036 2034 if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
2037 2035 &capacity))
2038 2036 return -1;
2039 2037 Py_INCREF(index);
2040 2038 return nt_init(&self->nt, (indexObject *)index, capacity);
2041 2039 }
2042 2040
2043 2041 static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
2044 2042 {
2045 2043 return nt_find(self, node, nodelen, 1);
2046 2044 }
2047 2045
2048 2046 /*
2049 2047 * Find the length of the shortest unique prefix of node.
2050 2048 *
2051 2049 * Return values:
2052 2050 *
2053 2051 * -3: error (exception set)
2054 2052 * -2: not found (no exception set)
2055 2053 * rest: length of shortest prefix
2056 2054 */
2057 2055 static int nt_shortest(nodetree *self, const char *node)
2058 2056 {
2059 2057 int level, off;
2060 2058
2061 2059 for (level = off = 0; level < 2 * self->nodelen; level++) {
2062 2060 int k, v;
2063 2061 nodetreenode *n = &self->nodes[off];
2064 2062 k = nt_level(node, level);
2065 2063 v = n->children[k];
2066 2064 if (v < 0) {
2067 2065 const char *n;
2068 2066 v = -(v + 2);
2069 2067 n = index_node_existing(self->index, v);
2070 2068 if (n == NULL)
2071 2069 return -3;
2072 2070 if (memcmp(node, n, self->nodelen) != 0)
2073 2071 /*
2074 2072 * Found a unique prefix, but it wasn't for the
2075 2073 * requested node (i.e the requested node does
2076 2074 * not exist).
2077 2075 */
2078 2076 return -2;
2079 2077 return level + 1;
2080 2078 }
2081 2079 if (v == 0)
2082 2080 return -2;
2083 2081 off = v;
2084 2082 }
2085 2083 /*
2086 2084 * The node was still not unique after 40 hex digits, so this won't
2087 2085 * happen. Also, if we get here, then there's a programming error in
2088 2086 * this file that made us insert a node longer than 40 hex digits.
2089 2087 */
2090 2088 PyErr_SetString(PyExc_Exception, "broken node tree");
2091 2089 return -3;
2092 2090 }
2093 2091
2094 2092 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
2095 2093 {
2096 2094 PyObject *val;
2097 2095 char *node;
2098 2096 int length;
2099 2097
2100 2098 if (!PyArg_ParseTuple(args, "O", &val))
2101 2099 return NULL;
2102 2100 if (node_check(self->nt.nodelen, val, &node) == -1)
2103 2101 return NULL;
2104 2102
2105 2103 length = nt_shortest(&self->nt, node);
2106 2104 if (length == -3)
2107 2105 return NULL;
2108 2106 if (length == -2) {
2109 2107 raise_revlog_error();
2110 2108 return NULL;
2111 2109 }
2112 2110 return PyLong_FromLong(length);
2113 2111 }
2114 2112
2115 2113 static void nt_dealloc(nodetree *self)
2116 2114 {
2117 2115 free(self->nodes);
2118 2116 self->nodes = NULL;
2119 2117 }
2120 2118
2121 2119 static void ntobj_dealloc(nodetreeObject *self)
2122 2120 {
2123 2121 Py_XDECREF(self->nt.index);
2124 2122 nt_dealloc(&self->nt);
2125 2123 PyObject_Del(self);
2126 2124 }
2127 2125
/*
 * Method table of the nodetree type: "insert" and "shortest", both taking
 * positional arguments (METH_VARARGS).
 */
2128 2126 static PyMethodDef ntobj_methods[] = {
2129 2127 {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
2130 2128 "insert an index entry"},
2131 2129 {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
2132 2130 "find length of shortest hex nodeid of a binary ID"},
2133 2131 {NULL} /* Sentinel */
2134 2132 };
2135 2133
/*
 * Type object for "parsers.nodetree". The initializer is positional, so the
 * order of the entries must follow the PyTypeObject slot layout. Only
 * tp_name, tp_basicsize, tp_dealloc, tp_flags, tp_doc, tp_methods and
 * tp_init are populated here; every other listed slot is 0.
 */
2136 2134 static PyTypeObject nodetreeType = {
2137 2135 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2138 2136 "parsers.nodetree", /* tp_name */
2139 2137 sizeof(nodetreeObject), /* tp_basicsize */
2140 2138 0, /* tp_itemsize */
2141 2139 (destructor)ntobj_dealloc, /* tp_dealloc */
2142 2140 0, /* tp_print */
2143 2141 0, /* tp_getattr */
2144 2142 0, /* tp_setattr */
2145 2143 0, /* tp_compare */
2146 2144 0, /* tp_repr */
2147 2145 0, /* tp_as_number */
2148 2146 0, /* tp_as_sequence */
2149 2147 0, /* tp_as_mapping */
2150 2148 0, /* tp_hash */
2151 2149 0, /* tp_call */
2152 2150 0, /* tp_str */
2153 2151 0, /* tp_getattro */
2154 2152 0, /* tp_setattro */
2155 2153 0, /* tp_as_buffer */
2156 2154 Py_TPFLAGS_DEFAULT, /* tp_flags */
2157 2155 "nodetree", /* tp_doc */
2158 2156 0, /* tp_traverse */
2159 2157 0, /* tp_clear */
2160 2158 0, /* tp_richcompare */
2161 2159 0, /* tp_weaklistoffset */
2162 2160 0, /* tp_iter */
2163 2161 0, /* tp_iternext */
2164 2162 ntobj_methods, /* tp_methods */
2165 2163 0, /* tp_members */
2166 2164 0, /* tp_getset */
2167 2165 0, /* tp_base */
2168 2166 0, /* tp_dict */
2169 2167 0, /* tp_descr_get */
2170 2168 0, /* tp_descr_set */
2171 2169 0, /* tp_dictoffset */
2172 2170 (initproc)ntobj_init, /* tp_init */
2173 2171 0, /* tp_alloc */
2174 2172 };
2175 2173
2176 2174 static int index_init_nt(indexObject *self)
2177 2175 {
2178 2176 if (!self->ntinitialized) {
2179 2177 if (nt_init(&self->nt, self, (int)self->length) == -1) {
2180 2178 nt_dealloc(&self->nt);
2181 2179 return -1;
2182 2180 }
2183 2181 if (nt_insert(&self->nt, nullid, -1) == -1) {
2184 2182 nt_dealloc(&self->nt);
2185 2183 return -1;
2186 2184 }
2187 2185 self->ntinitialized = 1;
2188 2186 self->ntrev = (int)index_length(self);
2189 2187 self->ntlookups = 1;
2190 2188 self->ntmisses = 0;
2191 2189 }
2192 2190 return 0;
2193 2191 }
2194 2192
2195 2193 /*
2196 2194 * Return values:
2197 2195 *
2198 2196 * -3: error (exception set)
2199 2197 * -2: not found (no exception set)
2200 2198 * rest: valid rev
2201 2199 */
2202 2200 static int index_find_node(indexObject *self, const char *node)
2203 2201 {
2204 2202 int rev;
2205 2203
2206 2204 if (index_init_nt(self) == -1)
2207 2205 return -3;
2208 2206
2209 2207 self->ntlookups++;
2210 2208 rev = nt_find(&self->nt, node, self->nodelen, 0);
2211 2209 if (rev >= -1)
2212 2210 return rev;
2213 2211
2214 2212 /*
2215 2213 * For the first handful of lookups, we scan the entire index,
2216 2214 * and cache only the matching nodes. This optimizes for cases
2217 2215 * like "hg tip", where only a few nodes are accessed.
2218 2216 *
2219 2217 * After that, we cache every node we visit, using a single
2220 2218 * scan amortized over multiple lookups. This gives the best
2221 2219 * bulk performance, e.g. for "hg log".
2222 2220 */
2223 2221 if (self->ntmisses++ < 4) {
2224 2222 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2225 2223 const char *n = index_node_existing(self, rev);
2226 2224 if (n == NULL)
2227 2225 return -3;
2228 2226 if (memcmp(node, n, self->nodelen) == 0) {
2229 2227 if (nt_insert(&self->nt, n, rev) == -1)
2230 2228 return -3;
2231 2229 break;
2232 2230 }
2233 2231 }
2234 2232 } else {
2235 2233 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2236 2234 const char *n = index_node_existing(self, rev);
2237 2235 if (n == NULL)
2238 2236 return -3;
2239 2237 if (nt_insert(&self->nt, n, rev) == -1) {
2240 2238 self->ntrev = rev + 1;
2241 2239 return -3;
2242 2240 }
2243 2241 if (memcmp(node, n, self->nodelen) == 0) {
2244 2242 break;
2245 2243 }
2246 2244 }
2247 2245 self->ntrev = rev;
2248 2246 }
2249 2247
2250 2248 if (rev >= 0)
2251 2249 return rev;
2252 2250 return -2;
2253 2251 }
2254 2252
2255 2253 static PyObject *index_getitem(indexObject *self, PyObject *value)
2256 2254 {
2257 2255 char *node;
2258 2256 int rev;
2259 2257
2260 2258 if (PyLong_Check(value)) {
2261 2259 long idx;
2262 2260 if (!pylong_to_long(value, &idx)) {
2263 2261 return NULL;
2264 2262 }
2265 2263 return index_get(self, idx);
2266 2264 }
2267 2265
2268 2266 if (node_check(self->nodelen, value, &node) == -1)
2269 2267 return NULL;
2270 2268 rev = index_find_node(self, node);
2271 2269 if (rev >= -1)
2272 2270 return PyLong_FromLong(rev);
2273 2271 if (rev == -2)
2274 2272 raise_revlog_error();
2275 2273 return NULL;
2276 2274 }
2277 2275
2278 2276 /*
2279 2277 * Fully populate the radix tree.
2280 2278 */
2281 2279 static int index_populate_nt(indexObject *self)
2282 2280 {
2283 2281 int rev;
2284 2282 if (self->ntrev > 0) {
2285 2283 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2286 2284 const char *n = index_node_existing(self, rev);
2287 2285 if (n == NULL)
2288 2286 return -1;
2289 2287 if (nt_insert(&self->nt, n, rev) == -1)
2290 2288 return -1;
2291 2289 }
2292 2290 self->ntrev = -1;
2293 2291 }
2294 2292 return 0;
2295 2293 }
2296 2294
2297 2295 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
2298 2296 {
2299 2297 const char *fullnode;
2300 2298 Py_ssize_t nodelen;
2301 2299 char *node;
2302 2300 int rev, i;
2303 2301
2304 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
2302 if (!PyArg_ParseTuple(args, "y#", &node, &nodelen))
2305 2303 return NULL;
2306 2304
2307 2305 if (nodelen < 1) {
2308 2306 PyErr_SetString(PyExc_ValueError, "key too short");
2309 2307 return NULL;
2310 2308 }
2311 2309
2312 2310 if (nodelen > 2 * self->nodelen) {
2313 2311 PyErr_SetString(PyExc_ValueError, "key too long");
2314 2312 return NULL;
2315 2313 }
2316 2314
2317 2315 for (i = 0; i < nodelen; i++)
2318 2316 hexdigit(node, i);
2319 2317 if (PyErr_Occurred()) {
2320 2318 /* input contains non-hex characters */
2321 2319 PyErr_Clear();
2322 2320 Py_RETURN_NONE;
2323 2321 }
2324 2322
2325 2323 if (index_init_nt(self) == -1)
2326 2324 return NULL;
2327 2325 if (index_populate_nt(self) == -1)
2328 2326 return NULL;
2329 2327 rev = nt_partialmatch(&self->nt, node, nodelen);
2330 2328
2331 2329 switch (rev) {
2332 2330 case -4:
2333 2331 raise_revlog_error();
2334 2332 return NULL;
2335 2333 case -2:
2336 2334 Py_RETURN_NONE;
2337 2335 case -1:
2338 2336 return PyBytes_FromStringAndSize(nullid, self->nodelen);
2339 2337 }
2340 2338
2341 2339 fullnode = index_node_existing(self, rev);
2342 2340 if (fullnode == NULL) {
2343 2341 return NULL;
2344 2342 }
2345 2343 return PyBytes_FromStringAndSize(fullnode, self->nodelen);
2346 2344 }
2347 2345
2348 2346 static PyObject *index_shortest(indexObject *self, PyObject *args)
2349 2347 {
2350 2348 PyObject *val;
2351 2349 char *node;
2352 2350 int length;
2353 2351
2354 2352 if (!PyArg_ParseTuple(args, "O", &val))
2355 2353 return NULL;
2356 2354 if (node_check(self->nodelen, val, &node) == -1)
2357 2355 return NULL;
2358 2356
2359 2357 self->ntlookups++;
2360 2358 if (index_init_nt(self) == -1)
2361 2359 return NULL;
2362 2360 if (index_populate_nt(self) == -1)
2363 2361 return NULL;
2364 2362 length = nt_shortest(&self->nt, node);
2365 2363 if (length == -3)
2366 2364 return NULL;
2367 2365 if (length == -2) {
2368 2366 raise_revlog_error();
2369 2367 return NULL;
2370 2368 }
2371 2369 return PyLong_FromLong(length);
2372 2370 }
2373 2371
2374 2372 static PyObject *index_m_get(indexObject *self, PyObject *args)
2375 2373 {
2376 2374 PyObject *val;
2377 2375 char *node;
2378 2376 int rev;
2379 2377
2380 2378 if (!PyArg_ParseTuple(args, "O", &val))
2381 2379 return NULL;
2382 2380 if (node_check(self->nodelen, val, &node) == -1)
2383 2381 return NULL;
2384 2382 rev = index_find_node(self, node);
2385 2383 if (rev == -3)
2386 2384 return NULL;
2387 2385 if (rev == -2)
2388 2386 Py_RETURN_NONE;
2389 2387 return PyLong_FromLong(rev);
2390 2388 }
2391 2389
2392 2390 static int index_contains(indexObject *self, PyObject *value)
2393 2391 {
2394 2392 char *node;
2395 2393
2396 2394 if (PyLong_Check(value)) {
2397 2395 long rev;
2398 2396 if (!pylong_to_long(value, &rev)) {
2399 2397 return -1;
2400 2398 }
2401 2399 return rev >= -1 && rev < index_length(self);
2402 2400 }
2403 2401
2404 2402 if (node_check(self->nodelen, value, &node) == -1)
2405 2403 return -1;
2406 2404
2407 2405 switch (index_find_node(self, node)) {
2408 2406 case -3:
2409 2407 return -1;
2410 2408 case -2:
2411 2409 return 0;
2412 2410 default:
2413 2411 return 1;
2414 2412 }
2415 2413 }
2416 2414
2417 2415 static PyObject *index_m_has_node(indexObject *self, PyObject *args)
2418 2416 {
2419 2417 int ret = index_contains(self, args);
2420 2418 if (ret < 0)
2421 2419 return NULL;
2422 2420 return PyBool_FromLong((long)ret);
2423 2421 }
2424 2422
2425 2423 static PyObject *index_m_rev(indexObject *self, PyObject *val)
2426 2424 {
2427 2425 char *node;
2428 2426 int rev;
2429 2427
2430 2428 if (node_check(self->nodelen, val, &node) == -1)
2431 2429 return NULL;
2432 2430 rev = index_find_node(self, node);
2433 2431 if (rev >= -1)
2434 2432 return PyLong_FromLong(rev);
2435 2433 if (rev == -2)
2436 2434 raise_revlog_error();
2437 2435 return NULL;
2438 2436 }
2439 2437
2440 2438 typedef uint64_t bitmask;
2441 2439
2442 2440 /*
2443 2441 * Given a disjoint set of revs, return all candidates for the
2444 2442 * greatest common ancestor. In revset notation, this is the set
2445 2443 * "heads(::a and ::b and ...)"
2446 2444 */
2447 2445 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
2448 2446 int revcount)
2449 2447 {
2450 2448 const bitmask allseen = (1ull << revcount) - 1;
2451 2449 const bitmask poison = 1ull << revcount;
2452 2450 PyObject *gca = PyList_New(0);
2453 2451 int i, v, interesting;
2454 2452 int maxrev = -1;
2455 2453 bitmask sp;
2456 2454 bitmask *seen;
2457 2455
2458 2456 if (gca == NULL)
2459 2457 return PyErr_NoMemory();
2460 2458
2461 2459 for (i = 0; i < revcount; i++) {
2462 2460 if (revs[i] > maxrev)
2463 2461 maxrev = revs[i];
2464 2462 }
2465 2463
2466 2464 seen = calloc(sizeof(*seen), maxrev + 1);
2467 2465 if (seen == NULL) {
2468 2466 Py_DECREF(gca);
2469 2467 return PyErr_NoMemory();
2470 2468 }
2471 2469
2472 2470 for (i = 0; i < revcount; i++)
2473 2471 seen[revs[i]] = 1ull << i;
2474 2472
2475 2473 interesting = revcount;
2476 2474
2477 2475 for (v = maxrev; v >= 0 && interesting; v--) {
2478 2476 bitmask sv = seen[v];
2479 2477 int parents[2];
2480 2478
2481 2479 if (!sv)
2482 2480 continue;
2483 2481
2484 2482 if (sv < poison) {
2485 2483 interesting -= 1;
2486 2484 if (sv == allseen) {
2487 2485 PyObject *obj = PyLong_FromLong(v);
2488 2486 if (obj == NULL)
2489 2487 goto bail;
2490 2488 if (PyList_Append(gca, obj) == -1) {
2491 2489 Py_DECREF(obj);
2492 2490 goto bail;
2493 2491 }
2494 2492 sv |= poison;
2495 2493 for (i = 0; i < revcount; i++) {
2496 2494 if (revs[i] == v)
2497 2495 goto done;
2498 2496 }
2499 2497 }
2500 2498 }
2501 2499 if (index_get_parents(self, v, parents, maxrev) < 0)
2502 2500 goto bail;
2503 2501
2504 2502 for (i = 0; i < 2; i++) {
2505 2503 int p = parents[i];
2506 2504 if (p == -1)
2507 2505 continue;
2508 2506 sp = seen[p];
2509 2507 if (sv < poison) {
2510 2508 if (sp == 0) {
2511 2509 seen[p] = sv;
2512 2510 interesting++;
2513 2511 } else if (sp != sv)
2514 2512 seen[p] |= sv;
2515 2513 } else {
2516 2514 if (sp && sp < poison)
2517 2515 interesting--;
2518 2516 seen[p] = sv;
2519 2517 }
2520 2518 }
2521 2519 }
2522 2520
2523 2521 done:
2524 2522 free(seen);
2525 2523 return gca;
2526 2524 bail:
2527 2525 free(seen);
2528 2526 Py_XDECREF(gca);
2529 2527 return NULL;
2530 2528 }
2531 2529
2532 2530 /*
2533 2531 * Given a disjoint set of revs, return the subset with the longest
2534 2532 * path to the root.
2535 2533 */
2536 2534 static PyObject *find_deepest(indexObject *self, PyObject *revs)
2537 2535 {
2538 2536 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
2539 2537 static const Py_ssize_t capacity = 24;
2540 2538 int *depth, *interesting = NULL;
2541 2539 int i, j, v, ninteresting;
2542 2540 PyObject *dict = NULL, *keys = NULL;
2543 2541 long *seen = NULL;
2544 2542 int maxrev = -1;
2545 2543 long final;
2546 2544
2547 2545 if (revcount > capacity) {
2548 2546 PyErr_Format(PyExc_OverflowError,
2549 2547 "bitset size (%ld) > capacity (%ld)",
2550 2548 (long)revcount, (long)capacity);
2551 2549 return NULL;
2552 2550 }
2553 2551
2554 2552 for (i = 0; i < revcount; i++) {
2555 2553 int n = (int)PyLong_AsLong(PyList_GET_ITEM(revs, i));
2556 2554 if (n > maxrev)
2557 2555 maxrev = n;
2558 2556 }
2559 2557
2560 2558 depth = calloc(sizeof(*depth), maxrev + 1);
2561 2559 if (depth == NULL)
2562 2560 return PyErr_NoMemory();
2563 2561
2564 2562 seen = calloc(sizeof(*seen), maxrev + 1);
2565 2563 if (seen == NULL) {
2566 2564 PyErr_NoMemory();
2567 2565 goto bail;
2568 2566 }
2569 2567
2570 2568 interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
2571 2569 if (interesting == NULL) {
2572 2570 PyErr_NoMemory();
2573 2571 goto bail;
2574 2572 }
2575 2573
2576 2574 if (PyList_Sort(revs) == -1)
2577 2575 goto bail;
2578 2576
2579 2577 for (i = 0; i < revcount; i++) {
2580 2578 int n = (int)PyLong_AsLong(PyList_GET_ITEM(revs, i));
2581 2579 long b = 1l << i;
2582 2580 depth[n] = 1;
2583 2581 seen[n] = b;
2584 2582 interesting[b] = 1;
2585 2583 }
2586 2584
2587 2585 /* invariant: ninteresting is the number of non-zero entries in
2588 2586 * interesting. */
2589 2587 ninteresting = (int)revcount;
2590 2588
2591 2589 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
2592 2590 int dv = depth[v];
2593 2591 int parents[2];
2594 2592 long sv;
2595 2593
2596 2594 if (dv == 0)
2597 2595 continue;
2598 2596
2599 2597 sv = seen[v];
2600 2598 if (index_get_parents(self, v, parents, maxrev) < 0)
2601 2599 goto bail;
2602 2600
2603 2601 for (i = 0; i < 2; i++) {
2604 2602 int p = parents[i];
2605 2603 long sp;
2606 2604 int dp;
2607 2605
2608 2606 if (p == -1)
2609 2607 continue;
2610 2608
2611 2609 dp = depth[p];
2612 2610 sp = seen[p];
2613 2611 if (dp <= dv) {
2614 2612 depth[p] = dv + 1;
2615 2613 if (sp != sv) {
2616 2614 interesting[sv] += 1;
2617 2615 seen[p] = sv;
2618 2616 if (sp) {
2619 2617 interesting[sp] -= 1;
2620 2618 if (interesting[sp] == 0)
2621 2619 ninteresting -= 1;
2622 2620 }
2623 2621 }
2624 2622 } else if (dv == dp - 1) {
2625 2623 long nsp = sp | sv;
2626 2624 if (nsp == sp)
2627 2625 continue;
2628 2626 seen[p] = nsp;
2629 2627 interesting[sp] -= 1;
2630 2628 if (interesting[sp] == 0)
2631 2629 ninteresting -= 1;
2632 2630 if (interesting[nsp] == 0)
2633 2631 ninteresting += 1;
2634 2632 interesting[nsp] += 1;
2635 2633 }
2636 2634 }
2637 2635 interesting[sv] -= 1;
2638 2636 if (interesting[sv] == 0)
2639 2637 ninteresting -= 1;
2640 2638 }
2641 2639
2642 2640 final = 0;
2643 2641 j = ninteresting;
2644 2642 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
2645 2643 if (interesting[i] == 0)
2646 2644 continue;
2647 2645 final |= i;
2648 2646 j -= 1;
2649 2647 }
2650 2648 if (final == 0) {
2651 2649 keys = PyList_New(0);
2652 2650 goto bail;
2653 2651 }
2654 2652
2655 2653 dict = PyDict_New();
2656 2654 if (dict == NULL)
2657 2655 goto bail;
2658 2656
2659 2657 for (i = 0; i < revcount; i++) {
2660 2658 PyObject *key;
2661 2659
2662 2660 if ((final & (1 << i)) == 0)
2663 2661 continue;
2664 2662
2665 2663 key = PyList_GET_ITEM(revs, i);
2666 2664 Py_INCREF(key);
2667 2665 Py_INCREF(Py_None);
2668 2666 if (PyDict_SetItem(dict, key, Py_None) == -1) {
2669 2667 Py_DECREF(key);
2670 2668 Py_DECREF(Py_None);
2671 2669 goto bail;
2672 2670 }
2673 2671 }
2674 2672
2675 2673 keys = PyDict_Keys(dict);
2676 2674
2677 2675 bail:
2678 2676 free(depth);
2679 2677 free(seen);
2680 2678 free(interesting);
2681 2679 Py_XDECREF(dict);
2682 2680
2683 2681 return keys;
2684 2682 }
2685 2683
2686 2684 /*
2687 2685 * Given a (possibly overlapping) set of revs, return all the
2688 2686 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
2689 2687 */
2690 2688 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
2691 2689 {
2692 2690 PyObject *ret = NULL;
2693 2691 Py_ssize_t argcount, i, len;
2694 2692 bitmask repeat = 0;
2695 2693 int revcount = 0;
2696 2694 int *revs;
2697 2695
2698 2696 argcount = PySequence_Length(args);
2699 2697 revs = PyMem_Malloc(argcount * sizeof(*revs));
2700 2698 if (argcount > 0 && revs == NULL)
2701 2699 return PyErr_NoMemory();
2702 2700 len = index_length(self);
2703 2701
2704 2702 for (i = 0; i < argcount; i++) {
2705 2703 static const int capacity = 24;
2706 2704 PyObject *obj = PySequence_GetItem(args, i);
2707 2705 bitmask x;
2708 2706 long val;
2709 2707
2710 2708 if (!PyLong_Check(obj)) {
2711 2709 PyErr_SetString(PyExc_TypeError,
2712 2710 "arguments must all be ints");
2713 2711 Py_DECREF(obj);
2714 2712 goto bail;
2715 2713 }
2716 2714 val = PyLong_AsLong(obj);
2717 2715 Py_DECREF(obj);
2718 2716 if (val == -1) {
2719 2717 ret = PyList_New(0);
2720 2718 goto done;
2721 2719 }
2722 2720 if (val < 0 || val >= len) {
2723 2721 PyErr_SetString(PyExc_IndexError, "index out of range");
2724 2722 goto bail;
2725 2723 }
2726 2724 /* this cheesy bloom filter lets us avoid some more
2727 2725 * expensive duplicate checks in the common set-is-disjoint
2728 2726 * case */
2729 2727 x = 1ull << (val & 0x3f);
2730 2728 if (repeat & x) {
2731 2729 int k;
2732 2730 for (k = 0; k < revcount; k++) {
2733 2731 if (val == revs[k])
2734 2732 goto duplicate;
2735 2733 }
2736 2734 } else
2737 2735 repeat |= x;
2738 2736 if (revcount >= capacity) {
2739 2737 PyErr_Format(PyExc_OverflowError,
2740 2738 "bitset size (%d) > capacity (%d)",
2741 2739 revcount, capacity);
2742 2740 goto bail;
2743 2741 }
2744 2742 revs[revcount++] = (int)val;
2745 2743 duplicate:;
2746 2744 }
2747 2745
2748 2746 if (revcount == 0) {
2749 2747 ret = PyList_New(0);
2750 2748 goto done;
2751 2749 }
2752 2750 if (revcount == 1) {
2753 2751 PyObject *obj;
2754 2752 ret = PyList_New(1);
2755 2753 if (ret == NULL)
2756 2754 goto bail;
2757 2755 obj = PyLong_FromLong(revs[0]);
2758 2756 if (obj == NULL)
2759 2757 goto bail;
2760 2758 PyList_SET_ITEM(ret, 0, obj);
2761 2759 goto done;
2762 2760 }
2763 2761
2764 2762 ret = find_gca_candidates(self, revs, revcount);
2765 2763 if (ret == NULL)
2766 2764 goto bail;
2767 2765
2768 2766 done:
2769 2767 PyMem_Free(revs);
2770 2768 return ret;
2771 2769
2772 2770 bail:
2773 2771 PyMem_Free(revs);
2774 2772 Py_XDECREF(ret);
2775 2773 return NULL;
2776 2774 }
2777 2775
2778 2776 /*
2779 2777 * Given a (possibly overlapping) set of revs, return the greatest
2780 2778 * common ancestors: those with the longest path to the root.
2781 2779 */
2782 2780 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2783 2781 {
2784 2782 PyObject *ret;
2785 2783 PyObject *gca = index_commonancestorsheads(self, args);
2786 2784 if (gca == NULL)
2787 2785 return NULL;
2788 2786
2789 2787 if (PyList_GET_SIZE(gca) <= 1) {
2790 2788 return gca;
2791 2789 }
2792 2790
2793 2791 ret = find_deepest(self, gca);
2794 2792 Py_DECREF(gca);
2795 2793 return ret;
2796 2794 }
2797 2795
2798 2796 /*
2799 2797 * Invalidate any trie entries introduced by added revs.
2800 2798 */
2801 2799 static void index_invalidate_added(indexObject *self, Py_ssize_t start)
2802 2800 {
2803 2801 Py_ssize_t i, len;
2804 2802
2805 2803 len = self->length + self->new_length;
2806 2804 i = start - self->length;
2807 2805 if (i < 0)
2808 2806 return;
2809 2807
2810 2808 for (i = start; i < len; i++) {
2811 2809 const char *node = index_node(self, i);
2812 2810 nt_delete_node(&self->nt, node);
2813 2811 }
2814 2812
2815 2813 self->new_length = start - self->length;
2816 2814 }
2817 2815
2818 2816 /*
2819 2817 * Delete a numeric range of revs, which must be at the end of the
2820 2818 * range.
2821 2819 */
2822 2820 static int index_slice_del(indexObject *self, PyObject *item)
2823 2821 {
2824 2822 Py_ssize_t start, stop, step, slicelength;
2825 2823 Py_ssize_t length = index_length(self) + 1;
2826 2824 int ret = 0;
2827 2825
2828 2826 if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
2829 2827 &slicelength) < 0)
2830 2828 return -1;
2831 2829
2832 2830 if (slicelength <= 0)
2833 2831 return 0;
2834 2832
2835 2833 if ((step < 0 && start < stop) || (step > 0 && start > stop))
2836 2834 stop = start;
2837 2835
2838 2836 if (step < 0) {
2839 2837 stop = start + 1;
2840 2838 start = stop + step * (slicelength - 1) - 1;
2841 2839 step = -step;
2842 2840 }
2843 2841
2844 2842 if (step != 1) {
2845 2843 PyErr_SetString(PyExc_ValueError,
2846 2844 "revlog index delete requires step size of 1");
2847 2845 return -1;
2848 2846 }
2849 2847
2850 2848 if (stop != length - 1) {
2851 2849 PyErr_SetString(PyExc_IndexError,
2852 2850 "revlog index deletion indices are invalid");
2853 2851 return -1;
2854 2852 }
2855 2853
2856 2854 if (start < self->length) {
2857 2855 if (self->ntinitialized) {
2858 2856 Py_ssize_t i;
2859 2857
2860 2858 for (i = start; i < self->length; i++) {
2861 2859 const char *node = index_node_existing(self, i);
2862 2860 if (node == NULL)
2863 2861 return -1;
2864 2862
2865 2863 nt_delete_node(&self->nt, node);
2866 2864 }
2867 2865 if (self->new_length)
2868 2866 index_invalidate_added(self, self->length);
2869 2867 if (self->ntrev > start)
2870 2868 self->ntrev = (int)start;
2871 2869 } else if (self->new_length) {
2872 2870 self->new_length = 0;
2873 2871 }
2874 2872
2875 2873 self->length = start;
2876 2874 goto done;
2877 2875 }
2878 2876
2879 2877 if (self->ntinitialized) {
2880 2878 index_invalidate_added(self, start);
2881 2879 if (self->ntrev > start)
2882 2880 self->ntrev = (int)start;
2883 2881 } else {
2884 2882 self->new_length = start - self->length;
2885 2883 }
2886 2884 done:
2887 2885 Py_CLEAR(self->headrevs);
2888 2886 return ret;
2889 2887 }
2890 2888
2891 2889 /*
2892 2890 * Supported ops:
2893 2891 *
2894 2892 * slice deletion
2895 2893 * string assignment (extend node->rev mapping)
2896 2894 * string deletion (shrink node->rev mapping)
2897 2895 */
2898 2896 static int index_assign_subscript(indexObject *self, PyObject *item,
2899 2897 PyObject *value)
2900 2898 {
2901 2899 char *node;
2902 2900 long rev;
2903 2901
2904 2902 if (PySlice_Check(item) && value == NULL)
2905 2903 return index_slice_del(self, item);
2906 2904
2907 2905 if (node_check(self->nodelen, item, &node) == -1)
2908 2906 return -1;
2909 2907
2910 2908 if (value == NULL)
2911 2909 return self->ntinitialized ? nt_delete_node(&self->nt, node)
2912 2910 : 0;
2913 2911 rev = PyLong_AsLong(value);
2914 2912 if (rev > INT_MAX || rev < 0) {
2915 2913 if (!PyErr_Occurred())
2916 2914 PyErr_SetString(PyExc_ValueError, "rev out of range");
2917 2915 return -1;
2918 2916 }
2919 2917
2920 2918 if (index_init_nt(self) == -1)
2921 2919 return -1;
2922 2920 return nt_insert(&self->nt, node, (int)rev);
2923 2921 }
2924 2922
2925 2923 /*
2926 2924 * Find all RevlogNG entries in an index that has inline data. Update
2927 2925 * the optional "offsets" table with those entries.
2928 2926 */
2929 2927 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
2930 2928 {
2931 2929 const char *data = (const char *)self->buf.buf;
2932 2930 Py_ssize_t pos = 0;
2933 2931 Py_ssize_t end = self->buf.len;
2934 2932 long incr = self->entry_size;
2935 2933 Py_ssize_t len = 0;
2936 2934
2937 2935 while (pos + self->entry_size <= end && pos >= 0) {
2938 2936 uint32_t comp_len, sidedata_comp_len = 0;
2939 2937 /* 3rd element of header is length of compressed inline data */
2940 2938 if (self->format_version == format_v1) {
2941 2939 comp_len =
2942 2940 getbe32(data + pos + entry_v1_offset_comp_len);
2943 2941 sidedata_comp_len = 0;
2944 2942 } else if (self->format_version == format_v2) {
2945 2943 comp_len =
2946 2944 getbe32(data + pos + entry_v2_offset_comp_len);
2947 2945 sidedata_comp_len = getbe32(
2948 2946 data + pos + entry_v2_offset_sidedata_comp_len);
2949 2947 } else {
2950 2948 raise_revlog_error();
2951 2949 return -1;
2952 2950 }
2953 2951 incr = self->entry_size + comp_len + sidedata_comp_len;
2954 2952 if (offsets)
2955 2953 offsets[len] = data + pos;
2956 2954 len++;
2957 2955 pos += incr;
2958 2956 }
2959 2957
2960 2958 if (pos != end) {
2961 2959 if (!PyErr_Occurred())
2962 2960 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2963 2961 return -1;
2964 2962 }
2965 2963
2966 2964 return len;
2967 2965 }
2968 2966
2969 2967 static int index_init(indexObject *self, PyObject *args, PyObject *kwargs)
2970 2968 {
2971 2969 PyObject *data_obj, *inlined_obj;
2972 2970 Py_ssize_t size;
2973 2971
2974 2972 static char *kwlist[] = {"data", "inlined", "format", NULL};
2975 2973
2976 2974 /* Initialize before argument-checking to avoid index_dealloc() crash.
2977 2975 */
2978 2976 self->added = NULL;
2979 2977 self->new_length = 0;
2980 2978 self->added_length = 0;
2981 2979 self->data = NULL;
2982 2980 memset(&self->buf, 0, sizeof(self->buf));
2983 2981 self->headrevs = NULL;
2984 2982 self->filteredrevs = Py_None;
2985 2983 Py_INCREF(Py_None);
2986 2984 self->ntinitialized = 0;
2987 2985 self->offsets = NULL;
2988 2986 self->nodelen = 20;
2989 2987 self->nullentry = NULL;
2990 2988 self->rust_ext_compat = 1;
2991 2989 self->format_version = format_v1;
2992 2990
2993 2991 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|l", kwlist,
2994 2992 &data_obj, &inlined_obj,
2995 2993 &(self->format_version)))
2996 2994 return -1;
2997 2995 if (!PyObject_CheckBuffer(data_obj)) {
2998 2996 PyErr_SetString(PyExc_TypeError,
2999 2997 "data does not support buffer interface");
3000 2998 return -1;
3001 2999 }
3002 3000 if (self->nodelen < 20 || self->nodelen > (Py_ssize_t)sizeof(nullid)) {
3003 3001 PyErr_SetString(PyExc_RuntimeError, "unsupported node size");
3004 3002 return -1;
3005 3003 }
3006 3004
3007 3005 if (self->format_version == format_v1) {
3008 3006 self->entry_size = v1_entry_size;
3009 3007 } else if (self->format_version == format_v2) {
3010 3008 self->entry_size = v2_entry_size;
3011 3009 } else if (self->format_version == format_cl2) {
3012 3010 self->entry_size = cl2_entry_size;
3013 3011 }
3014 3012
3015 self->nullentry =
3016 Py_BuildValue(PY23("iiiiiiis#iiBBi", "iiiiiiiy#iiBBi"), 0, 0, 0, -1,
3017 -1, -1, -1, nullid, self->nodelen, 0, 0,
3018 comp_mode_inline, comp_mode_inline, rank_unknown);
3013 self->nullentry = Py_BuildValue(
3014 "iiiiiiiy#iiBBi", 0, 0, 0, -1, -1, -1, -1, nullid, self->nodelen, 0,
3015 0, comp_mode_inline, comp_mode_inline, rank_unknown);
3019 3016
3020 3017 if (!self->nullentry)
3021 3018 return -1;
3022 3019 PyObject_GC_UnTrack(self->nullentry);
3023 3020
3024 3021 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
3025 3022 return -1;
3026 3023 size = self->buf.len;
3027 3024
3028 3025 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
3029 3026 self->data = data_obj;
3030 3027
3031 3028 self->ntlookups = self->ntmisses = 0;
3032 3029 self->ntrev = -1;
3033 3030 Py_INCREF(self->data);
3034 3031
3035 3032 if (self->inlined) {
3036 3033 Py_ssize_t len = inline_scan(self, NULL);
3037 3034 if (len == -1)
3038 3035 goto bail;
3039 3036 self->length = len;
3040 3037 } else {
3041 3038 if (size % self->entry_size) {
3042 3039 PyErr_SetString(PyExc_ValueError, "corrupt index file");
3043 3040 goto bail;
3044 3041 }
3045 3042 self->length = size / self->entry_size;
3046 3043 }
3047 3044
3048 3045 return 0;
3049 3046 bail:
3050 3047 return -1;
3051 3048 }
3052 3049
3053 3050 static PyObject *index_nodemap(indexObject *self)
3054 3051 {
3055 3052 Py_INCREF(self);
3056 3053 return (PyObject *)self;
3057 3054 }
3058 3055
3059 3056 static void _index_clearcaches(indexObject *self)
3060 3057 {
3061 3058 if (self->offsets) {
3062 3059 PyMem_Free((void *)self->offsets);
3063 3060 self->offsets = NULL;
3064 3061 }
3065 3062 if (self->ntinitialized) {
3066 3063 nt_dealloc(&self->nt);
3067 3064 }
3068 3065 self->ntinitialized = 0;
3069 3066 Py_CLEAR(self->headrevs);
3070 3067 }
3071 3068
3072 3069 static PyObject *index_clearcaches(indexObject *self)
3073 3070 {
3074 3071 _index_clearcaches(self);
3075 3072 self->ntrev = -1;
3076 3073 self->ntlookups = self->ntmisses = 0;
3077 3074 Py_RETURN_NONE;
3078 3075 }
3079 3076
3080 3077 static void index_dealloc(indexObject *self)
3081 3078 {
3082 3079 _index_clearcaches(self);
3083 3080 Py_XDECREF(self->filteredrevs);
3084 3081 if (self->buf.buf) {
3085 3082 PyBuffer_Release(&self->buf);
3086 3083 memset(&self->buf, 0, sizeof(self->buf));
3087 3084 }
3088 3085 Py_XDECREF(self->data);
3089 3086 PyMem_Free(self->added);
3090 3087 Py_XDECREF(self->nullentry);
3091 3088 PyObject_Del(self);
3092 3089 }
3093 3090
/*
 * Sequence protocol slots of the index type: only length, item access
 * (index_get) and containment (index_contains) are provided; every
 * other slot is left empty.
 */
3094 3091 static PySequenceMethods index_sequence_methods = {
3095 3092 (lenfunc)index_length, /* sq_length */
3096 3093 0, /* sq_concat */
3097 3094 0, /* sq_repeat */
3098 3095 (ssizeargfunc)index_get, /* sq_item */
3099 3096 0, /* sq_slice */
3100 3097 0, /* sq_ass_item */
3101 3098 0, /* sq_ass_slice */
3102 3099 (objobjproc)index_contains, /* sq_contains */
3103 3100 };
3104 3101
/*
 * Mapping protocol slots of the index type: subscription goes through
 * index_getitem, assignment and deletion through
 * index_assign_subscript.
 */
3105 3102 static PyMappingMethods index_mapping_methods = {
3106 3103 (lenfunc)index_length, /* mp_length */
3107 3104 (binaryfunc)index_getitem, /* mp_subscript */
3108 3105 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
3109 3106 };
3110 3107
/*
 * Python-visible methods of the index type.
 *
 * "get" and "get_rev" share index_m_get; "headrevs" and
 * "headrevsfiltered" share index_headrevs.
 *
 * NOTE(review): the doc string of "get" says "get an index entry" while
 * the shared implementation is also described as returning a rev, and
 * "slicechunktodensity" carries the same doc string as "deltachain" --
 * both look like copy/paste leftovers; confirm before relying on them.
 */
3111 3108 static PyMethodDef index_methods[] = {
3112 3109 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
3113 3110 "return the gca set of the given revs"},
3114 3111 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
3115 3112 METH_VARARGS,
3116 3113 "return the heads of the common ancestors of the given revs"},
3117 3114 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
3118 3115 "clear the index caches"},
3119 3116 {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
3120 3117 {"get_rev", (PyCFunction)index_m_get, METH_VARARGS,
3121 3118 "return `rev` associated with a node or None"},
3122 3119 {"has_node", (PyCFunction)index_m_has_node, METH_O,
3123 3120 "return True if the node exist in the index"},
3124 3121 {"rev", (PyCFunction)index_m_rev, METH_O,
3125 3122 "return `rev` associated with a node or raise RevlogError"},
3126 3123 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
3127 3124 "compute phases"},
3128 3125 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
3129 3126 "reachableroots"},
3130 3127 {"replace_sidedata_info", (PyCFunction)index_replace_sidedata_info,
3131 3128 METH_VARARGS, "replace an existing index entry with a new value"},
3132 3129 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
3133 3130 "get head revisions"}, /* Can do filtering since 3.2 */
3134 3131 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
3135 3132 "get filtered head revisions"}, /* Can always do filtering */
3136 3133 {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
3137 3134 "True if the object is a snapshot"},
3138 3135 {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS,
3139 3136 "Gather snapshot data in a cache dict"},
3140 3137 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
3141 3138 "determine revisions with deltas to reconstruct fulltext"},
3142 3139 {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
3143 3140 METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"},
3144 3141 {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
3145 3142 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
3146 3143 "match a potentially ambiguous node ID"},
3147 3144 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
3148 3145 "find length of shortest hex nodeid of a binary ID"},
3149 3146 {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
3150 3147 {"entry_binary", (PyCFunction)index_entry_binary, METH_O,
3151 3148 "return an entry in binary form"},
3152 3149 {"pack_header", (PyCFunction)index_pack_header, METH_VARARGS,
3153 3150 "pack the revlog header information into binary"},
3154 3151 {NULL} /* Sentinel */
3155 3152 };
3156 3153
/*
 * Computed attributes of the index type: a read-only "nodemap" backed
 * by index_nodemap (no setter is registered).
 */
3157 3154 static PyGetSetDef index_getset[] = {
3158 3155 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
3159 3156 {NULL} /* Sentinel */
3160 3157 };
3161 3158
3162 3159 static PyMemberDef index_members[] = {
3163 3160 {"entry_size", T_LONG, offsetof(indexObject, entry_size), 0,
3164 3161 "size of an index entry"},
3165 3162 {"rust_ext_compat", T_LONG, offsetof(indexObject, rust_ext_compat), 0,
3166 3163 "size of an index entry"},
3167 3164 {NULL} /* Sentinel */
3168 3165 };
3169 3166
/*
 * Type object for "parsers.index".
 *
 * Instances are sized as indexObject, destroyed by index_dealloc and
 * initialized by index_init.  The sequence, mapping, method, member and
 * getset tables are the index_* tables defined in this file.  tp_new is
 * not set here; revlog_module_init() assigns PyType_GenericNew before
 * calling PyType_Ready().
 */
3170 3167 PyTypeObject HgRevlogIndex_Type = {
3171 3168 PyVarObject_HEAD_INIT(NULL, 0) /* header */
3172 3169 "parsers.index", /* tp_name */
3173 3170 sizeof(indexObject), /* tp_basicsize */
3174 3171 0, /* tp_itemsize */
3175 3172 (destructor)index_dealloc, /* tp_dealloc */
3176 3173 0, /* tp_print */
3177 3174 0, /* tp_getattr */
3178 3175 0, /* tp_setattr */
3179 3176 0, /* tp_compare */
3180 3177 0, /* tp_repr */
3181 3178 0, /* tp_as_number */
3182 3179 &index_sequence_methods, /* tp_as_sequence */
3183 3180 &index_mapping_methods, /* tp_as_mapping */
3184 3181 0, /* tp_hash */
3185 3182 0, /* tp_call */
3186 3183 0, /* tp_str */
3187 3184 0, /* tp_getattro */
3188 3185 0, /* tp_setattro */
3189 3186 0, /* tp_as_buffer */
3190 3187 Py_TPFLAGS_DEFAULT, /* tp_flags */
3191 3188 "revlog index", /* tp_doc */
3192 3189 0, /* tp_traverse */
3193 3190 0, /* tp_clear */
3194 3191 0, /* tp_richcompare */
3195 3192 0, /* tp_weaklistoffset */
3196 3193 0, /* tp_iter */
3197 3194 0, /* tp_iternext */
3198 3195 index_methods, /* tp_methods */
3199 3196 index_members, /* tp_members */
3200 3197 index_getset, /* tp_getset */
3201 3198 0, /* tp_base */
3202 3199 0, /* tp_dict */
3203 3200 0, /* tp_descr_get */
3204 3201 0, /* tp_descr_set */
3205 3202 0, /* tp_dictoffset */
3206 3203 (initproc)index_init, /* tp_init */
3207 3204 0, /* tp_alloc */
3208 3205 };
3209 3206
3210 3207 /*
3211 3208 * returns a tuple of the form (index, cache) with elements as
3212 3209 * follows:
3213 3210 *
3214 3211 * index: an index object that lazily parses Revlog (v1 or v2) records
3215 3212 * cache: if data is inlined, a tuple (0, index_file_content), else None
3216 3213 * index_file_content could be a string, or a buffer
3217 3214 *
3218 3215 * added complications are for backwards compatibility
3219 3216 */
3220 3217 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs)
3221 3218 {
3222 3219 PyObject *cache = NULL;
3223 3220 indexObject *idx;
3224 3221 int ret;
3225 3222
3226 3223 idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
3227 3224 if (idx == NULL)
3228 3225 goto bail;
3229 3226
3230 3227 ret = index_init(idx, args, kwargs);
3231 3228 if (ret == -1)
3232 3229 goto bail;
3233 3230
3234 3231 if (idx->inlined) {
3235 3232 cache = Py_BuildValue("iO", 0, idx->data);
3236 3233 if (cache == NULL)
3237 3234 goto bail;
3238 3235 } else {
3239 3236 cache = Py_None;
3240 3237 Py_INCREF(cache);
3241 3238 }
3242 3239
3243 3240 return Py_BuildValue("NN", idx, cache);
3244 3241
3245 3242 bail:
3246 3243 Py_XDECREF(idx);
3247 3244 Py_XDECREF(cache);
3248 3245 return NULL;
3249 3246 }
3250 3247
/*
 * Function table published to other extension modules: an ABI version
 * number followed by pointers to index_length, index_node and
 * HgRevlogIndex_GetParents.  revlog_module_init() wraps it in a capsule
 * named "mercurial.cext.parsers.revlog_CAPI".
 */
3251 3248 static Revlog_CAPI CAPI = {
3252 3249 /* increment the abi_version field upon each change in the Revlog_CAPI
3253 3250 struct or in the ABI of the listed functions */
3254 3251 2,
3255 3252 index_length,
3256 3253 index_node,
3257 3254 HgRevlogIndex_GetParents,
3258 3255 };
3259 3256
3260 3257 void revlog_module_init(PyObject *mod)
3261 3258 {
3262 3259 PyObject *caps = NULL;
3263 3260 HgRevlogIndex_Type.tp_new = PyType_GenericNew;
3264 3261 if (PyType_Ready(&HgRevlogIndex_Type) < 0)
3265 3262 return;
3266 3263 Py_INCREF(&HgRevlogIndex_Type);
3267 3264 PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);
3268 3265
3269 3266 nodetreeType.tp_new = PyType_GenericNew;
3270 3267 if (PyType_Ready(&nodetreeType) < 0)
3271 3268 return;
3272 3269 Py_INCREF(&nodetreeType);
3273 3270 PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);
3274 3271
3275 3272 caps = PyCapsule_New(&CAPI, "mercurial.cext.parsers.revlog_CAPI", NULL);
3276 3273 if (caps != NULL)
3277 3274 PyModule_AddObject(mod, "revlog_CAPI", caps);
3278 3275 }
@@ -1,92 +1,85 b''
1 1 /*
2 2 util.h - utility functions for interfacing with the various python APIs.
3 3
4 4 This software may be used and distributed according to the terms of
5 5 the GNU General Public License, incorporated herein by reference.
6 6 */
7 7
8 8 #ifndef _HG_UTIL_H_
9 9 #define _HG_UTIL_H_
10 10
11 11 #include "compat.h"
12 12
13 13 #if PY_MAJOR_VERSION >= 3
14 14 #define IS_PY3K
15 15 #endif
16 16
17 /* helper to switch things like string literal depending on Python version */
18 #ifdef IS_PY3K
19 #define PY23(py2, py3) py3
20 #else
21 #define PY23(py2, py3) py2
22 #endif
23
24 17 /* clang-format off */
/* Python object layout: the standard object header (PyObject_HEAD)
   followed by five int fields.
   NOTE(review): flags presumably holds an OR of the dirstate_flag_* masks
   declared below, and mtime_s / mtime_ns look like the seconds and
   nanoseconds parts of a modification time -- confirm against the code
   that fills this struct. */
25 18 typedef struct {
26 19 PyObject_HEAD
27 20 int flags;
28 21 int mode;
29 22 int size;
30 23 int mtime_s;
31 24 int mtime_ns;
32 25 } dirstateItemObject;
33 26 /* clang-format on */
34 27
/* Sixteen single-bit masks, one per bit position 0 through 15; no two
   constants share a bit.
   NOTE(review): presumably these are combined into dirstateItemObject.flags
   -- confirm against the users of these constants. */
35 28 static const int dirstate_flag_wc_tracked = 1 << 0;
36 29 static const int dirstate_flag_p1_tracked = 1 << 1;
37 30 static const int dirstate_flag_p2_info = 1 << 2;
38 31 static const int dirstate_flag_mode_exec_perm = 1 << 3;
39 32 static const int dirstate_flag_mode_is_symlink = 1 << 4;
40 33 static const int dirstate_flag_has_fallback_exec = 1 << 5;
41 34 static const int dirstate_flag_fallback_exec = 1 << 6;
42 35 static const int dirstate_flag_has_fallback_symlink = 1 << 7;
43 36 static const int dirstate_flag_fallback_symlink = 1 << 8;
44 37 static const int dirstate_flag_expected_state_is_modified = 1 << 9;
45 38 static const int dirstate_flag_has_meaningful_data = 1 << 10;
46 39 static const int dirstate_flag_has_mtime = 1 << 11;
47 40 static const int dirstate_flag_mtime_second_ambiguous = 1 << 12;
48 41 static const int dirstate_flag_directory = 1 << 13;
49 42 static const int dirstate_flag_all_unknown_recorded = 1 << 14;
50 43 static const int dirstate_flag_all_ignored_recorded = 1 << 15;
51 44
/* dirstateItemType is only declared here; its definition lives in another
   translation unit. */
52 45 extern PyTypeObject dirstateItemType;
/* Non-zero only when op's type is exactly dirstateItemType: the test is a
   pointer comparison on Py_TYPE(op), so an instance of a subtype does not
   match. */
53 46 #define dirstate_tuple_check(op) (Py_TYPE(op) == &dirstateItemType)
54 47
/* Smaller of a and b; defined only when no earlier header already provides
   MIN. The selected argument is evaluated twice, so do not pass expressions
   with side effects. */
55 48 #ifndef MIN
56 49 #define MIN(a, b) (((a) < (b)) ? (a) : (b))
57 50 #endif
58 51 /* VC9 doesn't include bool and lacks stdbool.h based on my searching */
/* Hand-rolled bool/true/false for every MSVC build and for compilers that
   report a pre-C99 __STDC_VERSION__ (an undefined __STDC_VERSION__ evaluates
   to 0 inside #if, so it also lands here); all other compilers get
   <stdbool.h>. */
59 52 #if defined(_MSC_VER) || __STDC_VERSION__ < 199901L
60 53 #define true 1
61 54 #define false 0
62 55 typedef unsigned char bool;
63 56 #else
64 57 #include <stdbool.h>
65 58 #endif
66 59
67 60 static inline PyObject *_dict_new_presized(Py_ssize_t expected_size)
68 61 {
69 62 /* _PyDict_NewPresized expects a minused parameter, but it actually
70 63 creates a dictionary that's the nearest power of two bigger than the
71 64 parameter. For example, with the initial minused = 1000, the
72 65 dictionary created has size 1024. Of course in a lot of cases that
73 66 can be greater than the maximum load factor Python's dict object
74 67 expects (= 2/3), so as soon as we cross the threshold we'll resize
75 68 anyway. So create a dictionary that's at least 3/2 the size. */
76 69 return _PyDict_NewPresized(((1 + expected_size) / 2) * 3);
77 70 }
78 71
79 72 /* Convert a PyInt or PyLong to a long. Returns false if there is an
80 73 error, in which case an exception will already have been set. */
81 74 static inline bool pylong_to_long(PyObject *pylong, long *out)
82 75 {
83 76 *out = PyLong_AsLong(pylong);
84 77 /* Fast path to avoid hitting PyErr_Occurred if the value was obviously
85 78 * not an error. */
86 79 if (*out != -1) {
87 80 return true;
88 81 }
89 82 return PyErr_Occurred() == NULL;
90 83 }
91 84
92 85 #endif /* _HG_UTIL_H_ */
General Comments 0
You need to be logged in to leave comments. Login now