dirstate-entry: introduce dedicated accessors for v1 serialization...
marmoute
r48298:80dc1d45 default
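This changeset adds dedicated `v1_state()`, `v1_mode()`, `v1_size()` and `v1_mtime()` accessors to the dirstate entry type, in both the C extension (`mercurial/cext/parsers.c`, first hunk below) and the pure-Python fallback (`mercurial/pure/parsers.py`, second hunk), and switches the v1 packing code to use them instead of raw tuple indexing. A minimal sketch of the resulting call pattern; the helper and its values are illustrative only and are not part of the patch:

```python
import struct

def pack_v1_record(entry, fname):
    """Illustrative helper: pack one dirstate-v1 record for `fname`.

    `entry` is expected to expose the new accessors; previously the packing
    code read entry[0]..entry[3] directly.
    """
    header = struct.pack(
        b'>cllll',
        entry.v1_state(),   # one of b'n', b'a', b'r', b'm'
        entry.v1_mode(),
        entry.v1_size(),
        entry.v1_mtime(),
        len(fname),
    )
    return header + fname
```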
@@ -1,763 +1,795 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12 #include <ctype.h>
13 13 #include <stddef.h>
14 14 #include <string.h>
15 15
16 16 #include "bitmanipulation.h"
17 17 #include "charencode.h"
18 18 #include "util.h"
19 19
20 20 #ifdef IS_PY3K
21 21 /* The mapping of Python types is meant to be temporary to get Python
22 22 * 3 to compile. We should remove this once Python 3 is fully
23 23 * supported and proper types are used in the extensions themselves. */
24 24 #define PyInt_Check PyLong_Check
25 25 #define PyInt_FromLong PyLong_FromLong
26 26 #define PyInt_FromSsize_t PyLong_FromSsize_t
27 27 #define PyInt_AsLong PyLong_AsLong
28 28 #endif
29 29
30 30 static const char *const versionerrortext = "Python minor version mismatch";
31 31
32 32 static PyObject *dict_new_presized(PyObject *self, PyObject *args)
33 33 {
34 34 Py_ssize_t expected_size;
35 35
36 36 if (!PyArg_ParseTuple(args, "n:make_presized_dict", &expected_size)) {
37 37 return NULL;
38 38 }
39 39
40 40 return _dict_new_presized(expected_size);
41 41 }
42 42
43 43 static inline dirstateTupleObject *make_dirstate_tuple(char state, int mode,
44 44 int size, int mtime)
45 45 {
46 46 dirstateTupleObject *t =
47 47 PyObject_New(dirstateTupleObject, &dirstateTupleType);
48 48 if (!t) {
49 49 return NULL;
50 50 }
51 51 t->state = state;
52 52 t->mode = mode;
53 53 t->size = size;
54 54 t->mtime = mtime;
55 55 return t;
56 56 }
57 57
58 58 static PyObject *dirstate_tuple_new(PyTypeObject *subtype, PyObject *args,
59 59 PyObject *kwds)
60 60 {
61 61 /* We do all the initialization here and not in a tp_init function because
62 62 * dirstate_tuple is immutable. */
63 63 dirstateTupleObject *t;
64 64 char state;
65 65 int size, mode, mtime;
66 66 if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime)) {
67 67 return NULL;
68 68 }
69 69
70 70 t = (dirstateTupleObject *)subtype->tp_alloc(subtype, 1);
71 71 if (!t) {
72 72 return NULL;
73 73 }
74 74 t->state = state;
75 75 t->mode = mode;
76 76 t->size = size;
77 77 t->mtime = mtime;
78 78
79 79 return (PyObject *)t;
80 80 }
81 81
82 82 static void dirstate_tuple_dealloc(PyObject *o)
83 83 {
84 84 PyObject_Del(o);
85 85 }
86 86
87 87 static Py_ssize_t dirstate_tuple_length(PyObject *o)
88 88 {
89 89 return 4;
90 90 }
91 91
92 92 static PyObject *dirstate_tuple_item(PyObject *o, Py_ssize_t i)
93 93 {
94 94 dirstateTupleObject *t = (dirstateTupleObject *)o;
95 95 switch (i) {
96 96 case 0:
97 97 return PyBytes_FromStringAndSize(&t->state, 1);
98 98 case 1:
99 99 return PyInt_FromLong(t->mode);
100 100 case 2:
101 101 return PyInt_FromLong(t->size);
102 102 case 3:
103 103 return PyInt_FromLong(t->mtime);
104 104 default:
105 105 PyErr_SetString(PyExc_IndexError, "index out of range");
106 106 return NULL;
107 107 }
108 108 }
109 109
110 110 static PySequenceMethods dirstate_tuple_sq = {
111 111 dirstate_tuple_length, /* sq_length */
112 112 0, /* sq_concat */
113 113 0, /* sq_repeat */
114 114 dirstate_tuple_item, /* sq_item */
115 115 0, /* sq_ass_item */
116 116 0, /* sq_contains */
117 117 0, /* sq_inplace_concat */
118 118 0 /* sq_inplace_repeat */
119 119 };
120 120
121 static PyObject *dirstatetuple_v1_state(dirstateTupleObject *self)
122 {
123 return PyBytes_FromStringAndSize(&self->state, 1);
124 };
125
126 static PyObject *dirstatetuple_v1_mode(dirstateTupleObject *self)
127 {
128 return PyInt_FromLong(self->mode);
129 };
130
131 static PyObject *dirstatetuple_v1_size(dirstateTupleObject *self)
132 {
133 return PyInt_FromLong(self->size);
134 };
135
136 static PyObject *dirstatetuple_v1_mtime(dirstateTupleObject *self)
137 {
138 return PyInt_FromLong(self->mtime);
139 };
140
141 static PyMethodDef dirstatetuple_methods[] = {
142 {"v1_state", (PyCFunction)dirstatetuple_v1_state, METH_NOARGS,
143 "return a \"state\" suitable for v1 serialization"},
144 {"v1_mode", (PyCFunction)dirstatetuple_v1_mode, METH_NOARGS,
145 "return a \"mode\" suitable for v1 serialization"},
146 {"v1_size", (PyCFunction)dirstatetuple_v1_size, METH_NOARGS,
147 "return a \"size\" suitable for v1 serialization"},
148 {"v1_mtime", (PyCFunction)dirstatetuple_v1_mtime, METH_NOARGS,
149 "return a \"mtime\" suitable for v1 serialization"},
150 {NULL} /* Sentinel */
151 };
152
121 153 PyTypeObject dirstateTupleType = {
122 154 PyVarObject_HEAD_INIT(NULL, 0) /* header */
123 155 "dirstate_tuple", /* tp_name */
124 156 sizeof(dirstateTupleObject), /* tp_basicsize */
125 157 0, /* tp_itemsize */
126 158 (destructor)dirstate_tuple_dealloc, /* tp_dealloc */
127 159 0, /* tp_print */
128 160 0, /* tp_getattr */
129 161 0, /* tp_setattr */
130 162 0, /* tp_compare */
131 163 0, /* tp_repr */
132 164 0, /* tp_as_number */
133 165 &dirstate_tuple_sq, /* tp_as_sequence */
134 166 0, /* tp_as_mapping */
135 167 0, /* tp_hash */
136 168 0, /* tp_call */
137 169 0, /* tp_str */
138 170 0, /* tp_getattro */
139 171 0, /* tp_setattro */
140 172 0, /* tp_as_buffer */
141 173 Py_TPFLAGS_DEFAULT, /* tp_flags */
142 174 "dirstate tuple", /* tp_doc */
143 175 0, /* tp_traverse */
144 176 0, /* tp_clear */
145 177 0, /* tp_richcompare */
146 178 0, /* tp_weaklistoffset */
147 179 0, /* tp_iter */
148 180 0, /* tp_iternext */
149 0, /* tp_methods */
181 dirstatetuple_methods, /* tp_methods */
150 182 0, /* tp_members */
151 183 0, /* tp_getset */
152 184 0, /* tp_base */
153 185 0, /* tp_dict */
154 186 0, /* tp_descr_get */
155 187 0, /* tp_descr_set */
156 188 0, /* tp_dictoffset */
157 189 0, /* tp_init */
158 190 0, /* tp_alloc */
159 191 dirstate_tuple_new, /* tp_new */
160 192 };
161 193
162 194 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
163 195 {
164 196 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
165 197 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
166 198 char state, *cur, *str, *cpos;
167 199 int mode, size, mtime;
168 200 unsigned int flen, pos = 40;
169 201 Py_ssize_t len = 40;
170 202 Py_ssize_t readlen;
171 203
172 204 if (!PyArg_ParseTuple(
173 205 args, PY23("O!O!s#:parse_dirstate", "O!O!y#:parse_dirstate"),
174 206 &PyDict_Type, &dmap, &PyDict_Type, &cmap, &str, &readlen)) {
175 207 goto quit;
176 208 }
177 209
178 210 len = readlen;
179 211
180 212 /* read parents */
181 213 if (len < 40) {
182 214 PyErr_SetString(PyExc_ValueError,
183 215 "too little data for parents");
184 216 goto quit;
185 217 }
186 218
187 219 parents = Py_BuildValue(PY23("s#s#", "y#y#"), str, (Py_ssize_t)20,
188 220 str + 20, (Py_ssize_t)20);
189 221 if (!parents) {
190 222 goto quit;
191 223 }
192 224
193 225 /* read filenames */
194 226 while (pos >= 40 && pos < len) {
195 227 if (pos + 17 > len) {
196 228 PyErr_SetString(PyExc_ValueError,
197 229 "overflow in dirstate");
198 230 goto quit;
199 231 }
200 232 cur = str + pos;
201 233 /* unpack header */
202 234 state = *cur;
203 235 mode = getbe32(cur + 1);
204 236 size = getbe32(cur + 5);
205 237 mtime = getbe32(cur + 9);
206 238 flen = getbe32(cur + 13);
207 239 pos += 17;
208 240 cur += 17;
209 241 if (flen > len - pos) {
210 242 PyErr_SetString(PyExc_ValueError,
211 243 "overflow in dirstate");
212 244 goto quit;
213 245 }
214 246
215 247 entry =
216 248 (PyObject *)make_dirstate_tuple(state, mode, size, mtime);
217 249 cpos = memchr(cur, 0, flen);
218 250 if (cpos) {
219 251 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
220 252 cname = PyBytes_FromStringAndSize(
221 253 cpos + 1, flen - (cpos - cur) - 1);
222 254 if (!fname || !cname ||
223 255 PyDict_SetItem(cmap, fname, cname) == -1 ||
224 256 PyDict_SetItem(dmap, fname, entry) == -1) {
225 257 goto quit;
226 258 }
227 259 Py_DECREF(cname);
228 260 } else {
229 261 fname = PyBytes_FromStringAndSize(cur, flen);
230 262 if (!fname ||
231 263 PyDict_SetItem(dmap, fname, entry) == -1) {
232 264 goto quit;
233 265 }
234 266 }
235 267 Py_DECREF(fname);
236 268 Py_DECREF(entry);
237 269 fname = cname = entry = NULL;
238 270 pos += flen;
239 271 }
240 272
241 273 ret = parents;
242 274 Py_INCREF(ret);
243 275 quit:
244 276 Py_XDECREF(fname);
245 277 Py_XDECREF(cname);
246 278 Py_XDECREF(entry);
247 279 Py_XDECREF(parents);
248 280 return ret;
249 281 }
250 282
251 283 /*
252 284 * Build a set of non-normal and other parent entries from the dirstate dmap
253 285 */
254 286 static PyObject *nonnormalotherparententries(PyObject *self, PyObject *args)
255 287 {
256 288 PyObject *dmap, *fname, *v;
257 289 PyObject *nonnset = NULL, *otherpset = NULL, *result = NULL;
258 290 Py_ssize_t pos;
259 291
260 292 if (!PyArg_ParseTuple(args, "O!:nonnormalentries", &PyDict_Type,
261 293 &dmap)) {
262 294 goto bail;
263 295 }
264 296
265 297 nonnset = PySet_New(NULL);
266 298 if (nonnset == NULL) {
267 299 goto bail;
268 300 }
269 301
270 302 otherpset = PySet_New(NULL);
271 303 if (otherpset == NULL) {
272 304 goto bail;
273 305 }
274 306
275 307 pos = 0;
276 308 while (PyDict_Next(dmap, &pos, &fname, &v)) {
277 309 dirstateTupleObject *t;
278 310 if (!dirstate_tuple_check(v)) {
279 311 PyErr_SetString(PyExc_TypeError,
280 312 "expected a dirstate tuple");
281 313 goto bail;
282 314 }
283 315 t = (dirstateTupleObject *)v;
284 316
285 317 if (t->state == 'n' && t->size == -2) {
286 318 if (PySet_Add(otherpset, fname) == -1) {
287 319 goto bail;
288 320 }
289 321 }
290 322
291 323 if (t->state == 'n' && t->mtime != -1) {
292 324 continue;
293 325 }
294 326 if (PySet_Add(nonnset, fname) == -1) {
295 327 goto bail;
296 328 }
297 329 }
298 330
299 331 result = Py_BuildValue("(OO)", nonnset, otherpset);
300 332 if (result == NULL) {
301 333 goto bail;
302 334 }
303 335 Py_DECREF(nonnset);
304 336 Py_DECREF(otherpset);
305 337 return result;
306 338 bail:
307 339 Py_XDECREF(nonnset);
308 340 Py_XDECREF(otherpset);
309 341 Py_XDECREF(result);
310 342 return NULL;
311 343 }
312 344
313 345 /*
314 346 * Efficiently pack a dirstate object into its on-disk format.
315 347 */
316 348 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
317 349 {
318 350 PyObject *packobj = NULL;
319 351 PyObject *map, *copymap, *pl, *mtime_unset = NULL;
320 352 Py_ssize_t nbytes, pos, l;
321 353 PyObject *k, *v = NULL, *pn;
322 354 char *p, *s;
323 355 int now;
324 356
325 357 if (!PyArg_ParseTuple(args, "O!O!O!i:pack_dirstate", &PyDict_Type, &map,
326 358 &PyDict_Type, &copymap, &PyTuple_Type, &pl,
327 359 &now)) {
328 360 return NULL;
329 361 }
330 362
331 363 if (PyTuple_Size(pl) != 2) {
332 364 PyErr_SetString(PyExc_TypeError, "expected 2-element tuple");
333 365 return NULL;
334 366 }
335 367
336 368 /* Figure out how much we need to allocate. */
337 369 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
338 370 PyObject *c;
339 371 if (!PyBytes_Check(k)) {
340 372 PyErr_SetString(PyExc_TypeError, "expected string key");
341 373 goto bail;
342 374 }
343 375 nbytes += PyBytes_GET_SIZE(k) + 17;
344 376 c = PyDict_GetItem(copymap, k);
345 377 if (c) {
346 378 if (!PyBytes_Check(c)) {
347 379 PyErr_SetString(PyExc_TypeError,
348 380 "expected string key");
349 381 goto bail;
350 382 }
351 383 nbytes += PyBytes_GET_SIZE(c) + 1;
352 384 }
353 385 }
354 386
355 387 packobj = PyBytes_FromStringAndSize(NULL, nbytes);
356 388 if (packobj == NULL) {
357 389 goto bail;
358 390 }
359 391
360 392 p = PyBytes_AS_STRING(packobj);
361 393
362 394 pn = PyTuple_GET_ITEM(pl, 0);
363 395 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
364 396 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
365 397 goto bail;
366 398 }
367 399 memcpy(p, s, l);
368 400 p += 20;
369 401 pn = PyTuple_GET_ITEM(pl, 1);
370 402 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
371 403 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
372 404 goto bail;
373 405 }
374 406 memcpy(p, s, l);
375 407 p += 20;
376 408
377 409 for (pos = 0; PyDict_Next(map, &pos, &k, &v);) {
378 410 dirstateTupleObject *tuple;
379 411 char state;
380 412 int mode, size, mtime;
381 413 Py_ssize_t len, l;
382 414 PyObject *o;
383 415 char *t;
384 416
385 417 if (!dirstate_tuple_check(v)) {
386 418 PyErr_SetString(PyExc_TypeError,
387 419 "expected a dirstate tuple");
388 420 goto bail;
389 421 }
390 422 tuple = (dirstateTupleObject *)v;
391 423
392 424 state = tuple->state;
393 425 mode = tuple->mode;
394 426 size = tuple->size;
395 427 mtime = tuple->mtime;
396 428 if (state == 'n' && mtime == now) {
397 429 /* See pure/parsers.py:pack_dirstate for why we do
398 430 * this. */
399 431 mtime = -1;
400 432 mtime_unset = (PyObject *)make_dirstate_tuple(
401 433 state, mode, size, mtime);
402 434 if (!mtime_unset) {
403 435 goto bail;
404 436 }
405 437 if (PyDict_SetItem(map, k, mtime_unset) == -1) {
406 438 goto bail;
407 439 }
408 440 Py_DECREF(mtime_unset);
409 441 mtime_unset = NULL;
410 442 }
411 443 *p++ = state;
412 444 putbe32((uint32_t)mode, p);
413 445 putbe32((uint32_t)size, p + 4);
414 446 putbe32((uint32_t)mtime, p + 8);
415 447 t = p + 12;
416 448 p += 16;
417 449 len = PyBytes_GET_SIZE(k);
418 450 memcpy(p, PyBytes_AS_STRING(k), len);
419 451 p += len;
420 452 o = PyDict_GetItem(copymap, k);
421 453 if (o) {
422 454 *p++ = '\0';
423 455 l = PyBytes_GET_SIZE(o);
424 456 memcpy(p, PyBytes_AS_STRING(o), l);
425 457 p += l;
426 458 len += l + 1;
427 459 }
428 460 putbe32((uint32_t)len, t);
429 461 }
430 462
431 463 pos = p - PyBytes_AS_STRING(packobj);
432 464 if (pos != nbytes) {
433 465 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
434 466 (long)pos, (long)nbytes);
435 467 goto bail;
436 468 }
437 469
438 470 return packobj;
439 471 bail:
440 472 Py_XDECREF(mtime_unset);
441 473 Py_XDECREF(packobj);
442 474 Py_XDECREF(v);
443 475 return NULL;
444 476 }
445 477
446 478 #define BUMPED_FIX 1
447 479 #define USING_SHA_256 2
448 480 #define FM1_HEADER_SIZE (4 + 8 + 2 + 2 + 1 + 1 + 1)
449 481
450 482 static PyObject *readshas(const char *source, unsigned char num,
451 483 Py_ssize_t hashwidth)
452 484 {
453 485 int i;
454 486 PyObject *list = PyTuple_New(num);
455 487 if (list == NULL) {
456 488 return NULL;
457 489 }
458 490 for (i = 0; i < num; i++) {
459 491 PyObject *hash = PyBytes_FromStringAndSize(source, hashwidth);
460 492 if (hash == NULL) {
461 493 Py_DECREF(list);
462 494 return NULL;
463 495 }
464 496 PyTuple_SET_ITEM(list, i, hash);
465 497 source += hashwidth;
466 498 }
467 499 return list;
468 500 }
469 501
470 502 static PyObject *fm1readmarker(const char *databegin, const char *dataend,
471 503 uint32_t *msize)
472 504 {
473 505 const char *data = databegin;
474 506 const char *meta;
475 507
476 508 double mtime;
477 509 int16_t tz;
478 510 uint16_t flags;
479 511 unsigned char nsuccs, nparents, nmetadata;
480 512 Py_ssize_t hashwidth = 20;
481 513
482 514 PyObject *prec = NULL, *parents = NULL, *succs = NULL;
483 515 PyObject *metadata = NULL, *ret = NULL;
484 516 int i;
485 517
486 518 if (data + FM1_HEADER_SIZE > dataend) {
487 519 goto overflow;
488 520 }
489 521
490 522 *msize = getbe32(data);
491 523 data += 4;
492 524 mtime = getbefloat64(data);
493 525 data += 8;
494 526 tz = getbeint16(data);
495 527 data += 2;
496 528 flags = getbeuint16(data);
497 529 data += 2;
498 530
499 531 if (flags & USING_SHA_256) {
500 532 hashwidth = 32;
501 533 }
502 534
503 535 nsuccs = (unsigned char)(*data++);
504 536 nparents = (unsigned char)(*data++);
505 537 nmetadata = (unsigned char)(*data++);
506 538
507 539 if (databegin + *msize > dataend) {
508 540 goto overflow;
509 541 }
510 542 dataend = databegin + *msize; /* narrow down to marker size */
511 543
512 544 if (data + hashwidth > dataend) {
513 545 goto overflow;
514 546 }
515 547 prec = PyBytes_FromStringAndSize(data, hashwidth);
516 548 data += hashwidth;
517 549 if (prec == NULL) {
518 550 goto bail;
519 551 }
520 552
521 553 if (data + nsuccs * hashwidth > dataend) {
522 554 goto overflow;
523 555 }
524 556 succs = readshas(data, nsuccs, hashwidth);
525 557 if (succs == NULL) {
526 558 goto bail;
527 559 }
528 560 data += nsuccs * hashwidth;
529 561
530 562 if (nparents == 1 || nparents == 2) {
531 563 if (data + nparents * hashwidth > dataend) {
532 564 goto overflow;
533 565 }
534 566 parents = readshas(data, nparents, hashwidth);
535 567 if (parents == NULL) {
536 568 goto bail;
537 569 }
538 570 data += nparents * hashwidth;
539 571 } else {
540 572 parents = Py_None;
541 573 Py_INCREF(parents);
542 574 }
543 575
544 576 if (data + 2 * nmetadata > dataend) {
545 577 goto overflow;
546 578 }
547 579 meta = data + (2 * nmetadata);
548 580 metadata = PyTuple_New(nmetadata);
549 581 if (metadata == NULL) {
550 582 goto bail;
551 583 }
552 584 for (i = 0; i < nmetadata; i++) {
553 585 PyObject *tmp, *left = NULL, *right = NULL;
554 586 Py_ssize_t leftsize = (unsigned char)(*data++);
555 587 Py_ssize_t rightsize = (unsigned char)(*data++);
556 588 if (meta + leftsize + rightsize > dataend) {
557 589 goto overflow;
558 590 }
559 591 left = PyBytes_FromStringAndSize(meta, leftsize);
560 592 meta += leftsize;
561 593 right = PyBytes_FromStringAndSize(meta, rightsize);
562 594 meta += rightsize;
563 595 tmp = PyTuple_New(2);
564 596 if (!left || !right || !tmp) {
565 597 Py_XDECREF(left);
566 598 Py_XDECREF(right);
567 599 Py_XDECREF(tmp);
568 600 goto bail;
569 601 }
570 602 PyTuple_SET_ITEM(tmp, 0, left);
571 603 PyTuple_SET_ITEM(tmp, 1, right);
572 604 PyTuple_SET_ITEM(metadata, i, tmp);
573 605 }
574 606 ret = Py_BuildValue("(OOHO(di)O)", prec, succs, flags, metadata, mtime,
575 607 (int)tz * 60, parents);
576 608 goto bail; /* return successfully */
577 609
578 610 overflow:
579 611 PyErr_SetString(PyExc_ValueError, "overflow in obsstore");
580 612 bail:
581 613 Py_XDECREF(prec);
582 614 Py_XDECREF(succs);
583 615 Py_XDECREF(metadata);
584 616 Py_XDECREF(parents);
585 617 return ret;
586 618 }
587 619
588 620 static PyObject *fm1readmarkers(PyObject *self, PyObject *args)
589 621 {
590 622 const char *data, *dataend;
591 623 Py_ssize_t datalen, offset, stop;
592 624 PyObject *markers = NULL;
593 625
594 626 if (!PyArg_ParseTuple(args, PY23("s#nn", "y#nn"), &data, &datalen,
595 627 &offset, &stop)) {
596 628 return NULL;
597 629 }
598 630 if (offset < 0) {
599 631 PyErr_SetString(PyExc_ValueError,
600 632 "invalid negative offset in fm1readmarkers");
601 633 return NULL;
602 634 }
603 635 if (stop > datalen) {
604 636 PyErr_SetString(
605 637 PyExc_ValueError,
606 638 "stop longer than data length in fm1readmarkers");
607 639 return NULL;
608 640 }
609 641 dataend = data + datalen;
610 642 data += offset;
611 643 markers = PyList_New(0);
612 644 if (!markers) {
613 645 return NULL;
614 646 }
615 647 while (offset < stop) {
616 648 uint32_t msize;
617 649 int error;
618 650 PyObject *record = fm1readmarker(data, dataend, &msize);
619 651 if (!record) {
620 652 goto bail;
621 653 }
622 654 error = PyList_Append(markers, record);
623 655 Py_DECREF(record);
624 656 if (error) {
625 657 goto bail;
626 658 }
627 659 data += msize;
628 660 offset += msize;
629 661 }
630 662 return markers;
631 663 bail:
632 664 Py_DECREF(markers);
633 665 return NULL;
634 666 }
635 667
636 668 static char parsers_doc[] = "Efficient content parsing.";
637 669
638 670 PyObject *encodedir(PyObject *self, PyObject *args);
639 671 PyObject *pathencode(PyObject *self, PyObject *args);
640 672 PyObject *lowerencode(PyObject *self, PyObject *args);
641 673 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs);
642 674
643 675 static PyMethodDef methods[] = {
644 676 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
645 677 {"nonnormalotherparententries", nonnormalotherparententries, METH_VARARGS,
646 678 "create a set containing non-normal and other parent entries of given "
647 679 "dirstate\n"},
648 680 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
649 681 {"parse_index2", (PyCFunction)parse_index2, METH_VARARGS | METH_KEYWORDS,
650 682 "parse a revlog index\n"},
651 683 {"isasciistr", isasciistr, METH_VARARGS, "check if an ASCII string\n"},
652 684 {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
653 685 {"asciiupper", asciiupper, METH_VARARGS, "uppercase an ASCII string\n"},
654 686 {"dict_new_presized", dict_new_presized, METH_VARARGS,
655 687 "construct a dict with an expected size\n"},
656 688 {"make_file_foldmap", make_file_foldmap, METH_VARARGS,
657 689 "make file foldmap\n"},
658 690 {"jsonescapeu8fast", jsonescapeu8fast, METH_VARARGS,
659 691 "escape a UTF-8 byte string to JSON (fast path)\n"},
660 692 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
661 693 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
662 694 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
663 695 {"fm1readmarkers", fm1readmarkers, METH_VARARGS,
664 696 "parse v1 obsolete markers\n"},
665 697 {NULL, NULL}};
666 698
667 699 void dirs_module_init(PyObject *mod);
668 700 void manifest_module_init(PyObject *mod);
669 701 void revlog_module_init(PyObject *mod);
670 702
671 703 static const int version = 20;
672 704
673 705 static void module_init(PyObject *mod)
674 706 {
675 707 PyObject *capsule = NULL;
676 708 PyModule_AddIntConstant(mod, "version", version);
677 709
678 710 /* This module constant has two purposes. First, it lets us unit test
679 711 * the ImportError raised without hard-coding any error text. This
680 712 * means we can change the text in the future without breaking tests,
681 713 * even across changesets without a recompile. Second, its presence
682 714 * can be used to determine whether the version-checking logic is
683 715 * present, which also helps in testing across changesets without a
684 716 * recompile. Note that this means the pure-Python version of parsers
685 717 * should not have this module constant. */
686 718 PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
687 719
688 720 dirs_module_init(mod);
689 721 manifest_module_init(mod);
690 722 revlog_module_init(mod);
691 723
692 724 capsule = PyCapsule_New(
693 725 make_dirstate_tuple,
694 726 "mercurial.cext.parsers.make_dirstate_tuple_CAPI", NULL);
695 727 if (capsule != NULL)
696 728 PyModule_AddObject(mod, "make_dirstate_tuple_CAPI", capsule);
697 729
698 730 if (PyType_Ready(&dirstateTupleType) < 0) {
699 731 return;
700 732 }
701 733 Py_INCREF(&dirstateTupleType);
702 734 PyModule_AddObject(mod, "dirstatetuple",
703 735 (PyObject *)&dirstateTupleType);
704 736 }
705 737
706 738 static int check_python_version(void)
707 739 {
708 740 PyObject *sys = PyImport_ImportModule("sys"), *ver;
709 741 long hexversion;
710 742 if (!sys) {
711 743 return -1;
712 744 }
713 745 ver = PyObject_GetAttrString(sys, "hexversion");
714 746 Py_DECREF(sys);
715 747 if (!ver) {
716 748 return -1;
717 749 }
718 750 hexversion = PyInt_AsLong(ver);
719 751 Py_DECREF(ver);
720 752 /* sys.hexversion is a 32-bit number by default, so the -1 case
721 753 * should only occur in unusual circumstances (e.g. if sys.hexversion
722 754 * is manually set to an invalid value). */
723 755 if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
724 756 PyErr_Format(PyExc_ImportError,
725 757 "%s: The Mercurial extension "
726 758 "modules were compiled with Python " PY_VERSION
727 759 ", but "
728 760 "Mercurial is currently using Python with "
729 761 "sys.hexversion=%ld: "
730 762 "Python %s\n at: %s",
731 763 versionerrortext, hexversion, Py_GetVersion(),
732 764 Py_GetProgramFullPath());
733 765 return -1;
734 766 }
735 767 return 0;
736 768 }
737 769
738 770 #ifdef IS_PY3K
739 771 static struct PyModuleDef parsers_module = {PyModuleDef_HEAD_INIT, "parsers",
740 772 parsers_doc, -1, methods};
741 773
742 774 PyMODINIT_FUNC PyInit_parsers(void)
743 775 {
744 776 PyObject *mod;
745 777
746 778 if (check_python_version() == -1)
747 779 return NULL;
748 780 mod = PyModule_Create(&parsers_module);
749 781 module_init(mod);
750 782 return mod;
751 783 }
752 784 #else
753 785 PyMODINIT_FUNC initparsers(void)
754 786 {
755 787 PyObject *mod;
756 788
757 789 if (check_python_version() == -1) {
758 790 return;
759 791 }
760 792 mod = Py_InitModule3("parsers", methods, parsers_doc);
761 793 module_init(mod);
762 794 }
763 795 #endif
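On the compiled side, the four accessors above are registered on the `dirstate_tuple` extension type through the new `dirstatetuple_methods` table, so Python callers get the same interface as the pure module. A sketch of how they would be exercised, assuming the extension is built and importable as `mercurial.cext.parsers`; the concrete values are made up for illustration:

```python
# Sketch only: the constructor arguments follow the "ciii" format parsed by
# dirstate_tuple_new above.
from mercurial.cext import parsers

t = parsers.dirstatetuple(b'n', 0o644, 1024, 1500000000)

assert t.v1_state() == b'n'        # dirstatetuple_v1_state
assert t.v1_mode() == 0o644        # dirstatetuple_v1_mode
assert t.v1_size() == 1024         # dirstatetuple_v1_size
assert t.v1_mtime() == 1500000000  # dirstatetuple_v1_mtime

# Sequence indexing keeps working through dirstate_tuple_sq, so existing
# callers that treat the entry as a 4-tuple are unaffected.
assert t[2] == 1024
```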
@@ -1,456 +1,479 b''
1 1 # parsers.py - Python implementation of parsers.c
2 2 #
3 3 # Copyright 2009 Olivia Mackall <olivia@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import struct
11 11 import zlib
12 12
13 13 from ..node import (
14 14 nullrev,
15 15 sha1nodeconstants,
16 16 )
17 17 from .. import (
18 18 error,
19 19 pycompat,
20 20 revlogutils,
21 21 util,
22 22 )
23 23
24 24 from ..revlogutils import nodemap as nodemaputil
25 25 from ..revlogutils import constants as revlog_constants
26 26
27 27 stringio = pycompat.bytesio
28 28
29 29
30 30 _pack = struct.pack
31 31 _unpack = struct.unpack
32 32 _compress = zlib.compress
33 33 _decompress = zlib.decompress
34 34
35 35
36 36 class dirstatetuple(object):
37 37 """represent a dirstate entry
38 38
39 39 It contains:
40 40
41 41 - state (one of 'n', 'a', 'r', 'm')
42 42 - mode,
43 43 - size,
44 44 - mtime,
45 45 """
46 46
47 47 __slots__ = ('_state', '_mode', '_size', '_mtime')
48 48
49 49 def __init__(self, state, mode, size, mtime):
50 50 self._state = state
51 51 self._mode = mode
52 52 self._size = size
53 53 self._mtime = mtime
54 54
55 55 def __getitem__(self, idx):
56 56 if idx == 0 or idx == -4:
57 57 return self._state
58 58 elif idx == 1 or idx == -3:
59 59 return self._mode
60 60 elif idx == 2 or idx == -2:
61 61 return self._size
62 62 elif idx == 3 or idx == -1:
63 63 return self._mtime
64 64 else:
65 65 raise IndexError(idx)
66 66
67 def v1_state(self):
68 """return a "state" suitable for v1 serialization"""
69 return self._state
70
71 def v1_mode(self):
72 """return a "mode" suitable for v1 serialization"""
73 return self._mode
74
75 def v1_size(self):
76 """return a "size" suitable for v1 serialization"""
77 return self._size
78
79 def v1_mtime(self):
80 """return a "mtime" suitable for v1 serialization"""
81 return self._mtime
82
67 83
68 84 def gettype(q):
69 85 return int(q & 0xFFFF)
70 86
71 87
72 88 class BaseIndexObject(object):
73 89 # Can this object be passed to an algorithm implemented in Rust?
74 90 rust_ext_compat = 0
75 91 # Format of an index entry according to Python's `struct` language
76 92 index_format = revlog_constants.INDEX_ENTRY_V1
77 93 # Size of a C unsigned long long int, platform independent
78 94 big_int_size = struct.calcsize(b'>Q')
79 95 # Size of a C long int, platform independent
80 96 int_size = struct.calcsize(b'>i')
81 97 # An empty index entry, used as a default value to be overridden, or nullrev
82 98 null_item = (
83 99 0,
84 100 0,
85 101 0,
86 102 -1,
87 103 -1,
88 104 -1,
89 105 -1,
90 106 sha1nodeconstants.nullid,
91 107 0,
92 108 0,
93 109 revlog_constants.COMP_MODE_INLINE,
94 110 revlog_constants.COMP_MODE_INLINE,
95 111 )
96 112
97 113 @util.propertycache
98 114 def entry_size(self):
99 115 return self.index_format.size
100 116
101 117 @property
102 118 def nodemap(self):
103 119 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
104 120 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
105 121 return self._nodemap
106 122
107 123 @util.propertycache
108 124 def _nodemap(self):
109 125 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
110 126 for r in range(0, len(self)):
111 127 n = self[r][7]
112 128 nodemap[n] = r
113 129 return nodemap
114 130
115 131 def has_node(self, node):
116 132 """return True if the node exist in the index"""
117 133 return node in self._nodemap
118 134
119 135 def rev(self, node):
120 136 """return a revision for a node
121 137
122 138 If the node is unknown, raise a RevlogError"""
123 139 return self._nodemap[node]
124 140
125 141 def get_rev(self, node):
126 142 """return a revision for a node
127 143
128 144 If the node is unknown, return None"""
129 145 return self._nodemap.get(node)
130 146
131 147 def _stripnodes(self, start):
132 148 if '_nodemap' in vars(self):
133 149 for r in range(start, len(self)):
134 150 n = self[r][7]
135 151 del self._nodemap[n]
136 152
137 153 def clearcaches(self):
138 154 self.__dict__.pop('_nodemap', None)
139 155
140 156 def __len__(self):
141 157 return self._lgt + len(self._extra)
142 158
143 159 def append(self, tup):
144 160 if '_nodemap' in vars(self):
145 161 self._nodemap[tup[7]] = len(self)
146 162 data = self._pack_entry(len(self), tup)
147 163 self._extra.append(data)
148 164
149 165 def _pack_entry(self, rev, entry):
150 166 assert entry[8] == 0
151 167 assert entry[9] == 0
152 168 return self.index_format.pack(*entry[:8])
153 169
154 170 def _check_index(self, i):
155 171 if not isinstance(i, int):
156 172 raise TypeError(b"expecting int indexes")
157 173 if i < 0 or i >= len(self):
158 174 raise IndexError
159 175
160 176 def __getitem__(self, i):
161 177 if i == -1:
162 178 return self.null_item
163 179 self._check_index(i)
164 180 if i >= self._lgt:
165 181 data = self._extra[i - self._lgt]
166 182 else:
167 183 index = self._calculate_index(i)
168 184 data = self._data[index : index + self.entry_size]
169 185 r = self._unpack_entry(i, data)
170 186 if self._lgt and i == 0:
171 187 offset = revlogutils.offset_type(0, gettype(r[0]))
172 188 r = (offset,) + r[1:]
173 189 return r
174 190
175 191 def _unpack_entry(self, rev, data):
176 192 r = self.index_format.unpack(data)
177 193 r = r + (
178 194 0,
179 195 0,
180 196 revlog_constants.COMP_MODE_INLINE,
181 197 revlog_constants.COMP_MODE_INLINE,
182 198 )
183 199 return r
184 200
185 201 def pack_header(self, header):
186 202 """pack header information as binary"""
187 203 v_fmt = revlog_constants.INDEX_HEADER
188 204 return v_fmt.pack(header)
189 205
190 206 def entry_binary(self, rev):
191 207 """return the raw binary string representing a revision"""
192 208 entry = self[rev]
193 209 p = revlog_constants.INDEX_ENTRY_V1.pack(*entry[:8])
194 210 if rev == 0:
195 211 p = p[revlog_constants.INDEX_HEADER.size :]
196 212 return p
197 213
198 214
199 215 class IndexObject(BaseIndexObject):
200 216 def __init__(self, data):
201 217 assert len(data) % self.entry_size == 0, (
202 218 len(data),
203 219 self.entry_size,
204 220 len(data) % self.entry_size,
205 221 )
206 222 self._data = data
207 223 self._lgt = len(data) // self.entry_size
208 224 self._extra = []
209 225
210 226 def _calculate_index(self, i):
211 227 return i * self.entry_size
212 228
213 229 def __delitem__(self, i):
214 230 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
215 231 raise ValueError(b"deleting slices only supports a:-1 with step 1")
216 232 i = i.start
217 233 self._check_index(i)
218 234 self._stripnodes(i)
219 235 if i < self._lgt:
220 236 self._data = self._data[: i * self.entry_size]
221 237 self._lgt = i
222 238 self._extra = []
223 239 else:
224 240 self._extra = self._extra[: i - self._lgt]
225 241
226 242
227 243 class PersistentNodeMapIndexObject(IndexObject):
228 244 """a Debug oriented class to test persistent nodemap
229 245
230 246 We need a simple python object to test API and higher level behavior. See
231 247 the Rust implementation for more serious usage. This should be used only
232 248 through the dedicated `devel.persistent-nodemap` config.
233 249 """
234 250
235 251 def nodemap_data_all(self):
236 252 """Return bytes containing a full serialization of a nodemap
237 253
238 254 The nodemap should be valid for the full set of revisions in the
239 255 index."""
240 256 return nodemaputil.persistent_data(self)
241 257
242 258 def nodemap_data_incremental(self):
243 259 """Return bytes containing a incremental update to persistent nodemap
244 260
245 261 This contains the data for an append-only update of the data provided
246 262 in the last call to `update_nodemap_data`.
247 263 """
248 264 if self._nm_root is None:
249 265 return None
250 266 docket = self._nm_docket
251 267 changed, data = nodemaputil.update_persistent_data(
252 268 self, self._nm_root, self._nm_max_idx, self._nm_docket.tip_rev
253 269 )
254 270
255 271 self._nm_root = self._nm_max_idx = self._nm_docket = None
256 272 return docket, changed, data
257 273
258 274 def update_nodemap_data(self, docket, nm_data):
259 275 """provide full block of persisted binary data for a nodemap
260 276
261 277 The data are expected to come from disk. See `nodemap_data_all` for a
262 278 producer of such data."""
263 279 if nm_data is not None:
264 280 self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
265 281 if self._nm_root:
266 282 self._nm_docket = docket
267 283 else:
268 284 self._nm_root = self._nm_max_idx = self._nm_docket = None
269 285
270 286
271 287 class InlinedIndexObject(BaseIndexObject):
272 288 def __init__(self, data, inline=0):
273 289 self._data = data
274 290 self._lgt = self._inline_scan(None)
275 291 self._inline_scan(self._lgt)
276 292 self._extra = []
277 293
278 294 def _inline_scan(self, lgt):
279 295 off = 0
280 296 if lgt is not None:
281 297 self._offsets = [0] * lgt
282 298 count = 0
283 299 while off <= len(self._data) - self.entry_size:
284 300 start = off + self.big_int_size
285 301 (s,) = struct.unpack(
286 302 b'>i',
287 303 self._data[start : start + self.int_size],
288 304 )
289 305 if lgt is not None:
290 306 self._offsets[count] = off
291 307 count += 1
292 308 off += self.entry_size + s
293 309 if off != len(self._data):
294 310 raise ValueError(b"corrupted data")
295 311 return count
296 312
297 313 def __delitem__(self, i):
298 314 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
299 315 raise ValueError(b"deleting slices only supports a:-1 with step 1")
300 316 i = i.start
301 317 self._check_index(i)
302 318 self._stripnodes(i)
303 319 if i < self._lgt:
304 320 self._offsets = self._offsets[:i]
305 321 self._lgt = i
306 322 self._extra = []
307 323 else:
308 324 self._extra = self._extra[: i - self._lgt]
309 325
310 326 def _calculate_index(self, i):
311 327 return self._offsets[i]
312 328
313 329
314 330 def parse_index2(data, inline, revlogv2=False):
315 331 if not inline:
316 332 cls = IndexObject2 if revlogv2 else IndexObject
317 333 return cls(data), None
318 334 cls = InlinedIndexObject
319 335 return cls(data, inline), (0, data)
320 336
321 337
322 338 def parse_index_cl_v2(data):
323 339 return IndexChangelogV2(data), None
324 340
325 341
326 342 class IndexObject2(IndexObject):
327 343 index_format = revlog_constants.INDEX_ENTRY_V2
328 344
329 345 def replace_sidedata_info(
330 346 self,
331 347 rev,
332 348 sidedata_offset,
333 349 sidedata_length,
334 350 offset_flags,
335 351 compression_mode,
336 352 ):
337 353 """
338 354 Replace an existing index entry's sidedata offset and length with new
339 355 ones.
340 356 This cannot be used outside of the context of sidedata rewriting,
341 357 inside the transaction that creates the revision `rev`.
342 358 """
343 359 if rev < 0:
344 360 raise KeyError
345 361 self._check_index(rev)
346 362 if rev < self._lgt:
347 363 msg = b"cannot rewrite entries outside of this transaction"
348 364 raise KeyError(msg)
349 365 else:
350 366 entry = list(self[rev])
351 367 entry[0] = offset_flags
352 368 entry[8] = sidedata_offset
353 369 entry[9] = sidedata_length
354 370 entry[11] = compression_mode
355 371 entry = tuple(entry)
356 372 new = self._pack_entry(rev, entry)
357 373 self._extra[rev - self._lgt] = new
358 374
359 375 def _unpack_entry(self, rev, data):
360 376 data = self.index_format.unpack(data)
361 377 entry = data[:10]
362 378 data_comp = data[10] & 3
363 379 sidedata_comp = (data[10] & (3 << 2)) >> 2
364 380 return entry + (data_comp, sidedata_comp)
365 381
366 382 def _pack_entry(self, rev, entry):
367 383 data = entry[:10]
368 384 data_comp = entry[10] & 3
369 385 sidedata_comp = (entry[11] & 3) << 2
370 386 data += (data_comp | sidedata_comp,)
371 387
372 388 return self.index_format.pack(*data)
373 389
374 390 def entry_binary(self, rev):
375 391 """return the raw binary string representing a revision"""
376 392 entry = self[rev]
377 393 return self._pack_entry(rev, entry)
378 394
379 395 def pack_header(self, header):
380 396 """pack header information as binary"""
381 397 msg = 'version header should go in the docket, not the index: %d'
382 398 msg %= header
383 399 raise error.ProgrammingError(msg)
384 400
385 401
386 402 class IndexChangelogV2(IndexObject2):
387 403 index_format = revlog_constants.INDEX_ENTRY_CL_V2
388 404
389 405 def _unpack_entry(self, rev, data, r=True):
390 406 items = self.index_format.unpack(data)
391 407 entry = items[:3] + (rev, rev) + items[3:8]
392 408 data_comp = items[8] & 3
393 409 sidedata_comp = (items[8] >> 2) & 3
394 410 return entry + (data_comp, sidedata_comp)
395 411
396 412 def _pack_entry(self, rev, entry):
397 413 assert entry[3] == rev, entry[3]
398 414 assert entry[4] == rev, entry[4]
399 415 data = entry[:3] + entry[5:10]
400 416 data_comp = entry[10] & 3
401 417 sidedata_comp = (entry[11] & 3) << 2
402 418 data += (data_comp | sidedata_comp,)
403 419 return self.index_format.pack(*data)
404 420
405 421
406 422 def parse_index_devel_nodemap(data, inline):
407 423 """like parse_index2, but alway return a PersistentNodeMapIndexObject"""
408 424 return PersistentNodeMapIndexObject(data), None
409 425
410 426
411 427 def parse_dirstate(dmap, copymap, st):
412 428 parents = [st[:20], st[20:40]]
413 429 # dereference fields so they will be local in loop
414 430 format = b">cllll"
415 431 e_size = struct.calcsize(format)
416 432 pos1 = 40
417 433 l = len(st)
418 434
419 435 # the inner loop
420 436 while pos1 < l:
421 437 pos2 = pos1 + e_size
422 438 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
423 439 pos1 = pos2 + e[4]
424 440 f = st[pos2:pos1]
425 441 if b'\0' in f:
426 442 f, c = f.split(b'\0')
427 443 copymap[f] = c
428 444 dmap[f] = dirstatetuple(*e[:4])
429 445 return parents
430 446
431 447
432 448 def pack_dirstate(dmap, copymap, pl, now):
433 449 now = int(now)
434 450 cs = stringio()
435 451 write = cs.write
436 452 write(b"".join(pl))
437 453 for f, e in pycompat.iteritems(dmap):
438 454 if e[0] == b'n' and e[3] == now:
439 455 # The file was last modified "simultaneously" with the current
440 456 # write to dirstate (i.e. within the same second for file-
441 457 # systems with a granularity of 1 sec). This commonly happens
442 458 # for at least a couple of files on 'update'.
443 459 # The user could change the file without changing its size
444 460 # within the same second. Invalidate the file's mtime in
445 461 # dirstate, forcing future 'status' calls to compare the
446 462 # contents of the file if the size is the same. This prevents
447 463 # mistakenly treating such files as clean.
448 464 e = dirstatetuple(e[0], e[1], e[2], -1)
449 465 dmap[f] = e
450 466
451 467 if f in copymap:
452 468 f = b"%s\0%s" % (f, copymap[f])
453 e = _pack(b">cllll", e[0], e[1], e[2], e[3], len(f))
469 e = _pack(
470 b">cllll",
471 e.v1_state(),
472 e.v1_mode(),
473 e.v1_size(),
474 e.v1_mtime(),
475 len(f),
476 )
454 477 write(e)
455 478 write(f)
456 479 return cs.getvalue()
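Taken together, the pure-Python side round-trips through the same accessors. A minimal end-to-end sketch, assuming the module above is importable as `mercurial.pure.parsers`; the file name, parents and entry values are illustrative:

```python
from mercurial.pure import parsers

dmap, copymap = {}, {}
dmap[b'a.txt'] = parsers.dirstatetuple(b'n', 0o644, 5, 1500000000)

p1 = p2 = b'\x00' * 20
data = parsers.pack_dirstate(dmap, copymap, [p1, p2], 0)

dmap2, copymap2 = {}, {}
parents = parsers.parse_dirstate(dmap2, copymap2, data)
assert parents == [p1, p2]
assert dmap2[b'a.txt'].v1_size() == 5
assert dmap2[b'a.txt'].v1_mtime() == 1500000000
```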