##// END OF EJS Templates
dirstate-v2: Initial Python serializer...
Raphaël Gomès -
r49036:7e78c72e default
parent child Browse files
Show More
@@ -1,1155 +1,1174 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12 #include <ctype.h>
13 13 #include <stddef.h>
14 14 #include <string.h>
15 15
16 16 #include "bitmanipulation.h"
17 17 #include "charencode.h"
18 18 #include "util.h"
19 19
20 20 #ifdef IS_PY3K
21 21 /* The mapping of Python types is meant to be temporary to get Python
22 22 * 3 to compile. We should remove this once Python 3 support is fully
23 23 * supported and proper types are used in the extensions themselves. */
24 24 #define PyInt_Check PyLong_Check
25 25 #define PyInt_FromLong PyLong_FromLong
26 26 #define PyInt_FromSsize_t PyLong_FromSsize_t
27 27 #define PyInt_AsLong PyLong_AsLong
28 28 #endif
29 29
/* Error text for a Python minor version mismatch (the use site is not in
 * this part of the file). */
static const char *const versionerrortext = "Python minor version mismatch";

/* Sentinel values used in the dirstate-v1 "size" field. */
static const int dirstate_v1_from_p2 = -2;
static const int dirstate_v1_nonnormal = -1;
/* v1 "mtime" value reported when the stored mtime cannot be relied upon. */
static const int ambiguous_time = -1;
35 35
36 36 static PyObject *dict_new_presized(PyObject *self, PyObject *args)
37 37 {
38 38 Py_ssize_t expected_size;
39 39
40 40 if (!PyArg_ParseTuple(args, "n:make_presized_dict", &expected_size)) {
41 41 return NULL;
42 42 }
43 43
44 44 return _dict_new_presized(expected_size);
45 45 }
46 46
47 47 static PyObject *dirstate_item_new(PyTypeObject *subtype, PyObject *args,
48 48 PyObject *kwds)
49 49 {
50 50 /* We do all the initialization here and not a tp_init function because
51 51 * dirstate_item is immutable. */
52 52 dirstateItemObject *t;
53 53 int wc_tracked;
54 54 int p1_tracked;
55 55 int p2_info;
56 56 int has_meaningful_data;
57 57 int has_meaningful_mtime;
58 58 int mode;
59 59 int size;
60 60 int mtime;
61 61 PyObject *parentfiledata;
62 62 static char *keywords_name[] = {
63 63 "wc_tracked",
64 64 "p1_tracked",
65 65 "p2_info",
66 66 "has_meaningful_data",
67 67 "has_meaningful_mtime",
68 68 "parentfiledata",
69 69 NULL,
70 70 };
71 71 wc_tracked = 0;
72 72 p1_tracked = 0;
73 73 p2_info = 0;
74 74 has_meaningful_mtime = 1;
75 75 has_meaningful_data = 1;
76 76 parentfiledata = Py_None;
77 77 if (!PyArg_ParseTupleAndKeywords(
78 78 args, kwds, "|iiiiiO", keywords_name, &wc_tracked, &p1_tracked,
79 79 &p2_info, &has_meaningful_data, &has_meaningful_mtime,
80 80 &parentfiledata)) {
81 81 return NULL;
82 82 }
83 83 t = (dirstateItemObject *)subtype->tp_alloc(subtype, 1);
84 84 if (!t) {
85 85 return NULL;
86 86 }
87 87
88 88 t->flags = 0;
89 89 if (wc_tracked) {
90 90 t->flags |= dirstate_flag_wc_tracked;
91 91 }
92 92 if (p1_tracked) {
93 93 t->flags |= dirstate_flag_p1_tracked;
94 94 }
95 95 if (p2_info) {
96 96 t->flags |= dirstate_flag_p2_info;
97 97 }
98 98
99 99 if (parentfiledata != Py_None) {
100 100 if (!PyTuple_CheckExact(parentfiledata)) {
101 101 PyErr_SetString(
102 102 PyExc_TypeError,
103 103 "parentfiledata should be a Tuple or None");
104 104 return NULL;
105 105 }
106 106 mode = (int)PyLong_AsLong(PyTuple_GetItem(parentfiledata, 0));
107 107 size = (int)PyLong_AsLong(PyTuple_GetItem(parentfiledata, 1));
108 108 mtime = (int)PyLong_AsLong(PyTuple_GetItem(parentfiledata, 2));
109 109 } else {
110 110 has_meaningful_data = 0;
111 111 has_meaningful_mtime = 0;
112 112 }
113 113 if (has_meaningful_data) {
114 114 t->flags |= dirstate_flag_has_meaningful_data;
115 115 t->mode = mode;
116 116 t->size = size;
117 117 } else {
118 118 t->mode = 0;
119 119 t->size = 0;
120 120 }
121 121 if (has_meaningful_mtime) {
122 122 t->flags |= dirstate_flag_has_meaningful_mtime;
123 123 t->mtime = mtime;
124 124 } else {
125 125 t->mtime = 0;
126 126 }
127 127 return (PyObject *)t;
128 128 }
129 129
130 130 static void dirstate_item_dealloc(PyObject *o)
131 131 {
132 132 PyObject_Del(o);
133 133 }
134 134
135 135 static inline bool dirstate_item_c_tracked(dirstateItemObject *self)
136 136 {
137 137 return (self->flags & dirstate_flag_wc_tracked);
138 138 }
139 139
140 140 static inline bool dirstate_item_c_any_tracked(dirstateItemObject *self)
141 141 {
142 142 const unsigned char mask = dirstate_flag_wc_tracked |
143 143 dirstate_flag_p1_tracked |
144 144 dirstate_flag_p2_info;
145 145 return (self->flags & mask);
146 146 }
147 147
148 148 static inline bool dirstate_item_c_added(dirstateItemObject *self)
149 149 {
150 150 const unsigned char mask =
151 151 (dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
152 152 dirstate_flag_p2_info);
153 153 const unsigned char target = dirstate_flag_wc_tracked;
154 154 return (self->flags & mask) == target;
155 155 }
156 156
157 157 static inline bool dirstate_item_c_removed(dirstateItemObject *self)
158 158 {
159 159 if (self->flags & dirstate_flag_wc_tracked) {
160 160 return false;
161 161 }
162 162 return (self->flags &
163 163 (dirstate_flag_p1_tracked | dirstate_flag_p2_info));
164 164 }
165 165
166 166 static inline bool dirstate_item_c_merged(dirstateItemObject *self)
167 167 {
168 168 return ((self->flags & dirstate_flag_wc_tracked) &&
169 169 (self->flags & dirstate_flag_p1_tracked) &&
170 170 (self->flags & dirstate_flag_p2_info));
171 171 }
172 172
173 173 static inline bool dirstate_item_c_from_p2(dirstateItemObject *self)
174 174 {
175 175 return ((self->flags & dirstate_flag_wc_tracked) &&
176 176 !(self->flags & dirstate_flag_p1_tracked) &&
177 177 (self->flags & dirstate_flag_p2_info));
178 178 }
179 179
180 180 static inline char dirstate_item_c_v1_state(dirstateItemObject *self)
181 181 {
182 182 if (dirstate_item_c_removed(self)) {
183 183 return 'r';
184 184 } else if (dirstate_item_c_merged(self)) {
185 185 return 'm';
186 186 } else if (dirstate_item_c_added(self)) {
187 187 return 'a';
188 188 } else {
189 189 return 'n';
190 190 }
191 191 }
192 192
193 193 static inline int dirstate_item_c_v1_mode(dirstateItemObject *self)
194 194 {
195 195 if (self->flags & dirstate_flag_has_meaningful_data) {
196 196 return self->mode;
197 197 } else {
198 198 return 0;
199 199 }
200 200 }
201 201
202 202 static inline int dirstate_item_c_v1_size(dirstateItemObject *self)
203 203 {
204 204 if (!(self->flags & dirstate_flag_wc_tracked) &&
205 205 (self->flags & dirstate_flag_p2_info)) {
206 206 if (self->flags & dirstate_flag_p1_tracked) {
207 207 return dirstate_v1_nonnormal;
208 208 } else {
209 209 return dirstate_v1_from_p2;
210 210 }
211 211 } else if (dirstate_item_c_removed(self)) {
212 212 return 0;
213 213 } else if (self->flags & dirstate_flag_p2_info) {
214 214 return dirstate_v1_from_p2;
215 215 } else if (dirstate_item_c_added(self)) {
216 216 return dirstate_v1_nonnormal;
217 217 } else if (self->flags & dirstate_flag_has_meaningful_data) {
218 218 return self->size;
219 219 } else {
220 220 return dirstate_v1_nonnormal;
221 221 }
222 222 }
223 223
224 224 static inline int dirstate_item_c_v1_mtime(dirstateItemObject *self)
225 225 {
226 226 if (dirstate_item_c_removed(self)) {
227 227 return 0;
228 228 } else if (!(self->flags & dirstate_flag_has_meaningful_mtime) ||
229 229 !(self->flags & dirstate_flag_p1_tracked) ||
230 230 !(self->flags & dirstate_flag_wc_tracked) ||
231 231 (self->flags & dirstate_flag_p2_info)) {
232 232 return ambiguous_time;
233 233 } else {
234 234 return self->mtime;
235 235 }
236 236 }
237 237
238 static PyObject *dirstate_item_v2_data(dirstateItemObject *self)
239 {
240 unsigned char flags = self->flags;
241 int mode = dirstate_item_c_v1_mode(self);
242 if ((mode & S_IXUSR) != 0) {
243 flags |= dirstate_flag_mode_exec_perm;
244 } else {
245 flags &= ~dirstate_flag_mode_exec_perm;
246 }
247 if (S_ISLNK(mode)) {
248 flags |= dirstate_flag_mode_is_symlink;
249 } else {
250 flags &= ~dirstate_flag_mode_is_symlink;
251 }
252 return Py_BuildValue("Bii", flags, self->size, self->mtime);
253 };
254
238 255 static PyObject *dirstate_item_v1_state(dirstateItemObject *self)
239 256 {
240 257 char state = dirstate_item_c_v1_state(self);
241 258 return PyBytes_FromStringAndSize(&state, 1);
242 259 };
243 260
244 261 static PyObject *dirstate_item_v1_mode(dirstateItemObject *self)
245 262 {
246 263 return PyInt_FromLong(dirstate_item_c_v1_mode(self));
247 264 };
248 265
249 266 static PyObject *dirstate_item_v1_size(dirstateItemObject *self)
250 267 {
251 268 return PyInt_FromLong(dirstate_item_c_v1_size(self));
252 269 };
253 270
254 271 static PyObject *dirstate_item_v1_mtime(dirstateItemObject *self)
255 272 {
256 273 return PyInt_FromLong(dirstate_item_c_v1_mtime(self));
257 274 };
258 275
259 276 static PyObject *dirstate_item_need_delay(dirstateItemObject *self,
260 277 PyObject *value)
261 278 {
262 279 long now;
263 280 if (!pylong_to_long(value, &now)) {
264 281 return NULL;
265 282 }
266 283 if (dirstate_item_c_v1_state(self) == 'n' &&
267 284 dirstate_item_c_v1_mtime(self) == now) {
268 285 Py_RETURN_TRUE;
269 286 } else {
270 287 Py_RETURN_FALSE;
271 288 }
272 289 };
273 290
274 291 /* This will never change since it's bound to V1
275 292 */
276 293 static inline dirstateItemObject *
277 294 dirstate_item_from_v1_data(char state, int mode, int size, int mtime)
278 295 {
279 296 dirstateItemObject *t =
280 297 PyObject_New(dirstateItemObject, &dirstateItemType);
281 298 if (!t) {
282 299 return NULL;
283 300 }
284 301 t->flags = 0;
285 302 t->mode = 0;
286 303 t->size = 0;
287 304 t->mtime = 0;
288 305
289 306 if (state == 'm') {
290 307 t->flags = (dirstate_flag_wc_tracked |
291 308 dirstate_flag_p1_tracked | dirstate_flag_p2_info);
292 309 } else if (state == 'a') {
293 310 t->flags = dirstate_flag_wc_tracked;
294 311 } else if (state == 'r') {
295 312 if (size == dirstate_v1_nonnormal) {
296 313 t->flags =
297 314 dirstate_flag_p1_tracked | dirstate_flag_p2_info;
298 315 } else if (size == dirstate_v1_from_p2) {
299 316 t->flags = dirstate_flag_p2_info;
300 317 } else {
301 318 t->flags = dirstate_flag_p1_tracked;
302 319 }
303 320 } else if (state == 'n') {
304 321 if (size == dirstate_v1_from_p2) {
305 322 t->flags =
306 323 dirstate_flag_wc_tracked | dirstate_flag_p2_info;
307 324 } else if (size == dirstate_v1_nonnormal) {
308 325 t->flags =
309 326 dirstate_flag_wc_tracked | dirstate_flag_p1_tracked;
310 327 } else if (mtime == ambiguous_time) {
311 328 t->flags = (dirstate_flag_wc_tracked |
312 329 dirstate_flag_p1_tracked |
313 330 dirstate_flag_has_meaningful_data);
314 331 t->mode = mode;
315 332 t->size = size;
316 333 } else {
317 334 t->flags = (dirstate_flag_wc_tracked |
318 335 dirstate_flag_p1_tracked |
319 336 dirstate_flag_has_meaningful_data |
320 337 dirstate_flag_has_meaningful_mtime);
321 338 t->mode = mode;
322 339 t->size = size;
323 340 t->mtime = mtime;
324 341 }
325 342 } else {
326 343 PyErr_Format(PyExc_RuntimeError,
327 344 "unknown state: `%c` (%d, %d, %d)", state, mode,
328 345 size, mtime, NULL);
329 346 Py_DECREF(t);
330 347 return NULL;
331 348 }
332 349
333 350 return t;
334 351 }
335 352
336 353 /* This will never change since it's bound to V1, unlike `dirstate_item_new` */
337 354 static PyObject *dirstate_item_from_v1_meth(PyTypeObject *subtype,
338 355 PyObject *args)
339 356 {
340 357 /* We do all the initialization here and not a tp_init function because
341 358 * dirstate_item is immutable. */
342 359 char state;
343 360 int size, mode, mtime;
344 361 if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime)) {
345 362 return NULL;
346 363 }
347 364 return (PyObject *)dirstate_item_from_v1_data(state, mode, size, mtime);
348 365 };
349 366
350 367 static PyObject *dirstate_item_from_v2_meth(PyTypeObject *subtype,
351 368 PyObject *args)
352 369 {
353 370 dirstateItemObject *t =
354 371 PyObject_New(dirstateItemObject, &dirstateItemType);
355 372 if (!t) {
356 373 return NULL;
357 374 }
358 375 if (!PyArg_ParseTuple(args, "bii", &t->flags, &t->size, &t->mtime)) {
359 376 return NULL;
360 377 }
361 378 t->mode = 0;
362 379 if (t->flags & dirstate_flag_has_meaningful_data) {
363 380 if (t->flags & dirstate_flag_mode_exec_perm) {
364 381 t->mode = 0755;
365 382 } else {
366 383 t->mode = 0644;
367 384 }
368 385 if (t->flags & dirstate_flag_mode_is_symlink) {
369 386 t->mode |= S_IFLNK;
370 387 } else {
371 388 t->mode |= S_IFREG;
372 389 }
373 390 }
374 391 return (PyObject *)t;
375 392 };
376 393
377 394 /* This means the next status call will have to actually check its content
378 395 to make sure it is correct. */
379 396 static PyObject *dirstate_item_set_possibly_dirty(dirstateItemObject *self)
380 397 {
381 398 self->flags &= ~dirstate_flag_has_meaningful_mtime;
382 399 Py_RETURN_NONE;
383 400 }
384 401
385 402 /* See docstring of the python implementation for details */
386 403 static PyObject *dirstate_item_set_clean(dirstateItemObject *self,
387 404 PyObject *args)
388 405 {
389 406 int size, mode, mtime;
390 407 if (!PyArg_ParseTuple(args, "iii", &mode, &size, &mtime)) {
391 408 return NULL;
392 409 }
393 410 self->flags = dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
394 411 dirstate_flag_has_meaningful_data |
395 412 dirstate_flag_has_meaningful_mtime;
396 413 self->mode = mode;
397 414 self->size = size;
398 415 self->mtime = mtime;
399 416 Py_RETURN_NONE;
400 417 }
401 418
402 419 static PyObject *dirstate_item_set_tracked(dirstateItemObject *self)
403 420 {
404 421 self->flags |= dirstate_flag_wc_tracked;
405 422 self->flags &= ~dirstate_flag_has_meaningful_mtime;
406 423 Py_RETURN_NONE;
407 424 }
408 425
409 426 static PyObject *dirstate_item_set_untracked(dirstateItemObject *self)
410 427 {
411 428 self->flags &= ~dirstate_flag_wc_tracked;
412 429 self->mode = 0;
413 430 self->mtime = 0;
414 431 self->size = 0;
415 432 Py_RETURN_NONE;
416 433 }
417 434
418 435 static PyObject *dirstate_item_drop_merge_data(dirstateItemObject *self)
419 436 {
420 437 if (self->flags & dirstate_flag_p2_info) {
421 438 self->flags &= ~(dirstate_flag_p2_info |
422 439 dirstate_flag_has_meaningful_data |
423 440 dirstate_flag_has_meaningful_mtime);
424 441 self->mode = 0;
425 442 self->mtime = 0;
426 443 self->size = 0;
427 444 }
428 445 Py_RETURN_NONE;
429 446 }
430 447 static PyMethodDef dirstate_item_methods[] = {
448 {"v2_data", (PyCFunction)dirstate_item_v2_data, METH_NOARGS,
449 "return data suitable for v2 serialization"},
431 450 {"v1_state", (PyCFunction)dirstate_item_v1_state, METH_NOARGS,
432 451 "return a \"state\" suitable for v1 serialization"},
433 452 {"v1_mode", (PyCFunction)dirstate_item_v1_mode, METH_NOARGS,
434 453 "return a \"mode\" suitable for v1 serialization"},
435 454 {"v1_size", (PyCFunction)dirstate_item_v1_size, METH_NOARGS,
436 455 "return a \"size\" suitable for v1 serialization"},
437 456 {"v1_mtime", (PyCFunction)dirstate_item_v1_mtime, METH_NOARGS,
438 457 "return a \"mtime\" suitable for v1 serialization"},
439 458 {"need_delay", (PyCFunction)dirstate_item_need_delay, METH_O,
440 459 "True if the stored mtime would be ambiguous with the current time"},
441 460 {"from_v1_data", (PyCFunction)dirstate_item_from_v1_meth,
442 461 METH_VARARGS | METH_CLASS, "build a new DirstateItem object from V1 data"},
443 462 {"from_v2_data", (PyCFunction)dirstate_item_from_v2_meth,
444 463 METH_VARARGS | METH_CLASS, "build a new DirstateItem object from V2 data"},
445 464 {"set_possibly_dirty", (PyCFunction)dirstate_item_set_possibly_dirty,
446 465 METH_NOARGS, "mark a file as \"possibly dirty\""},
447 466 {"set_clean", (PyCFunction)dirstate_item_set_clean, METH_VARARGS,
448 467 "mark a file as \"clean\""},
449 468 {"set_tracked", (PyCFunction)dirstate_item_set_tracked, METH_NOARGS,
450 469 "mark a file as \"tracked\""},
451 470 {"set_untracked", (PyCFunction)dirstate_item_set_untracked, METH_NOARGS,
452 471 "mark a file as \"untracked\""},
453 472 {"drop_merge_data", (PyCFunction)dirstate_item_drop_merge_data, METH_NOARGS,
454 473 "remove all \"merge-only\" from a DirstateItem"},
455 474 {NULL} /* Sentinel */
456 475 };
457 476
458 477 static PyObject *dirstate_item_get_mode(dirstateItemObject *self)
459 478 {
460 479 return PyInt_FromLong(dirstate_item_c_v1_mode(self));
461 480 };
462 481
463 482 static PyObject *dirstate_item_get_size(dirstateItemObject *self)
464 483 {
465 484 return PyInt_FromLong(dirstate_item_c_v1_size(self));
466 485 };
467 486
468 487 static PyObject *dirstate_item_get_mtime(dirstateItemObject *self)
469 488 {
470 489 return PyInt_FromLong(dirstate_item_c_v1_mtime(self));
471 490 };
472 491
473 492 static PyObject *dirstate_item_get_state(dirstateItemObject *self)
474 493 {
475 494 char state = dirstate_item_c_v1_state(self);
476 495 return PyBytes_FromStringAndSize(&state, 1);
477 496 };
478 497
479 498 static PyObject *dirstate_item_get_tracked(dirstateItemObject *self)
480 499 {
481 500 if (dirstate_item_c_tracked(self)) {
482 501 Py_RETURN_TRUE;
483 502 } else {
484 503 Py_RETURN_FALSE;
485 504 }
486 505 };
487 506 static PyObject *dirstate_item_get_p1_tracked(dirstateItemObject *self)
488 507 {
489 508 if (self->flags & dirstate_flag_p1_tracked) {
490 509 Py_RETURN_TRUE;
491 510 } else {
492 511 Py_RETURN_FALSE;
493 512 }
494 513 };
495 514
496 515 static PyObject *dirstate_item_get_added(dirstateItemObject *self)
497 516 {
498 517 if (dirstate_item_c_added(self)) {
499 518 Py_RETURN_TRUE;
500 519 } else {
501 520 Py_RETURN_FALSE;
502 521 }
503 522 };
504 523
505 524 static PyObject *dirstate_item_get_p2_info(dirstateItemObject *self)
506 525 {
507 526 if (self->flags & dirstate_flag_wc_tracked &&
508 527 self->flags & dirstate_flag_p2_info) {
509 528 Py_RETURN_TRUE;
510 529 } else {
511 530 Py_RETURN_FALSE;
512 531 }
513 532 };
514 533
515 534 static PyObject *dirstate_item_get_merged(dirstateItemObject *self)
516 535 {
517 536 if (dirstate_item_c_merged(self)) {
518 537 Py_RETURN_TRUE;
519 538 } else {
520 539 Py_RETURN_FALSE;
521 540 }
522 541 };
523 542
524 543 static PyObject *dirstate_item_get_from_p2(dirstateItemObject *self)
525 544 {
526 545 if (dirstate_item_c_from_p2(self)) {
527 546 Py_RETURN_TRUE;
528 547 } else {
529 548 Py_RETURN_FALSE;
530 549 }
531 550 };
532 551
533 552 static PyObject *dirstate_item_get_maybe_clean(dirstateItemObject *self)
534 553 {
535 554 if (!(self->flags & dirstate_flag_wc_tracked)) {
536 555 Py_RETURN_FALSE;
537 556 } else if (!(self->flags & dirstate_flag_p1_tracked)) {
538 557 Py_RETURN_FALSE;
539 558 } else if (self->flags & dirstate_flag_p2_info) {
540 559 Py_RETURN_FALSE;
541 560 } else {
542 561 Py_RETURN_TRUE;
543 562 }
544 563 };
545 564
546 565 static PyObject *dirstate_item_get_any_tracked(dirstateItemObject *self)
547 566 {
548 567 if (dirstate_item_c_any_tracked(self)) {
549 568 Py_RETURN_TRUE;
550 569 } else {
551 570 Py_RETURN_FALSE;
552 571 }
553 572 };
554 573
555 574 static PyObject *dirstate_item_get_removed(dirstateItemObject *self)
556 575 {
557 576 if (dirstate_item_c_removed(self)) {
558 577 Py_RETURN_TRUE;
559 578 } else {
560 579 Py_RETURN_FALSE;
561 580 }
562 581 };
563 582
564 583 static PyGetSetDef dirstate_item_getset[] = {
565 584 {"mode", (getter)dirstate_item_get_mode, NULL, "mode", NULL},
566 585 {"size", (getter)dirstate_item_get_size, NULL, "size", NULL},
567 586 {"mtime", (getter)dirstate_item_get_mtime, NULL, "mtime", NULL},
568 587 {"state", (getter)dirstate_item_get_state, NULL, "state", NULL},
569 588 {"tracked", (getter)dirstate_item_get_tracked, NULL, "tracked", NULL},
570 589 {"p1_tracked", (getter)dirstate_item_get_p1_tracked, NULL, "p1_tracked",
571 590 NULL},
572 591 {"added", (getter)dirstate_item_get_added, NULL, "added", NULL},
573 592 {"p2_info", (getter)dirstate_item_get_p2_info, NULL, "p2_info", NULL},
574 593 {"merged", (getter)dirstate_item_get_merged, NULL, "merged", NULL},
575 594 {"from_p2", (getter)dirstate_item_get_from_p2, NULL, "from_p2", NULL},
576 595 {"maybe_clean", (getter)dirstate_item_get_maybe_clean, NULL, "maybe_clean",
577 596 NULL},
578 597 {"any_tracked", (getter)dirstate_item_get_any_tracked, NULL, "any_tracked",
579 598 NULL},
580 599 {"removed", (getter)dirstate_item_get_removed, NULL, "removed", NULL},
581 600 {NULL} /* Sentinel */
582 601 };
583 602
584 603 PyTypeObject dirstateItemType = {
585 604 PyVarObject_HEAD_INIT(NULL, 0) /* header */
586 605 "dirstate_tuple", /* tp_name */
587 606 sizeof(dirstateItemObject), /* tp_basicsize */
588 607 0, /* tp_itemsize */
589 608 (destructor)dirstate_item_dealloc, /* tp_dealloc */
590 609 0, /* tp_print */
591 610 0, /* tp_getattr */
592 611 0, /* tp_setattr */
593 612 0, /* tp_compare */
594 613 0, /* tp_repr */
595 614 0, /* tp_as_number */
596 615 0, /* tp_as_sequence */
597 616 0, /* tp_as_mapping */
598 617 0, /* tp_hash */
599 618 0, /* tp_call */
600 619 0, /* tp_str */
601 620 0, /* tp_getattro */
602 621 0, /* tp_setattro */
603 622 0, /* tp_as_buffer */
604 623 Py_TPFLAGS_DEFAULT, /* tp_flags */
605 624 "dirstate tuple", /* tp_doc */
606 625 0, /* tp_traverse */
607 626 0, /* tp_clear */
608 627 0, /* tp_richcompare */
609 628 0, /* tp_weaklistoffset */
610 629 0, /* tp_iter */
611 630 0, /* tp_iternext */
612 631 dirstate_item_methods, /* tp_methods */
613 632 0, /* tp_members */
614 633 dirstate_item_getset, /* tp_getset */
615 634 0, /* tp_base */
616 635 0, /* tp_dict */
617 636 0, /* tp_descr_get */
618 637 0, /* tp_descr_set */
619 638 0, /* tp_dictoffset */
620 639 0, /* tp_init */
621 640 0, /* tp_alloc */
622 641 dirstate_item_new, /* tp_new */
623 642 };
624 643
625 644 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
626 645 {
627 646 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
628 647 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
629 648 char state, *cur, *str, *cpos;
630 649 int mode, size, mtime;
631 650 unsigned int flen, pos = 40;
632 651 Py_ssize_t len = 40;
633 652 Py_ssize_t readlen;
634 653
635 654 if (!PyArg_ParseTuple(
636 655 args, PY23("O!O!s#:parse_dirstate", "O!O!y#:parse_dirstate"),
637 656 &PyDict_Type, &dmap, &PyDict_Type, &cmap, &str, &readlen)) {
638 657 goto quit;
639 658 }
640 659
641 660 len = readlen;
642 661
643 662 /* read parents */
644 663 if (len < 40) {
645 664 PyErr_SetString(PyExc_ValueError,
646 665 "too little data for parents");
647 666 goto quit;
648 667 }
649 668
650 669 parents = Py_BuildValue(PY23("s#s#", "y#y#"), str, (Py_ssize_t)20,
651 670 str + 20, (Py_ssize_t)20);
652 671 if (!parents) {
653 672 goto quit;
654 673 }
655 674
656 675 /* read filenames */
657 676 while (pos >= 40 && pos < len) {
658 677 if (pos + 17 > len) {
659 678 PyErr_SetString(PyExc_ValueError,
660 679 "overflow in dirstate");
661 680 goto quit;
662 681 }
663 682 cur = str + pos;
664 683 /* unpack header */
665 684 state = *cur;
666 685 mode = getbe32(cur + 1);
667 686 size = getbe32(cur + 5);
668 687 mtime = getbe32(cur + 9);
669 688 flen = getbe32(cur + 13);
670 689 pos += 17;
671 690 cur += 17;
672 691 if (flen > len - pos) {
673 692 PyErr_SetString(PyExc_ValueError,
674 693 "overflow in dirstate");
675 694 goto quit;
676 695 }
677 696
678 697 entry = (PyObject *)dirstate_item_from_v1_data(state, mode,
679 698 size, mtime);
680 699 if (!entry)
681 700 goto quit;
682 701 cpos = memchr(cur, 0, flen);
683 702 if (cpos) {
684 703 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
685 704 cname = PyBytes_FromStringAndSize(
686 705 cpos + 1, flen - (cpos - cur) - 1);
687 706 if (!fname || !cname ||
688 707 PyDict_SetItem(cmap, fname, cname) == -1 ||
689 708 PyDict_SetItem(dmap, fname, entry) == -1) {
690 709 goto quit;
691 710 }
692 711 Py_DECREF(cname);
693 712 } else {
694 713 fname = PyBytes_FromStringAndSize(cur, flen);
695 714 if (!fname ||
696 715 PyDict_SetItem(dmap, fname, entry) == -1) {
697 716 goto quit;
698 717 }
699 718 }
700 719 Py_DECREF(fname);
701 720 Py_DECREF(entry);
702 721 fname = cname = entry = NULL;
703 722 pos += flen;
704 723 }
705 724
706 725 ret = parents;
707 726 Py_INCREF(ret);
708 727 quit:
709 728 Py_XDECREF(fname);
710 729 Py_XDECREF(cname);
711 730 Py_XDECREF(entry);
712 731 Py_XDECREF(parents);
713 732 return ret;
714 733 }
715 734
716 735 /*
717 736 * Efficiently pack a dirstate object into its on-disk format.
718 737 */
719 738 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
720 739 {
721 740 PyObject *packobj = NULL;
722 741 PyObject *map, *copymap, *pl, *mtime_unset = NULL;
723 742 Py_ssize_t nbytes, pos, l;
724 743 PyObject *k, *v = NULL, *pn;
725 744 char *p, *s;
726 745 int now;
727 746
728 747 if (!PyArg_ParseTuple(args, "O!O!O!i:pack_dirstate", &PyDict_Type, &map,
729 748 &PyDict_Type, &copymap, &PyTuple_Type, &pl,
730 749 &now)) {
731 750 return NULL;
732 751 }
733 752
734 753 if (PyTuple_Size(pl) != 2) {
735 754 PyErr_SetString(PyExc_TypeError, "expected 2-element tuple");
736 755 return NULL;
737 756 }
738 757
739 758 /* Figure out how much we need to allocate. */
740 759 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
741 760 PyObject *c;
742 761 if (!PyBytes_Check(k)) {
743 762 PyErr_SetString(PyExc_TypeError, "expected string key");
744 763 goto bail;
745 764 }
746 765 nbytes += PyBytes_GET_SIZE(k) + 17;
747 766 c = PyDict_GetItem(copymap, k);
748 767 if (c) {
749 768 if (!PyBytes_Check(c)) {
750 769 PyErr_SetString(PyExc_TypeError,
751 770 "expected string key");
752 771 goto bail;
753 772 }
754 773 nbytes += PyBytes_GET_SIZE(c) + 1;
755 774 }
756 775 }
757 776
758 777 packobj = PyBytes_FromStringAndSize(NULL, nbytes);
759 778 if (packobj == NULL) {
760 779 goto bail;
761 780 }
762 781
763 782 p = PyBytes_AS_STRING(packobj);
764 783
765 784 pn = PyTuple_GET_ITEM(pl, 0);
766 785 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
767 786 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
768 787 goto bail;
769 788 }
770 789 memcpy(p, s, l);
771 790 p += 20;
772 791 pn = PyTuple_GET_ITEM(pl, 1);
773 792 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
774 793 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
775 794 goto bail;
776 795 }
777 796 memcpy(p, s, l);
778 797 p += 20;
779 798
780 799 for (pos = 0; PyDict_Next(map, &pos, &k, &v);) {
781 800 dirstateItemObject *tuple;
782 801 char state;
783 802 int mode, size, mtime;
784 803 Py_ssize_t len, l;
785 804 PyObject *o;
786 805 char *t;
787 806
788 807 if (!dirstate_tuple_check(v)) {
789 808 PyErr_SetString(PyExc_TypeError,
790 809 "expected a dirstate tuple");
791 810 goto bail;
792 811 }
793 812 tuple = (dirstateItemObject *)v;
794 813
795 814 state = dirstate_item_c_v1_state(tuple);
796 815 mode = dirstate_item_c_v1_mode(tuple);
797 816 size = dirstate_item_c_v1_size(tuple);
798 817 mtime = dirstate_item_c_v1_mtime(tuple);
799 818 if (state == 'n' && mtime == now) {
800 819 /* See pure/parsers.py:pack_dirstate for why we do
801 820 * this. */
802 821 mtime = -1;
803 822 mtime_unset = (PyObject *)dirstate_item_from_v1_data(
804 823 state, mode, size, mtime);
805 824 if (!mtime_unset) {
806 825 goto bail;
807 826 }
808 827 if (PyDict_SetItem(map, k, mtime_unset) == -1) {
809 828 goto bail;
810 829 }
811 830 Py_DECREF(mtime_unset);
812 831 mtime_unset = NULL;
813 832 }
814 833 *p++ = state;
815 834 putbe32((uint32_t)mode, p);
816 835 putbe32((uint32_t)size, p + 4);
817 836 putbe32((uint32_t)mtime, p + 8);
818 837 t = p + 12;
819 838 p += 16;
820 839 len = PyBytes_GET_SIZE(k);
821 840 memcpy(p, PyBytes_AS_STRING(k), len);
822 841 p += len;
823 842 o = PyDict_GetItem(copymap, k);
824 843 if (o) {
825 844 *p++ = '\0';
826 845 l = PyBytes_GET_SIZE(o);
827 846 memcpy(p, PyBytes_AS_STRING(o), l);
828 847 p += l;
829 848 len += l + 1;
830 849 }
831 850 putbe32((uint32_t)len, t);
832 851 }
833 852
834 853 pos = p - PyBytes_AS_STRING(packobj);
835 854 if (pos != nbytes) {
836 855 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
837 856 (long)pos, (long)nbytes);
838 857 goto bail;
839 858 }
840 859
841 860 return packobj;
842 861 bail:
843 862 Py_XDECREF(mtime_unset);
844 863 Py_XDECREF(packobj);
845 864 Py_XDECREF(v);
846 865 return NULL;
847 866 }
848 867
849 868 #define BUMPED_FIX 1
850 869 #define USING_SHA_256 2
851 870 #define FM1_HEADER_SIZE (4 + 8 + 2 + 2 + 1 + 1 + 1)
852 871
853 872 static PyObject *readshas(const char *source, unsigned char num,
854 873 Py_ssize_t hashwidth)
855 874 {
856 875 int i;
857 876 PyObject *list = PyTuple_New(num);
858 877 if (list == NULL) {
859 878 return NULL;
860 879 }
861 880 for (i = 0; i < num; i++) {
862 881 PyObject *hash = PyBytes_FromStringAndSize(source, hashwidth);
863 882 if (hash == NULL) {
864 883 Py_DECREF(list);
865 884 return NULL;
866 885 }
867 886 PyTuple_SET_ITEM(list, i, hash);
868 887 source += hashwidth;
869 888 }
870 889 return list;
871 890 }
872 891
873 892 static PyObject *fm1readmarker(const char *databegin, const char *dataend,
874 893 uint32_t *msize)
875 894 {
876 895 const char *data = databegin;
877 896 const char *meta;
878 897
879 898 double mtime;
880 899 int16_t tz;
881 900 uint16_t flags;
882 901 unsigned char nsuccs, nparents, nmetadata;
883 902 Py_ssize_t hashwidth = 20;
884 903
885 904 PyObject *prec = NULL, *parents = NULL, *succs = NULL;
886 905 PyObject *metadata = NULL, *ret = NULL;
887 906 int i;
888 907
889 908 if (data + FM1_HEADER_SIZE > dataend) {
890 909 goto overflow;
891 910 }
892 911
893 912 *msize = getbe32(data);
894 913 data += 4;
895 914 mtime = getbefloat64(data);
896 915 data += 8;
897 916 tz = getbeint16(data);
898 917 data += 2;
899 918 flags = getbeuint16(data);
900 919 data += 2;
901 920
902 921 if (flags & USING_SHA_256) {
903 922 hashwidth = 32;
904 923 }
905 924
906 925 nsuccs = (unsigned char)(*data++);
907 926 nparents = (unsigned char)(*data++);
908 927 nmetadata = (unsigned char)(*data++);
909 928
910 929 if (databegin + *msize > dataend) {
911 930 goto overflow;
912 931 }
913 932 dataend = databegin + *msize; /* narrow down to marker size */
914 933
915 934 if (data + hashwidth > dataend) {
916 935 goto overflow;
917 936 }
918 937 prec = PyBytes_FromStringAndSize(data, hashwidth);
919 938 data += hashwidth;
920 939 if (prec == NULL) {
921 940 goto bail;
922 941 }
923 942
924 943 if (data + nsuccs * hashwidth > dataend) {
925 944 goto overflow;
926 945 }
927 946 succs = readshas(data, nsuccs, hashwidth);
928 947 if (succs == NULL) {
929 948 goto bail;
930 949 }
931 950 data += nsuccs * hashwidth;
932 951
933 952 if (nparents == 1 || nparents == 2) {
934 953 if (data + nparents * hashwidth > dataend) {
935 954 goto overflow;
936 955 }
937 956 parents = readshas(data, nparents, hashwidth);
938 957 if (parents == NULL) {
939 958 goto bail;
940 959 }
941 960 data += nparents * hashwidth;
942 961 } else {
943 962 parents = Py_None;
944 963 Py_INCREF(parents);
945 964 }
946 965
947 966 if (data + 2 * nmetadata > dataend) {
948 967 goto overflow;
949 968 }
950 969 meta = data + (2 * nmetadata);
951 970 metadata = PyTuple_New(nmetadata);
952 971 if (metadata == NULL) {
953 972 goto bail;
954 973 }
955 974 for (i = 0; i < nmetadata; i++) {
956 975 PyObject *tmp, *left = NULL, *right = NULL;
957 976 Py_ssize_t leftsize = (unsigned char)(*data++);
958 977 Py_ssize_t rightsize = (unsigned char)(*data++);
959 978 if (meta + leftsize + rightsize > dataend) {
960 979 goto overflow;
961 980 }
962 981 left = PyBytes_FromStringAndSize(meta, leftsize);
963 982 meta += leftsize;
964 983 right = PyBytes_FromStringAndSize(meta, rightsize);
965 984 meta += rightsize;
966 985 tmp = PyTuple_New(2);
967 986 if (!left || !right || !tmp) {
968 987 Py_XDECREF(left);
969 988 Py_XDECREF(right);
970 989 Py_XDECREF(tmp);
971 990 goto bail;
972 991 }
973 992 PyTuple_SET_ITEM(tmp, 0, left);
974 993 PyTuple_SET_ITEM(tmp, 1, right);
975 994 PyTuple_SET_ITEM(metadata, i, tmp);
976 995 }
977 996 ret = Py_BuildValue("(OOHO(di)O)", prec, succs, flags, metadata, mtime,
978 997 (int)tz * 60, parents);
979 998 goto bail; /* return successfully */
980 999
981 1000 overflow:
982 1001 PyErr_SetString(PyExc_ValueError, "overflow in obsstore");
983 1002 bail:
984 1003 Py_XDECREF(prec);
985 1004 Py_XDECREF(succs);
986 1005 Py_XDECREF(metadata);
987 1006 Py_XDECREF(parents);
988 1007 return ret;
989 1008 }
990 1009
991 1010 static PyObject *fm1readmarkers(PyObject *self, PyObject *args)
992 1011 {
993 1012 const char *data, *dataend;
994 1013 Py_ssize_t datalen, offset, stop;
995 1014 PyObject *markers = NULL;
996 1015
997 1016 if (!PyArg_ParseTuple(args, PY23("s#nn", "y#nn"), &data, &datalen,
998 1017 &offset, &stop)) {
999 1018 return NULL;
1000 1019 }
1001 1020 if (offset < 0) {
1002 1021 PyErr_SetString(PyExc_ValueError,
1003 1022 "invalid negative offset in fm1readmarkers");
1004 1023 return NULL;
1005 1024 }
1006 1025 if (stop > datalen) {
1007 1026 PyErr_SetString(
1008 1027 PyExc_ValueError,
1009 1028 "stop longer than data length in fm1readmarkers");
1010 1029 return NULL;
1011 1030 }
1012 1031 dataend = data + datalen;
1013 1032 data += offset;
1014 1033 markers = PyList_New(0);
1015 1034 if (!markers) {
1016 1035 return NULL;
1017 1036 }
1018 1037 while (offset < stop) {
1019 1038 uint32_t msize;
1020 1039 int error;
1021 1040 PyObject *record = fm1readmarker(data, dataend, &msize);
1022 1041 if (!record) {
1023 1042 goto bail;
1024 1043 }
1025 1044 error = PyList_Append(markers, record);
1026 1045 Py_DECREF(record);
1027 1046 if (error) {
1028 1047 goto bail;
1029 1048 }
1030 1049 data += msize;
1031 1050 offset += msize;
1032 1051 }
1033 1052 return markers;
1034 1053 bail:
1035 1054 Py_DECREF(markers);
1036 1055 return NULL;
1037 1056 }
1038 1057
1039 1058 static char parsers_doc[] = "Efficient content parsing.";
1040 1059
1041 1060 PyObject *encodedir(PyObject *self, PyObject *args);
1042 1061 PyObject *pathencode(PyObject *self, PyObject *args);
1043 1062 PyObject *lowerencode(PyObject *self, PyObject *args);
1044 1063 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs);
1045 1064
1046 1065 static PyMethodDef methods[] = {
1047 1066 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
1048 1067 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1049 1068 {"parse_index2", (PyCFunction)parse_index2, METH_VARARGS | METH_KEYWORDS,
1050 1069 "parse a revlog index\n"},
1051 1070 {"isasciistr", isasciistr, METH_VARARGS, "check if an ASCII string\n"},
1052 1071 {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
1053 1072 {"asciiupper", asciiupper, METH_VARARGS, "uppercase an ASCII string\n"},
1054 1073 {"dict_new_presized", dict_new_presized, METH_VARARGS,
1055 1074 "construct a dict with an expected size\n"},
1056 1075 {"make_file_foldmap", make_file_foldmap, METH_VARARGS,
1057 1076 "make file foldmap\n"},
1058 1077 {"jsonescapeu8fast", jsonescapeu8fast, METH_VARARGS,
1059 1078 "escape a UTF-8 byte string to JSON (fast path)\n"},
1060 1079 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
1061 1080 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
1062 1081 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
1063 1082 {"fm1readmarkers", fm1readmarkers, METH_VARARGS,
1064 1083 "parse v1 obsolete markers\n"},
1065 1084 {NULL, NULL}};
1066 1085
1067 1086 void dirs_module_init(PyObject *mod);
1068 1087 void manifest_module_init(PyObject *mod);
1069 1088 void revlog_module_init(PyObject *mod);
1070 1089
1071 1090 static const int version = 20;
1072 1091
1073 1092 static void module_init(PyObject *mod)
1074 1093 {
1075 1094 PyModule_AddIntConstant(mod, "version", version);
1076 1095
1077 1096 /* This module constant has two purposes. First, it lets us unit test
1078 1097 * the ImportError raised without hard-coding any error text. This
1079 1098 * means we can change the text in the future without breaking tests,
1080 1099 * even across changesets without a recompile. Second, its presence
1081 1100 * can be used to determine whether the version-checking logic is
1082 1101 * present, which also helps in testing across changesets without a
1083 1102 * recompile. Note that this means the pure-Python version of parsers
1084 1103 * should not have this module constant. */
1085 1104 PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
1086 1105
1087 1106 dirs_module_init(mod);
1088 1107 manifest_module_init(mod);
1089 1108 revlog_module_init(mod);
1090 1109
1091 1110 if (PyType_Ready(&dirstateItemType) < 0) {
1092 1111 return;
1093 1112 }
1094 1113 Py_INCREF(&dirstateItemType);
1095 1114 PyModule_AddObject(mod, "DirstateItem", (PyObject *)&dirstateItemType);
1096 1115 }
1097 1116
1098 1117 static int check_python_version(void)
1099 1118 {
1100 1119 PyObject *sys = PyImport_ImportModule("sys"), *ver;
1101 1120 long hexversion;
1102 1121 if (!sys) {
1103 1122 return -1;
1104 1123 }
1105 1124 ver = PyObject_GetAttrString(sys, "hexversion");
1106 1125 Py_DECREF(sys);
1107 1126 if (!ver) {
1108 1127 return -1;
1109 1128 }
1110 1129 hexversion = PyInt_AsLong(ver);
1111 1130 Py_DECREF(ver);
1112 1131 /* sys.hexversion is a 32-bit number by default, so the -1 case
1113 1132 * should only occur in unusual circumstances (e.g. if sys.hexversion
1114 1133 * is manually set to an invalid value). */
1115 1134 if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
1116 1135 PyErr_Format(PyExc_ImportError,
1117 1136 "%s: The Mercurial extension "
1118 1137 "modules were compiled with Python " PY_VERSION
1119 1138 ", but "
1120 1139 "Mercurial is currently using Python with "
1121 1140 "sys.hexversion=%ld: "
1122 1141 "Python %s\n at: %s",
1123 1142 versionerrortext, hexversion, Py_GetVersion(),
1124 1143 Py_GetProgramFullPath());
1125 1144 return -1;
1126 1145 }
1127 1146 return 0;
1128 1147 }
1129 1148
1130 1149 #ifdef IS_PY3K
1131 1150 static struct PyModuleDef parsers_module = {PyModuleDef_HEAD_INIT, "parsers",
1132 1151 parsers_doc, -1, methods};
1133 1152
1134 1153 PyMODINIT_FUNC PyInit_parsers(void)
1135 1154 {
1136 1155 PyObject *mod;
1137 1156
1138 1157 if (check_python_version() == -1)
1139 1158 return NULL;
1140 1159 mod = PyModule_Create(&parsers_module);
1141 1160 module_init(mod);
1142 1161 return mod;
1143 1162 }
1144 1163 #else
1145 1164 PyMODINIT_FUNC initparsers(void)
1146 1165 {
1147 1166 PyObject *mod;
1148 1167
1149 1168 if (check_python_version() == -1) {
1150 1169 return;
1151 1170 }
1152 1171 mod = Py_InitModule3("parsers", methods, parsers_doc);
1153 1172 module_init(mod);
1154 1173 }
1155 1174 #endif
@@ -1,118 +1,411 b''
1 1 # v2.py - Pure-Python implementation of the dirstate-v2 file format
2 2 #
3 3 # Copyright Mercurial Contributors
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import struct
11 11
12 from .. import policy
12 from ..thirdparty import attr
13 from .. import error, policy
13 14
14 15 parsers = policy.importmod('parsers')
15 16
16 17
17 18 # Must match the constant of the same name in
18 19 # `rust/hg-core/src/dirstate_tree/on_disk.rs`
19 20 TREE_METADATA_SIZE = 44
20 21 NODE_SIZE = 43
21 22
22 23
23 24 # Must match the `TreeMetadata` Rust struct in
24 25 # `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there.
25 26 #
26 27 # * 4 bytes: start offset of root nodes
27 28 # * 4 bytes: number of root nodes
28 29 # * 4 bytes: total number of nodes in the tree that have an entry
29 30 # * 4 bytes: total number of nodes in the tree that have a copy source
30 31 # * 4 bytes: number of bytes in the data file that are not used anymore
31 32 # * 4 bytes: unused
32 33 # * 20 bytes: SHA-1 hash of ignore patterns
33 34 TREE_METADATA = struct.Struct('>LLLLL4s20s')
34 35
35 36
36 37 # Must match the `Node` Rust struct in
37 38 # `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there.
38 39 #
39 40 # * 4 bytes: start offset of full path
40 41 # * 2 bytes: length of the full path
41 42 # * 2 bytes: length within the full path before its "base name"
42 43 # * 4 bytes: start offset of the copy source if any, or zero for no copy source
43 44 # * 2 bytes: length of the copy source if any, or unused
44 45 # * 4 bytes: start offset of child nodes
45 46 # * 4 bytes: number of child nodes
46 47 # * 4 bytes: number of descendant nodes that have an entry
47 48 # * 4 bytes: number of descendant nodes that have a "tracked" state
48 49 # * 1 byte: flags
49 50 # * 4 bytes: expected size
50 51 # * 4 bytes: mtime seconds
51 52 # * 4 bytes: mtime nanoseconds
52 53 NODE = struct.Struct('>LHHLHLLLLBlll')
53 54
54 55
55 56 assert TREE_METADATA_SIZE == TREE_METADATA.size
56 57 assert NODE_SIZE == NODE.size
57 58
58 59
def parse_dirstate(map, copy_map, data, tree_metadata):
    """Fill `map` and `copy_map` from a complete dirstate-v2 binary blob.

    - map: a {path: entry} mapping that will be filled
    - copy_map: a {path: copy-source} mapping that will be filled
    - data: a binary blob containing the v2 nodes data
    - tree_metadata: a binary blob of the top level node (from the docket)
    """
    fields = TREE_METADATA.unpack(tree_metadata)
    # Only the location of the root nodes is needed for parsing: the first
    # field is their start offset, the second is how many there are.
    root_nodes_start = fields[0]
    root_nodes_len = fields[1]
    parse_nodes(map, copy_map, data, root_nodes_start, root_nodes_len)
77 78
78 79
def parse_nodes(map, copy_map, data, start, len):
    """Read `len` consecutive nodes of `data`, the first one at `start`.

    Each node's children are parsed (recursively) before the node itself.
    Nodes whose item is tracked somewhere are added to `map`, and to
    `copy_map` as well when they record a copy source.
    """
    for index in range(len):
        offset = start + index * NODE_SIZE
        fields = NODE.unpack(slice_with_len(data, offset, NODE_SIZE))
        # Positions follow the field order of the `NODE` struct; the
        # fields not picked here are not needed for parsing.
        path_start, path_len = fields[0], fields[1]
        copy_source_start, copy_source_len = fields[3], fields[4]
        children_start, children_count = fields[5], fields[6]
        flags, size, mtime_s = fields[9], fields[10], fields[11]

        # Descend into the children first
        parse_nodes(map, copy_map, data, children_start, children_count)

        item = parsers.DirstateItem.from_v2_data(flags, size, mtime_s)
        if item.any_tracked:
            path = slice_with_len(data, path_start, path_len)
            map[path] = item
            # A zero offset means "no copy source"
            if copy_source_start:
                copy_map[path] = slice_with_len(
                    data, copy_source_start, copy_source_len
                )
115 116
116 117
def slice_with_len(data, start, len):
    """Return the `len` items of `data` that begin at index `start`."""
    end = start + len
    return data[start:end]
120
121
@attr.s
class Node(object):
    """In-memory description of one tree node while packing a dirstate-v2.

    `pack` turns the node into the fixed-size record described by the
    `NODE` struct.
    """

    # full path of the node (bytes)
    path = attr.ib()
    # dirstate item stored at this path, or None when there is none
    entry = attr.ib()
    # Node of the containing folder, None when not set
    parent = attr.ib(default=None)
    # number of direct children
    children_count = attr.ib(default=0)
    # offset at which the packed children records start
    children_offset = attr.ib(default=0)
    # counters written verbatim into the packed record
    descendants_with_entry = attr.ib(default=0)
    tracked_descendants = attr.ib(default=0)

    def pack(self, copy_map, paths_offset):
        """Return the packed `NODE` record for this node.

        - copy_map: a {path: copy-source} mapping, looked up with this
          node's path
        - paths_offset: offset at which this node's path is stored; the
          copy source, if any, is expected right after the path
        """
        path = self.path
        copy = copy_map.get(path)
        entry = self.entry

        path_start = paths_offset
        path_len = len(path)
        basename_start = path.rfind(b'/') + 1  # 0 if rfind returns -1
        if copy is not None:
            copy_source_start = paths_offset + len(path)
            copy_source_len = len(copy)
        else:
            # a zero offset is how "no copy source" is recorded
            copy_source_start = 0
            copy_source_len = 0
        if entry is not None:
            flags, size, mtime_s = entry.v2_data()
            mtime_ns = 0
        else:
            # There are no mtime-cached directories in the Python implementation
            flags = 0
            size = 0
            mtime_s = 0
            mtime_ns = 0
        return NODE.pack(
            path_start,
            path_len,
            basename_start,
            copy_source_start,
            copy_source_len,
            self.children_offset,
            self.children_count,
            self.descendants_with_entry,
            self.tracked_descendants,
            flags,
            size,
            mtime_s,
            mtime_ns,
        )
171
172
def pack_dirstate(map, copy_map, now):
    """
    Pack `map` and `copy_map` into the dirstate v2 binary format and return
    a `(data, tree_metadata)` pair: the bytearray of nodes and paths, and the
    packed tree metadata to be stored in the docket.
    `now` is a timestamp of the current filesystem time used to detect race
    conditions in writing the dirstate to disk, see inline comment.

    The on-disk format expects a tree-like structure where the leaves are
    written first (and sorted per-directory), going up levels until the root
    node and writing that one to the docket. See more details on the on-disk
    format in `mercurial/helptext/internals/dirstate-v2`.

    Since both `map` and `copy_map` are flat dicts we need to figure out the
    hierarchy. This algorithm does so without having to build the entire tree
    in-memory: it only keeps the minimum number of nodes around to satisfy the
    format.

    # Algorithm explanation

    This explanation does not talk about the different counters for tracked
    descendants and storing the copies, but that work is pretty simple once
    this algorithm is in place.

    ## Building a subtree

    First, sort `map`: this makes it so the leaves of the tree are contiguous
    per directory (i.e. a/b/c and a/b/d will be next to each other in the list),
    and enables us to use the ordering of folders to have a "cursor" of the
    current folder we're in without ever going twice in the same branch of the
    tree. The cursor is a node that remembers its parent and any information
    relevant to the format (see the `Node` class), building the relevant part
    of the tree lazily.
    Then, for each file in `map`, move the cursor into the tree to the
    corresponding folder of the file: for example, if the very first file
    is "a/b/c", we start from `Node[""]`, create `Node["a"]` which points to
    its parent `Node[""]`, then create `Node["a/b"]`, which points to its parent
    `Node["a"]`. These nodes are kept around in a stack.
    If the next file in `map` is in the same subtree ("a/b/d" or "a/b/e/f"), we
    add it to the stack and keep looping with the same logic of creating the
    tree nodes as needed. If however the next file in `map` is *not* in the same
    subtree ("a/other", if we're still in the "a/b" folder), then we know that
    the subtree we're in is complete.

    ## Writing the subtree

    We have the entire subtree in the stack, so we start writing it to disk
    folder by folder. The way we write a folder is to pop the stack into a list
    until the folder changes, reverse this list of direct children (to satisfy
    the format requirement that children be sorted). This process repeats until
    we hit the "other" subtree.

    An example:
        a
        dir1/b
        dir1/c
        dir2/dir3/d
        dir2/dir3/e
        dir2/f

    Would have us:
        - add to the stack until "dir2/dir3/e"
        - realize that "dir2/f" is in a different subtree
        - pop "dir2/dir3/e", "dir2/dir3/d", reverse them so they're sorted and
          pack them since the next entry is "dir2/dir3"
        - go back up to "dir2"
        - add "dir2/f" to the stack
        - realize we're done with the map
        - pop "dir2/f", "dir2/dir3" from the stack, reverse and pack them
        - go up to the root node, do the same to write "a", "dir1" and "dir2" in
          that order

    ## Special case for the root node

    The root node is not serialized in the format, but its information is
    written to the docket. Again, see more details on the on-disk format in
    `mercurial/helptext/internals/dirstate-v2`.
    """

    def is_within(folder, ancestor):
        # True when `folder` is `ancestor` itself or lives somewhere below
        # it. Comparing raw prefixes is not enough: b"ab" starts with b"a"
        # without being inside of it.
        return (
            ancestor == b""
            or folder == ancestor
            or folder.startswith(ancestor + b"/")
        )

    now = int(now)
    data = bytearray()
    root_nodes_start = 0
    root_nodes_len = 0
    nodes_with_entry_count = 0
    nodes_with_copy_source_count = 0
    # Will always be 0 since this implementation always re-writes everything
    # to disk
    unreachable_bytes = 0
    unused = b'\x00' * 4
    # This is an optimization that's only useful for the Rust implementation
    ignore_patterns_hash = b'\x00' * 20

    if len(map) == 0:
        tree_metadata = TREE_METADATA.pack(
            root_nodes_start,
            root_nodes_len,
            nodes_with_entry_count,
            nodes_with_copy_source_count,
            unreachable_bytes,
            unused,
            ignore_patterns_hash,
        )
        return data, tree_metadata

    # Sort on path components rather than on the raw bytes: b"-" sorts
    # before b"/", so a plain sort would visit "a-b/x" before "a/x" and the
    # children of a folder would not come out ordered by name.
    sorted_map = sorted(map.items(), key=lambda x: x[0].split(b"/"))

    # Use a stack so that we only have to remember the nodes we currently
    # need instead of building the entire tree in memory
    stack = []
    current_node = Node(b"", None)
    stack.append(current_node)

    for index, (path, entry) in enumerate(sorted_map, 1):
        if entry.need_delay(now):
            # The file was last modified "simultaneously" with the current
            # write to dirstate (i.e. within the same second for file-
            # systems with a granularity of 1 sec). This commonly happens
            # for at least a couple of files on 'update'.
            # The user could change the file without changing its size
            # within the same second. Invalidate the file's mtime in
            # dirstate, forcing future 'status' calls to compare the
            # contents of the file if the size is the same. This prevents
            # mistakenly treating such files as clean.
            entry.set_possibly_dirty()
        nodes_with_entry_count += 1
        if path in copy_map:
            nodes_with_copy_source_count += 1
        current_folder = get_folder(path)
        current_node = move_to_correct_node_in_tree(
            current_folder, current_node, stack
        )

        current_node.children_count += 1
        # Entries from `map` are never `None`
        if entry.tracked:
            current_node.tracked_descendants += 1
        current_node.descendants_with_entry += 1
        stack.append(Node(path, entry, current_node))

        should_pack = True
        next_path = None
        next_folder = None
        if index < len(sorted_map):
            # Determine if the next entry is in the same sub-tree, if so don't
            # pack yet
            next_path = sorted_map[index][0]
            next_folder = get_folder(next_path)
            should_pack = not is_within(next_folder, current_folder)
        if should_pack:
            pack_directory_children(current_node, copy_map, data, stack)
            while stack and current_node.path != b"":
                # Go up the tree and write until we reach the folder of the next
                # entry (if any, otherwise the root)
                parent = current_node.parent
                in_parent_folder_of_next_entry = (
                    next_path is not None
                    and is_within(next_folder, get_folder(stack[-1].path))
                )
                if parent is None or in_parent_folder_of_next_entry:
                    break
                pack_directory_children(parent, copy_map, data, stack)
                current_node = parent

    # Special case for the root node since we don't write it to disk, only its
    # children to the docket
    current_node = stack.pop()
    assert current_node.path == b"", current_node.path
    assert len(stack) == 0, len(stack)

    tree_metadata = TREE_METADATA.pack(
        current_node.children_offset,
        current_node.children_count,
        nodes_with_entry_count,
        nodes_with_copy_source_count,
        unreachable_bytes,
        unused,
        ignore_patterns_hash,
    )

    return data, tree_metadata
348
349
def get_folder(path):
    """
    Return everything before the last b'/' of `path`, or an empty bytestring
    when `path` contains no b'/' at all (i.e. it sits at the root).
    """
    # `rpartition` yields an empty head when the separator is absent
    folder, _sep, _basename = path.rpartition(b'/')
    return folder
355
356
def move_to_correct_node_in_tree(target_folder, current_node, stack):
    """
    Move inside the dirstate node tree to the node corresponding to
    `target_folder`, creating the missing nodes along the way if needed.

    - target_folder: path (bytes) of the folder to reach, b"" for the root
    - current_node: the node the cursor is currently on
    - stack: the stack of pending nodes; newly created folder nodes are
      appended to it

    Returns the node whose path is `target_folder`.
    """
    while target_folder != current_node.path:
        current_path = current_node.path
        # The target differs from the current path at this point, so it is
        # below the current node only if it continues with a separator. A raw
        # `startswith` would treat b"ab" as being inside b"a" and never
        # terminate.
        is_below = current_path == b"" or target_folder.startswith(
            current_path + b'/'
        )
        if is_below:
            # We need to go down a folder
            prefix = target_folder[len(current_path) :].lstrip(b'/')
            subfolder_name = prefix.split(b'/', 1)[0]
            if current_path:
                subfolder_path = current_path + b'/' + subfolder_name
            else:
                subfolder_path = subfolder_name
            next_node = stack[-1]
            if next_node.path == target_folder:
                # This folder is now a file and only contains removed entries
                # merge with the last node
                current_node = next_node
            else:
                current_node.children_count += 1
                current_node = Node(subfolder_path, None, current_node)
                stack.append(current_node)
        else:
            # We need to go up a folder
            current_node = current_node.parent
    return current_node
384
385
def pack_directory_children(node, copy_map, data, stack):
    """
    Pop the direct children of `node` off `stack` and append them to `data`:
    first every child's path (followed by its copy source, if any), then the
    packed child records in one contiguous run. `node` gets the offset of
    that run and has the descendant counters of its children added to its
    own.
    """
    children = []
    while True:
        top = stack[-1]
        # Stop at the root node or at the first node of another folder
        if top.path == b"" or get_folder(top.path) != node.path:
            break
        children.append(stack.pop())
    if len(children) == 0:
        raise error.ProgrammingError(b"no direct children for %r" % node.path)

    packed_nodes = bytearray()
    # Children were popped last-to-first: walk them backwards to restore
    # their sorted order. Paths go to `data` right away, the packed records
    # are held back until all the paths are written.
    for child in children[::-1]:
        record = child.pack(copy_map=copy_map, paths_offset=len(data))
        packed_nodes.extend(record)
        data.extend(child.path)
        data.extend(copy_map.get(child.path, b""))
        node.tracked_descendants += child.tracked_descendants
        node.descendants_with_entry += child.descendants_with_entry
    # The fixed-size records of all the children are stored together
    node.children_offset = len(data)
    data.extend(packed_nodes)
@@ -1,770 +1,789 b''
1 1 # parsers.py - Python implementation of parsers.c
2 2 #
3 3 # Copyright 2009 Olivia Mackall <olivia@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import stat
11 11 import struct
12 12 import zlib
13 13
14 14 from ..node import (
15 15 nullrev,
16 16 sha1nodeconstants,
17 17 )
18 18 from ..thirdparty import attr
19 19 from .. import (
20 20 error,
21 21 pycompat,
22 22 revlogutils,
23 23 util,
24 24 )
25 25
26 26 from ..revlogutils import nodemap as nodemaputil
27 27 from ..revlogutils import constants as revlog_constants
28 28
29 29 stringio = pycompat.bytesio
30 30
31 31
32 32 _pack = struct.pack
33 33 _unpack = struct.unpack
34 34 _compress = zlib.compress
35 35 _decompress = zlib.decompress
36 36
37 37
38 38 # a special value used internally for `size` if the file comes from the other parent
39 39 FROM_P2 = -2
40 40
41 41 # a special value used internally for `size` if the file is modified/merged/added
42 42 NONNORMAL = -1
43 43
44 45 # a special value used internally for `time` if the time is ambiguous
45 45 AMBIGUOUS_TIME = -1
46 46
47 47 # Bits of the `flags` byte inside a node in the file format
48 48 DIRSTATE_V2_WDIR_TRACKED = 1 << 0
49 49 DIRSTATE_V2_P1_TRACKED = 1 << 1
50 50 DIRSTATE_V2_P2_INFO = 1 << 2
51 51 DIRSTATE_V2_HAS_MODE_AND_SIZE = 1 << 3
52 52 DIRSTATE_V2_HAS_MTIME = 1 << 4
53 53 DIRSTATE_V2_MODE_EXEC_PERM = 1 << 5
54 54 DIRSTATE_V2_MODE_IS_SYMLINK = 1 << 6
55 55
56 56
57 57 @attr.s(slots=True, init=False)
58 58 class DirstateItem(object):
59 59 """represent a dirstate entry
60 60
61 61 It hold multiple attributes
62 62
63 63 # about file tracking
64 64 - wc_tracked: is the file tracked by the working copy
65 65 - p1_tracked: is the file tracked in working copy first parent
66 66 - p2_info: the file has been involved in some merge operation. Either
67 67 because it was actually merged, or because the p2 version was
68 68 ahead, or because some rename moved it there. In either case
69 69 `hg status` will want it displayed as modified.
70 70
71 71 # about the file state expected from p1 manifest:
72 72 - mode: the file mode in p1
73 73 - size: the file size in p1
74 74
75 75 These value can be set to None, which mean we don't have a meaningful value
76 76 to compare with. Either because we don't really care about them as there
77 77 `status` is known without having to look at the disk or because we don't
78 78 know these right now and a full comparison will be needed to find out if
79 79 the file is clean.
80 80
81 81 # about the file state on disk last time we saw it:
82 82 - mtime: the last known clean mtime for the file.
83 83
84 84 This value can be set to None if no cachable state exist. Either because we
85 85 do not care (see previous section) or because we could not cache something
86 86 yet.
87 87 """
88 88
89 89 _wc_tracked = attr.ib()
90 90 _p1_tracked = attr.ib()
91 91 _p2_info = attr.ib()
92 92 _mode = attr.ib()
93 93 _size = attr.ib()
94 94 _mtime = attr.ib()
95 95
96 96 def __init__(
97 97 self,
98 98 wc_tracked=False,
99 99 p1_tracked=False,
100 100 p2_info=False,
101 101 has_meaningful_data=True,
102 102 has_meaningful_mtime=True,
103 103 parentfiledata=None,
104 104 ):
105 105 self._wc_tracked = wc_tracked
106 106 self._p1_tracked = p1_tracked
107 107 self._p2_info = p2_info
108 108
109 109 self._mode = None
110 110 self._size = None
111 111 self._mtime = None
112 112 if parentfiledata is None:
113 113 has_meaningful_mtime = False
114 114 has_meaningful_data = False
115 115 if has_meaningful_data:
116 116 self._mode = parentfiledata[0]
117 117 self._size = parentfiledata[1]
118 118 if has_meaningful_mtime:
119 119 self._mtime = parentfiledata[2]
120 120
121 121 @classmethod
122 122 def from_v2_data(cls, flags, size, mtime):
123 123 """Build a new DirstateItem object from V2 data"""
124 124 has_mode_size = bool(flags & DIRSTATE_V2_HAS_MODE_AND_SIZE)
125 125 mode = None
126 126 if has_mode_size:
127 127 assert stat.S_IXUSR == 0o100
128 128 if flags & DIRSTATE_V2_MODE_EXEC_PERM:
129 129 mode = 0o755
130 130 else:
131 131 mode = 0o644
132 132 if flags & DIRSTATE_V2_MODE_IS_SYMLINK:
133 133 mode |= stat.S_IFLNK
134 134 else:
135 135 mode |= stat.S_IFREG
136 136 return cls(
137 137 wc_tracked=bool(flags & DIRSTATE_V2_WDIR_TRACKED),
138 138 p1_tracked=bool(flags & DIRSTATE_V2_P1_TRACKED),
139 139 p2_info=bool(flags & DIRSTATE_V2_P2_INFO),
140 140 has_meaningful_data=has_mode_size,
141 141 has_meaningful_mtime=bool(flags & DIRSTATE_V2_HAS_MTIME),
142 142 parentfiledata=(mode, size, mtime),
143 143 )
144 144
145 145 @classmethod
146 146 def from_v1_data(cls, state, mode, size, mtime):
147 147 """Build a new DirstateItem object from V1 data
148 148
149 149 Since the dirstate-v1 format is frozen, the signature of this function
150 150 is not expected to change, unlike the __init__ one.
151 151 """
152 152 if state == b'm':
153 153 return cls(wc_tracked=True, p1_tracked=True, p2_info=True)
154 154 elif state == b'a':
155 155 return cls(wc_tracked=True)
156 156 elif state == b'r':
157 157 if size == NONNORMAL:
158 158 p1_tracked = True
159 159 p2_info = True
160 160 elif size == FROM_P2:
161 161 p1_tracked = False
162 162 p2_info = True
163 163 else:
164 164 p1_tracked = True
165 165 p2_info = False
166 166 return cls(p1_tracked=p1_tracked, p2_info=p2_info)
167 167 elif state == b'n':
168 168 if size == FROM_P2:
169 169 return cls(wc_tracked=True, p2_info=True)
170 170 elif size == NONNORMAL:
171 171 return cls(wc_tracked=True, p1_tracked=True)
172 172 elif mtime == AMBIGUOUS_TIME:
173 173 return cls(
174 174 wc_tracked=True,
175 175 p1_tracked=True,
176 176 has_meaningful_mtime=False,
177 177 parentfiledata=(mode, size, 42),
178 178 )
179 179 else:
180 180 return cls(
181 181 wc_tracked=True,
182 182 p1_tracked=True,
183 183 parentfiledata=(mode, size, mtime),
184 184 )
185 185 else:
186 186 raise RuntimeError(b'unknown state: %s' % state)
187 187
188 188 def set_possibly_dirty(self):
189 189 """Mark a file as "possibly dirty"
190 190
191 191 This means the next status call will have to actually check its content
192 192 to make sure it is correct.
193 193 """
194 194 self._mtime = None
195 195
196 196 def set_clean(self, mode, size, mtime):
197 197 """mark a file as "clean" cancelling potential "possibly dirty call"
198 198
199 199 Note: this function is a descendant of `dirstate.normal` and is
200 200 currently expected to be call on "normal" entry only. There are not
201 201 reason for this to not change in the future as long as the ccode is
202 202 updated to preserve the proper state of the non-normal files.
203 203 """
204 204 self._wc_tracked = True
205 205 self._p1_tracked = True
206 206 self._mode = mode
207 207 self._size = size
208 208 self._mtime = mtime
209 209
210 210 def set_tracked(self):
211 211 """mark a file as tracked in the working copy
212 212
213 213 This will ultimately be called by command like `hg add`.
214 214 """
215 215 self._wc_tracked = True
216 216 # `set_tracked` is replacing various `normallookup` call. So we mark
217 217 # the files as needing lookup
218 218 #
219 219 # Consider dropping this in the future in favor of something less broad.
220 220 self._mtime = None
221 221
222 222 def set_untracked(self):
223 223 """mark a file as untracked in the working copy
224 224
225 225 This will ultimately be called by command like `hg remove`.
226 226 """
227 227 self._wc_tracked = False
228 228 self._mode = None
229 229 self._size = None
230 230 self._mtime = None
231 231
232 232 def drop_merge_data(self):
233 233 """remove all "merge-only" from a DirstateItem
234 234
235 235 This is to be call by the dirstatemap code when the second parent is dropped
236 236 """
237 237 if self._p2_info:
238 238 self._p2_info = False
239 239 self._mode = None
240 240 self._size = None
241 241 self._mtime = None
242 242
243 243 @property
244 244 def mode(self):
245 245 return self.v1_mode()
246 246
247 247 @property
248 248 def size(self):
249 249 return self.v1_size()
250 250
251 251 @property
252 252 def mtime(self):
253 253 return self.v1_mtime()
254 254
255 255 @property
256 256 def state(self):
257 257 """
258 258 States are:
259 259 n normal
260 260 m needs merging
261 261 r marked for removal
262 262 a marked for addition
263 263
264 264 XXX This "state" is a bit obscure and mostly a direct expression of the
265 265 dirstatev1 format. It would make sense to ultimately deprecate it in
266 266 favor of the more "semantic" attributes.
267 267 """
268 268 if not self.any_tracked:
269 269 return b'?'
270 270 return self.v1_state()
271 271
272 272 @property
273 273 def tracked(self):
274 274 """True is the file is tracked in the working copy"""
275 275 return self._wc_tracked
276 276
277 277 @property
278 278 def any_tracked(self):
279 279 """True is the file is tracked anywhere (wc or parents)"""
280 280 return self._wc_tracked or self._p1_tracked or self._p2_info
281 281
282 282 @property
283 283 def added(self):
284 284 """True if the file has been added"""
285 285 return self._wc_tracked and not (self._p1_tracked or self._p2_info)
286 286
287 287 @property
288 288 def maybe_clean(self):
289 289 """True if the file has a chance to be in the "clean" state"""
290 290 if not self._wc_tracked:
291 291 return False
292 292 elif not self._p1_tracked:
293 293 return False
294 294 elif self._p2_info:
295 295 return False
296 296 return True
297 297
298 298 @property
299 299 def p1_tracked(self):
300 300 """True if the file is tracked in the first parent manifest"""
301 301 return self._p1_tracked
302 302
303 303 @property
304 304 def p2_info(self):
305 305 """True if the file needed to merge or apply any input from p2
306 306
307 307 See the class documentation for details.
308 308 """
309 309 return self._wc_tracked and self._p2_info
310 310
311 311 @property
312 312 def removed(self):
313 313 """True if the file has been removed"""
314 314 return not self._wc_tracked and (self._p1_tracked or self._p2_info)
315 315
316 def v2_data(self):
317 """Returns (flags, mode, size, mtime) for v2 serialization"""
318 flags = 0
319 if self._wc_tracked:
320 flags |= DIRSTATE_V2_WDIR_TRACKED
321 if self._p1_tracked:
322 flags |= DIRSTATE_V2_P1_TRACKED
323 if self._p2_info:
324 flags |= DIRSTATE_V2_P2_INFO
325 if self.mode is not None and self.size is not None:
326 flags |= DIRSTATE_V2_HAS_MODE_AND_SIZE
327 if self.mode & stat.S_IXUSR:
328 flags |= DIRSTATE_V2_MODE_EXEC_PERM
329 if stat.S_ISLNK(self.mode):
330 flags |= DIRSTATE_V2_MODE_IS_SYMLINK
331 if self.mtime is not None:
332 flags |= DIRSTATE_V2_HAS_MTIME
333 return (flags, self.size or 0, self.mtime or 0)
334
def v1_state(self):
    """return a "state" suitable for v1 serialization

    One of b'r' (removed), b'm' (p1 tracked with p2 information),
    b'a' (added) or b'n' (anything else). Raises RuntimeError when the
    item is not tracked anywhere.
    """
    if not self.any_tracked:
        # the object has no state to record, this is -currently-
        # unsupported
        raise RuntimeError('untracked item')
    if self.removed:
        return b'r'
    if self._p1_tracked and self._p2_info:
        return b'm'
    return b'a' if self.added else b'n'
330 349
def v1_mode(self):
    """return a "mode" suitable for v1 serialization

    An unset mode is reported as 0.
    """
    mode = self._mode
    if mode is None:
        return 0
    return mode
334 353
def v1_size(self):
    """return a "size" suitable for v1 serialization

    Depending on the item state this is either the recorded size, 0, or
    one of the NONNORMAL / FROM_P2 markers. Raises RuntimeError when the
    item is not tracked anywhere.
    """
    if not self.any_tracked:
        # the object has no state to record, this is -currently-
        # unsupported
        raise RuntimeError('untracked item')
    if self.removed and self._p1_tracked and self._p2_info:
        return NONNORMAL
    if self._p2_info:
        return FROM_P2
    if self.removed:
        return 0
    if self.added or self._size is None:
        return NONNORMAL
    return self._size
353 372
def v1_mtime(self):
    """return a "mtime" suitable for v1 serialization

    Removed items report 0. AMBIGUOUS_TIME is reported when no mtime is
    recorded, when p2 information is attached, or when the file is not
    tracked in p1. Raises RuntimeError when the item is not tracked
    anywhere.
    """
    if not self.any_tracked:
        # the object has no state to record, this is -currently-
        # unsupported
        raise RuntimeError('untracked item')
    if self.removed:
        return 0
    if self._mtime is None or self._p2_info or not self._p1_tracked:
        return AMBIGUOUS_TIME
    return self._mtime
370 389
def need_delay(self, now):
    """True if the stored mtime would be ambiguous with the current time

    Only items whose v1 state is b'n' can qualify.
    """
    if self.v1_state() != b'n':
        return False
    return self.v1_mtime() == now
374 393
375 394
def gettype(q):
    """Return the low 16 bits of `q` as an int"""
    low_bits = q & 0xFFFF
    return int(low_bits)
378 397
379 398
class BaseIndexObject(object):
    """Behaviour shared by the pure-Python index objects of this module

    Entries are addressed by revision number. The first `_lgt` entries are
    read from `_data` (at the position given by `_calculate_index`), the
    following ones from the `_extra` list of already packed entries.
    Subclasses are expected to provide `_data`, `_lgt`, `_extra` and
    `_calculate_index`.
    """

    # Can I be passed to an algorithm implemented in Rust?
    rust_ext_compat = 0
    # Format of an index entry according to Python's `struct` language
    index_format = revlog_constants.INDEX_ENTRY_V1
    # Size of a C unsigned long long int, platform independent
    big_int_size = struct.calcsize(b'>Q')
    # Size of a C long int, platform independent
    int_size = struct.calcsize(b'>i')
    # An empty index entry, used as a default value to be overridden, or
    # nullrev
    null_item = (
        0,
        0,
        0,
        -1,
        -1,
        -1,
        -1,
        sha1nodeconstants.nullid,
        0,
        0,
        revlog_constants.COMP_MODE_INLINE,
        revlog_constants.COMP_MODE_INLINE,
    )

    @util.propertycache
    def entry_size(self):
        """size of one packed entry, as defined by `index_format`"""
        return self.index_format.size

    @property
    def nodemap(self):
        """deprecated accessor for the node -> rev mapping"""
        msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self._nodemap

    @util.propertycache
    def _nodemap(self):
        """node -> rev mapping built from every entry of the index"""
        mapping = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
        for rev in range(len(self)):
            # the node id is stored in field 7 of an entry
            node = self[rev][7]
            mapping[node] = rev
        return mapping

    def has_node(self, node):
        """return True if the node exist in the index"""
        return node in self._nodemap

    def rev(self, node):
        """return a revision for a node

        If the node is unknown, raise a RevlogError"""
        return self._nodemap[node]

    def get_rev(self, node):
        """return a revision for a node

        If the node is unknown, return None"""
        return self._nodemap.get(node)

    def _stripnodes(self, start):
        """forget the nodes of revisions >= start in the cached nodemap"""
        if '_nodemap' not in vars(self):
            # the mapping was never computed, nothing to keep in sync
            return
        for rev in range(start, len(self)):
            node = self[rev][7]
            del self._nodemap[node]

    def clearcaches(self):
        """drop the cached nodemap, if any"""
        self.__dict__.pop('_nodemap', None)

    def __len__(self):
        return self._lgt + len(self._extra)

    def append(self, tup):
        """add a new entry at the end of the index"""
        rev = len(self)
        if '_nodemap' in vars(self):
            self._nodemap[tup[7]] = rev
        self._extra.append(self._pack_entry(rev, tup))

    def _pack_entry(self, rev, entry):
        """pack the first 8 fields of `entry` using `index_format`"""
        # fields 8 and 9 must be zero for this format
        assert entry[8] == 0
        assert entry[9] == 0
        return self.index_format.pack(*entry[:8])

    def _check_index(self, i):
        """raise unless `i` is an int within [0, len(self))"""
        if not isinstance(i, int):
            raise TypeError(b"expecting int indexes")
        if not 0 <= i < len(self):
            raise IndexError

    def __getitem__(self, i):
        if i == -1:
            return self.null_item
        self._check_index(i)
        if i < self._lgt:
            start = self._calculate_index(i)
            raw = self._data[start : start + self.entry_size]
        else:
            raw = self._extra[i - self._lgt]
        entry = self._unpack_entry(i, raw)
        if self._lgt and i == 0:
            # the first on-disk entry gets its offset part reset to 0,
            # keeping only the type bits of field 0
            first = revlogutils.offset_type(0, gettype(entry[0]))
            entry = (first,) + entry[1:]
        return entry

    def _unpack_entry(self, rev, data):
        """unpack `data` and pad it with the four trailing default fields"""
        padding = (
            0,
            0,
            revlog_constants.COMP_MODE_INLINE,
            revlog_constants.COMP_MODE_INLINE,
        )
        return self.index_format.unpack(data) + padding

    def pack_header(self, header):
        """pack header information as binary"""
        return revlog_constants.INDEX_HEADER.pack(header)

    def entry_binary(self, rev):
        """return the raw binary string representing a revision"""
        packed = revlog_constants.INDEX_ENTRY_V1.pack(*self[rev][:8])
        if rev == 0:
            # leave out the leading INDEX_HEADER.size bytes for rev 0
            packed = packed[revlog_constants.INDEX_HEADER.size :]
        return packed
505 524
506 525
class IndexObject(BaseIndexObject):
    """Index over `data` made of fixed-size entries stored back to back"""

    def __init__(self, data):
        count, leftover = divmod(len(data), self.entry_size)
        assert leftover == 0, (
            len(data),
            self.entry_size,
            leftover,
        )
        self._data = data
        self._lgt = count
        self._extra = []

    def _calculate_index(self, i):
        """position of entry `i` inside `_data`"""
        return i * self.entry_size

    def __delitem__(self, i):
        """truncate the index; only `del index[a:-1]` is supported"""
        supported = isinstance(i, slice) and i.stop == -1 and i.step is None
        if not supported:
            raise ValueError(b"deleting slices only supports a:-1 with step 1")
        first = i.start
        self._check_index(first)
        self._stripnodes(first)
        if first < self._lgt:
            # cutting inside the initial data drops every appended entry
            self._data = self._data[: first * self.entry_size]
            self._lgt = first
            self._extra = []
        else:
            self._extra = self._extra[: first - self._lgt]
533 552
534 553
class PersistentNodeMapIndexObject(IndexObject):
    """a Debug oriented class to test persistent nodemap

    We need a simple python object to test API and higher level behavior. See
    the Rust implementation for more serious usage. This should be used only
    through the dedicated `devel.persistent-nodemap` config.
    """

    def nodemap_data_all(self):
        """Return bytes containing a full serialization of a nodemap

        The nodemap should be valid for the full set of revisions in the
        index."""
        return nodemaputil.persistent_data(self)

    def nodemap_data_incremental(self):
        """Return bytes containing an incremental update to persistent nodemap

        This contains the data for an append-only update of the data provided
        in the last call to `update_nodemap_data`.

        Returns None when no nodemap root is held, otherwise a
        `(docket, changed, data)` tuple. The stored nodemap state is reset
        afterward.
        """
        if self._nm_root is None:
            return None
        docket = self._nm_docket
        update = nodemaputil.update_persistent_data(
            self, self._nm_root, self._nm_max_idx, docket.tip_rev
        )
        changed, data = update

        self._nm_root = self._nm_max_idx = self._nm_docket = None
        return docket, changed, data

    def update_nodemap_data(self, docket, nm_data):
        """provide full block of persisted binary data for a nodemap

        The data are expected to come from disk. See `nodemap_data_all` for a
        producer of such data."""
        if nm_data is None:
            return
        root, max_idx = nodemaputil.parse_data(nm_data)
        self._nm_root, self._nm_max_idx = root, max_idx
        if root:
            self._nm_docket = docket
        else:
            # nothing usable was parsed, forget everything
            self._nm_root = self._nm_max_idx = self._nm_docket = None
577 596
578 597
class InlinedIndexObject(BaseIndexObject):
    """Index whose entries are each followed by a variable amount of data

    The position of every entry is discovered by walking `data` and is kept
    in `_offsets`.
    """

    def __init__(self, data, inline=0):
        self._data = data
        # first pass only counts the entries, second pass records where
        # each of them starts
        count = self._inline_scan(None)
        self._lgt = count
        self._inline_scan(count)
        self._extra = []

    def _inline_scan(self, lgt):
        """walk `_data` and return the number of entries found

        When `lgt` is not None, `_offsets` is (re)created with `lgt` slots
        and filled with the starting position of each entry.

        Raises ValueError if the walk does not end exactly at the end of
        the data.
        """
        record = lgt is not None
        if record:
            self._offsets = [0] * lgt
        pos = 0
        seen = 0
        last_start = len(self._data) - self.entry_size
        while pos <= last_start:
            # the length of the trailing chunk is the int found right after
            # the leading big int of the entry
            size_at = pos + self.big_int_size
            (chunk_len,) = struct.unpack(
                b'>i',
                self._data[size_at : size_at + self.int_size],
            )
            if record:
                self._offsets[seen] = pos
            seen += 1
            pos += self.entry_size + chunk_len
        if pos != len(self._data):
            raise ValueError(b"corrupted data")
        return seen

    def __delitem__(self, i):
        """truncate the index; only `del index[a:-1]` is supported"""
        supported = isinstance(i, slice) and i.stop == -1 and i.step is None
        if not supported:
            raise ValueError(b"deleting slices only supports a:-1 with step 1")
        first = i.start
        self._check_index(first)
        self._stripnodes(first)
        if first < self._lgt:
            # cutting inside the initial data drops every appended entry
            self._offsets = self._offsets[:first]
            self._lgt = first
            self._extra = []
        else:
            self._extra = self._extra[: first - self._lgt]

    def _calculate_index(self, i):
        """position of entry `i` inside `_data`"""
        return self._offsets[i]
620 639
621 640
def parse_index2(data, inline, revlogv2=False):
    """Build the index object for `data` and return `(index, cache)`

    Inline data yields an InlinedIndexObject together with a `(0, data)`
    cache. Otherwise an IndexObject (or IndexObject2 when `revlogv2` is
    set) is returned with a None cache.
    """
    if inline:
        return InlinedIndexObject(data, inline), (0, data)
    index_cls = IndexObject2 if revlogv2 else IndexObject
    return index_cls(data), None
628 647
629 648
def parse_index_cl_v2(data):
    """Return `(IndexChangelogV2(data), None)`"""
    index = IndexChangelogV2(data)
    return index, None
632 651
633 652
class IndexObject2(IndexObject):
    """Index using the INDEX_ENTRY_V2 entry format

    The two compression modes of an entry are stored together in the last
    packed field: data compression in bits 0-1, sidedata compression in
    bits 2-3.
    """

    index_format = revlog_constants.INDEX_ENTRY_V2

    def replace_sidedata_info(
        self,
        rev,
        sidedata_offset,
        sidedata_length,
        offset_flags,
        compression_mode,
    ):
        """
        Replace an existing index entry's sidedata offset and length with new
        ones.
        This cannot be used outside of the context of sidedata rewriting,
        inside the transaction that creates the revision `rev`.
        """
        if rev < 0:
            raise KeyError
        self._check_index(rev)
        if rev < self._lgt:
            # only entries appended to `_extra` can be rewritten
            msg = b"cannot rewrite entries outside of this transaction"
            raise KeyError(msg)
        fields = list(self[rev])
        fields[0] = offset_flags
        fields[8] = sidedata_offset
        fields[9] = sidedata_length
        fields[11] = compression_mode
        self._extra[rev - self._lgt] = self._pack_entry(rev, tuple(fields))

    def _unpack_entry(self, rev, data):
        """unpack `data`, splitting the last field into both compression modes"""
        fields = self.index_format.unpack(data)
        packed_modes = fields[10]
        data_comp = packed_modes & 3
        sidedata_comp = (packed_modes >> 2) & 3
        return fields[:10] + (data_comp, sidedata_comp)

    def _pack_entry(self, rev, entry):
        """pack `entry`, merging both compression modes into one field"""
        data = entry[:10]
        packed_modes = (entry[10] & 3) | ((entry[11] & 3) << 2)
        data += (packed_modes,)

        return self.index_format.pack(*data)

    def entry_binary(self, rev):
        """return the raw binary string representing a revision"""
        return self._pack_entry(rev, self[rev])

    def pack_header(self, header):
        """pack header information as binary

        Always raises ProgrammingError for this format.
        """
        raise error.ProgrammingError(
            'version header should go in the docket, not the index: %d'
            % header
        )
692 711
693 712
class IndexChangelogV2(IndexObject2):
    """Index using the INDEX_ENTRY_CL_V2 entry format

    Fields 3 and 4 of an entry are not stored: they are always equal to the
    revision number and are re-created when unpacking.
    """

    index_format = revlog_constants.INDEX_ENTRY_CL_V2

    def _unpack_entry(self, rev, data, r=True):
        """unpack `data`, re-inserting `rev` as fields 3 and 4"""
        items = self.index_format.unpack(data)
        packed_modes = items[8]
        data_comp = packed_modes & 3
        sidedata_comp = (packed_modes >> 2) & 3
        head, tail = items[:3], items[3:8]
        return head + (rev, rev) + tail + (data_comp, sidedata_comp)

    def _pack_entry(self, rev, entry):
        """pack `entry`, leaving out fields 3 and 4 (both must equal `rev`)"""
        assert entry[3] == rev, entry[3]
        assert entry[4] == rev, entry[4]
        data = entry[:3] + entry[5:10]
        packed_modes = (entry[10] & 3) | ((entry[11] & 3) << 2)
        data += (packed_modes,)
        return self.index_format.pack(*data)
712 731
713 732
def parse_index_devel_nodemap(data, inline):
    """like parse_index2, but always return a PersistentNodeMapIndexObject"""
    index = PersistentNodeMapIndexObject(data)
    return index, None
717 736
718 737
def parse_dirstate(dmap, copymap, st):
    """Parse v1 dirstate data `st`, filling `dmap` and `copymap`

    `st` starts with two 20-byte parents followed by a sequence of
    records. Each record is a b">cllll" header (state, mode, size, mtime,
    name length) followed by the name itself; a name containing a NUL byte
    is "<file>\\0<copy source>".

    Returns the list of both parents.
    """
    parents = [st[:20], st[20:40]]
    header_size = struct.calcsize(b">cllll")
    end = len(st)
    cursor = 40

    while cursor < end:
        name_start = cursor + header_size
        # a literal format here is faster
        fields = _unpack(b">cllll", st[cursor:name_start])
        cursor = name_start + fields[4]
        fname = st[name_start:cursor]
        if b'\0' in fname:
            fname, source = fname.split(b'\0')
            copymap[fname] = source
        dmap[fname] = DirstateItem.from_v1_data(*fields[:4])
    return parents
738 757
739 758
def pack_dirstate(dmap, copymap, pl, now):
    """Serialize `dmap` and `copymap` into v1 dirstate data

    The output is the concatenation of the parents in `pl` followed by one
    record per item of `dmap`: a b">cllll" header (state, mode, size,
    mtime, name length) and the name, which is "<file>\\0<copy source>"
    for files present in `copymap`.

    Items for which `need_delay(now)` is true are marked as possibly dirty
    before being written.
    """
    now = int(now)
    out = stringio()
    out.write(b"".join(pl))
    for fname, item in pycompat.iteritems(dmap):
        if item.need_delay(now):
            # The file was last modified "simultaneously" with the current
            # write to dirstate (i.e. within the same second for file-
            # systems with a granularity of 1 sec). This commonly happens
            # for at least a couple of files on 'update'.
            # The user could change the file without changing its size
            # within the same second. Invalidate the file's mtime in
            # dirstate, forcing future 'status' calls to compare the
            # contents of the file if the size is the same. This prevents
            # mistakenly treating such files as clean.
            item.set_possibly_dirty()

        name = fname
        if fname in copymap:
            name = b"%s\0%s" % (fname, copymap[fname])
        header = _pack(
            b">cllll",
            item.v1_state(),
            item.v1_mode(),
            item.v1_size(),
            item.v1_mtime(),
            len(name),
        )
        out.write(header)
        out.write(name)
    return out.getvalue()
General Comments 0
You need to be logged in to leave comments. Login now