dirstate-v2: adjust the meaning of directory flags...
marmoute
r49083:bb240915 default
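For orientation: this changeset renames two dirstate-v2 flag bits in util.h, `has_file_mtime` becomes `has_mtime` (bit 4) and `has_directory_mtime` becomes `directory` (bit 5), and the pure-Python writer now tags entry-less nodes with the new DIRECTORY bit. A minimal Python sketch of the bits involved, with names and values copied from util.h in this diff (illustrative only, not Mercurial code):

    # dirstate-v2 flag bits relevant to this change, mirroring util.h.
    WC_TRACKED = 1 << 0
    P1_TRACKED = 1 << 1
    P2_INFO = 1 << 2
    HAS_MEANINGFUL_DATA = 1 << 3
    HAS_MTIME = 1 << 4   # previously named HAS_FILE_MTIME
    DIRECTORY = 1 << 5   # previously named HAS_DIRECTORY_MTIME

    def describe(flags):
        """Return the names of the set bits, handy when reading v2 dumps."""
        names = [
            (WC_TRACKED, "wc_tracked"),
            (P1_TRACKED, "p1_tracked"),
            (P2_INFO, "p2_info"),
            (HAS_MEANINGFUL_DATA, "has_meaningful_data"),
            (HAS_MTIME, "has_mtime"),
            (DIRECTORY, "directory"),
        ]
        return [name for bit, name in names if flags & bit]

    # A clean, tracked file with a cached mtime:
    assert describe(WC_TRACKED | P1_TRACKED | HAS_MEANINGFUL_DATA | HAS_MTIME) == [
        "wc_tracked", "p1_tracked", "has_meaningful_data", "has_mtime",
    ]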
@@ -1,1321 +1,1321 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12 #include <ctype.h>
13 13 #include <stddef.h>
14 14 #include <string.h>
15 15
16 16 #include "bitmanipulation.h"
17 17 #include "charencode.h"
18 18 #include "util.h"
19 19
20 20 #ifdef IS_PY3K
21 21 /* The mapping of Python types is meant to be temporary to get Python
22 22 * 3 to compile. We should remove this once Python 3 is fully
23 23 * supported and proper types are used in the extensions themselves. */
24 24 #define PyInt_Check PyLong_Check
25 25 #define PyInt_FromLong PyLong_FromLong
26 26 #define PyInt_FromSsize_t PyLong_FromSsize_t
27 27 #define PyInt_AsLong PyLong_AsLong
28 28 #endif
29 29
30 30 static const char *const versionerrortext = "Python minor version mismatch";
31 31
32 32 static const int dirstate_v1_from_p2 = -2;
33 33 static const int dirstate_v1_nonnormal = -1;
34 34 static const int ambiguous_time = -1;
35 35
36 36 static PyObject *dict_new_presized(PyObject *self, PyObject *args)
37 37 {
38 38 Py_ssize_t expected_size;
39 39
40 40 if (!PyArg_ParseTuple(args, "n:make_presized_dict", &expected_size)) {
41 41 return NULL;
42 42 }
43 43
44 44 return _dict_new_presized(expected_size);
45 45 }
46 46
47 47 static PyObject *dirstate_item_new(PyTypeObject *subtype, PyObject *args,
48 48 PyObject *kwds)
49 49 {
50 50 /* We do all the initialization here and not in a tp_init function because
51 51 * dirstate_item is immutable. */
52 52 dirstateItemObject *t;
53 53 int wc_tracked;
54 54 int p1_tracked;
55 55 int p2_info;
56 56 int has_meaningful_data;
57 57 int has_meaningful_mtime;
58 58 int mode;
59 59 int size;
60 60 int mtime_s;
61 61 int mtime_ns;
62 62 PyObject *parentfiledata;
63 63 PyObject *fallback_exec;
64 64 PyObject *fallback_symlink;
65 65 static char *keywords_name[] = {
66 66 "wc_tracked", "p1_tracked", "p2_info",
67 67 "has_meaningful_data", "has_meaningful_mtime", "parentfiledata",
68 68 "fallback_exec", "fallback_symlink", NULL,
69 69 };
70 70 wc_tracked = 0;
71 71 p1_tracked = 0;
72 72 p2_info = 0;
73 73 has_meaningful_mtime = 1;
74 74 has_meaningful_data = 1;
75 75 parentfiledata = Py_None;
76 76 fallback_exec = Py_None;
77 77 fallback_symlink = Py_None;
78 78 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|iiiiiOOO", keywords_name,
79 79 &wc_tracked, &p1_tracked, &p2_info,
80 80 &has_meaningful_data,
81 81 &has_meaningful_mtime, &parentfiledata,
82 82 &fallback_exec, &fallback_symlink)) {
83 83 return NULL;
84 84 }
85 85 t = (dirstateItemObject *)subtype->tp_alloc(subtype, 1);
86 86 if (!t) {
87 87 return NULL;
88 88 }
89 89
90 90 t->flags = 0;
91 91 if (wc_tracked) {
92 92 t->flags |= dirstate_flag_wc_tracked;
93 93 }
94 94 if (p1_tracked) {
95 95 t->flags |= dirstate_flag_p1_tracked;
96 96 }
97 97 if (p2_info) {
98 98 t->flags |= dirstate_flag_p2_info;
99 99 }
100 100
101 101 if (fallback_exec != Py_None) {
102 102 t->flags |= dirstate_flag_has_fallback_exec;
103 103 if (PyObject_IsTrue(fallback_exec)) {
104 104 t->flags |= dirstate_flag_fallback_exec;
105 105 }
106 106 }
107 107 if (fallback_symlink != Py_None) {
108 108 t->flags |= dirstate_flag_has_fallback_symlink;
109 109 if (PyObject_IsTrue(fallback_symlink)) {
110 110 t->flags |= dirstate_flag_fallback_symlink;
111 111 }
112 112 }
113 113
114 114 if (parentfiledata != Py_None) {
115 115 if (!PyArg_ParseTuple(parentfiledata, "ii(ii)", &mode, &size,
116 116 &mtime_s, &mtime_ns)) {
117 117 return NULL;
118 118 }
119 119 } else {
120 120 has_meaningful_data = 0;
121 121 has_meaningful_mtime = 0;
122 122 }
123 123 if (has_meaningful_data) {
124 124 t->flags |= dirstate_flag_has_meaningful_data;
125 125 t->mode = mode;
126 126 t->size = size;
127 127 } else {
128 128 t->mode = 0;
129 129 t->size = 0;
130 130 }
131 131 if (has_meaningful_mtime) {
132 t->flags |= dirstate_flag_has_file_mtime;
132 t->flags |= dirstate_flag_has_mtime;
133 133 t->mtime_s = mtime_s;
134 134 t->mtime_ns = mtime_ns;
135 135 } else {
136 136 t->mtime_s = 0;
137 137 t->mtime_ns = 0;
138 138 }
139 139 return (PyObject *)t;
140 140 }
141 141
142 142 static void dirstate_item_dealloc(PyObject *o)
143 143 {
144 144 PyObject_Del(o);
145 145 }
146 146
147 147 static inline bool dirstate_item_c_tracked(dirstateItemObject *self)
148 148 {
149 149 return (self->flags & dirstate_flag_wc_tracked);
150 150 }
151 151
152 152 static inline bool dirstate_item_c_any_tracked(dirstateItemObject *self)
153 153 {
154 154 const int mask = dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
155 155 dirstate_flag_p2_info;
156 156 return (self->flags & mask);
157 157 }
158 158
159 159 static inline bool dirstate_item_c_added(dirstateItemObject *self)
160 160 {
161 161 const int mask = (dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
162 162 dirstate_flag_p2_info);
163 163 const int target = dirstate_flag_wc_tracked;
164 164 return (self->flags & mask) == target;
165 165 }
166 166
167 167 static inline bool dirstate_item_c_removed(dirstateItemObject *self)
168 168 {
169 169 if (self->flags & dirstate_flag_wc_tracked) {
170 170 return false;
171 171 }
172 172 return (self->flags &
173 173 (dirstate_flag_p1_tracked | dirstate_flag_p2_info));
174 174 }
175 175
176 176 static inline bool dirstate_item_c_merged(dirstateItemObject *self)
177 177 {
178 178 return ((self->flags & dirstate_flag_wc_tracked) &&
179 179 (self->flags & dirstate_flag_p1_tracked) &&
180 180 (self->flags & dirstate_flag_p2_info));
181 181 }
182 182
183 183 static inline bool dirstate_item_c_from_p2(dirstateItemObject *self)
184 184 {
185 185 return ((self->flags & dirstate_flag_wc_tracked) &&
186 186 !(self->flags & dirstate_flag_p1_tracked) &&
187 187 (self->flags & dirstate_flag_p2_info));
188 188 }
189 189
190 190 static inline char dirstate_item_c_v1_state(dirstateItemObject *self)
191 191 {
192 192 if (dirstate_item_c_removed(self)) {
193 193 return 'r';
194 194 } else if (dirstate_item_c_merged(self)) {
195 195 return 'm';
196 196 } else if (dirstate_item_c_added(self)) {
197 197 return 'a';
198 198 } else {
199 199 return 'n';
200 200 }
201 201 }
202 202
203 203 static inline bool dirstate_item_c_has_fallback_exec(dirstateItemObject *self)
204 204 {
205 205 return (bool)(self->flags & dirstate_flag_has_fallback_exec);
206 206 }
207 207
208 208 static inline bool
209 209 dirstate_item_c_has_fallback_symlink(dirstateItemObject *self)
210 210 {
211 211 return (bool)(self->flags & dirstate_flag_has_fallback_symlink);
212 212 }
213 213
214 214 static inline int dirstate_item_c_v1_mode(dirstateItemObject *self)
215 215 {
216 216 if (self->flags & dirstate_flag_has_meaningful_data) {
217 217 return self->mode;
218 218 } else {
219 219 return 0;
220 220 }
221 221 }
222 222
223 223 static inline int dirstate_item_c_v1_size(dirstateItemObject *self)
224 224 {
225 225 if (!(self->flags & dirstate_flag_wc_tracked) &&
226 226 (self->flags & dirstate_flag_p2_info)) {
227 227 if (self->flags & dirstate_flag_p1_tracked) {
228 228 return dirstate_v1_nonnormal;
229 229 } else {
230 230 return dirstate_v1_from_p2;
231 231 }
232 232 } else if (dirstate_item_c_removed(self)) {
233 233 return 0;
234 234 } else if (self->flags & dirstate_flag_p2_info) {
235 235 return dirstate_v1_from_p2;
236 236 } else if (dirstate_item_c_added(self)) {
237 237 return dirstate_v1_nonnormal;
238 238 } else if (self->flags & dirstate_flag_has_meaningful_data) {
239 239 return self->size;
240 240 } else {
241 241 return dirstate_v1_nonnormal;
242 242 }
243 243 }
244 244
245 245 static inline int dirstate_item_c_v1_mtime(dirstateItemObject *self)
246 246 {
247 247 if (dirstate_item_c_removed(self)) {
248 248 return 0;
249 } else if (!(self->flags & dirstate_flag_has_file_mtime) ||
249 } else if (!(self->flags & dirstate_flag_has_mtime) ||
250 250 !(self->flags & dirstate_flag_p1_tracked) ||
251 251 !(self->flags & dirstate_flag_wc_tracked) ||
252 252 (self->flags & dirstate_flag_p2_info)) {
253 253 return ambiguous_time;
254 254 } else {
255 255 return self->mtime_s;
256 256 }
257 257 }
258 258
259 259 static PyObject *dirstate_item_v2_data(dirstateItemObject *self)
260 260 {
261 261 int flags = self->flags;
262 262 int mode = dirstate_item_c_v1_mode(self);
263 263 if ((mode & S_IXUSR) != 0) {
264 264 flags |= dirstate_flag_mode_exec_perm;
265 265 } else {
266 266 flags &= ~dirstate_flag_mode_exec_perm;
267 267 }
268 268 if (S_ISLNK(mode)) {
269 269 flags |= dirstate_flag_mode_is_symlink;
270 270 } else {
271 271 flags &= ~dirstate_flag_mode_is_symlink;
272 272 }
273 273 return Py_BuildValue("iiii", flags, self->size, self->mtime_s,
274 274 self->mtime_ns);
275 275 };
276 276
277 277 static PyObject *dirstate_item_v1_state(dirstateItemObject *self)
278 278 {
279 279 char state = dirstate_item_c_v1_state(self);
280 280 return PyBytes_FromStringAndSize(&state, 1);
281 281 };
282 282
283 283 static PyObject *dirstate_item_v1_mode(dirstateItemObject *self)
284 284 {
285 285 return PyInt_FromLong(dirstate_item_c_v1_mode(self));
286 286 };
287 287
288 288 static PyObject *dirstate_item_v1_size(dirstateItemObject *self)
289 289 {
290 290 return PyInt_FromLong(dirstate_item_c_v1_size(self));
291 291 };
292 292
293 293 static PyObject *dirstate_item_v1_mtime(dirstateItemObject *self)
294 294 {
295 295 return PyInt_FromLong(dirstate_item_c_v1_mtime(self));
296 296 };
297 297
298 298 static PyObject *dirstate_item_need_delay(dirstateItemObject *self,
299 299 PyObject *now)
300 300 {
301 301 int now_s;
302 302 int now_ns;
303 303 if (!PyArg_ParseTuple(now, "ii", &now_s, &now_ns)) {
304 304 return NULL;
305 305 }
306 306 if (dirstate_item_c_v1_state(self) == 'n' && self->mtime_s == now_s) {
307 307 Py_RETURN_TRUE;
308 308 } else {
309 309 Py_RETURN_FALSE;
310 310 }
311 311 };
312 312
313 313 static PyObject *dirstate_item_mtime_likely_equal_to(dirstateItemObject *self,
314 314 PyObject *other)
315 315 {
316 316 int other_s;
317 317 int other_ns;
318 318 if (!PyArg_ParseTuple(other, "ii", &other_s, &other_ns)) {
319 319 return NULL;
320 320 }
321 if ((self->flags & dirstate_flag_has_file_mtime) &&
321 if ((self->flags & dirstate_flag_has_mtime) &&
322 322 self->mtime_s == other_s &&
323 323 (self->mtime_ns == other_ns || self->mtime_ns == 0 ||
324 324 other_ns == 0)) {
325 325 Py_RETURN_TRUE;
326 326 } else {
327 327 Py_RETURN_FALSE;
328 328 }
329 329 };
330 330
331 331 /* This will never change since it's bound to V1
332 332 */
333 333 static inline dirstateItemObject *
334 334 dirstate_item_from_v1_data(char state, int mode, int size, int mtime)
335 335 {
336 336 dirstateItemObject *t =
337 337 PyObject_New(dirstateItemObject, &dirstateItemType);
338 338 if (!t) {
339 339 return NULL;
340 340 }
341 341 t->flags = 0;
342 342 t->mode = 0;
343 343 t->size = 0;
344 344 t->mtime_s = 0;
345 345 t->mtime_ns = 0;
346 346
347 347 if (state == 'm') {
348 348 t->flags = (dirstate_flag_wc_tracked |
349 349 dirstate_flag_p1_tracked | dirstate_flag_p2_info);
350 350 } else if (state == 'a') {
351 351 t->flags = dirstate_flag_wc_tracked;
352 352 } else if (state == 'r') {
353 353 if (size == dirstate_v1_nonnormal) {
354 354 t->flags =
355 355 dirstate_flag_p1_tracked | dirstate_flag_p2_info;
356 356 } else if (size == dirstate_v1_from_p2) {
357 357 t->flags = dirstate_flag_p2_info;
358 358 } else {
359 359 t->flags = dirstate_flag_p1_tracked;
360 360 }
361 361 } else if (state == 'n') {
362 362 if (size == dirstate_v1_from_p2) {
363 363 t->flags =
364 364 dirstate_flag_wc_tracked | dirstate_flag_p2_info;
365 365 } else if (size == dirstate_v1_nonnormal) {
366 366 t->flags =
367 367 dirstate_flag_wc_tracked | dirstate_flag_p1_tracked;
368 368 } else if (mtime == ambiguous_time) {
369 369 t->flags = (dirstate_flag_wc_tracked |
370 370 dirstate_flag_p1_tracked |
371 371 dirstate_flag_has_meaningful_data);
372 372 t->mode = mode;
373 373 t->size = size;
374 374 } else {
375 375 t->flags = (dirstate_flag_wc_tracked |
376 376 dirstate_flag_p1_tracked |
377 377 dirstate_flag_has_meaningful_data |
378 dirstate_flag_has_file_mtime);
378 dirstate_flag_has_mtime);
379 379 t->mode = mode;
380 380 t->size = size;
381 381 t->mtime_s = mtime;
382 382 }
383 383 } else {
384 384 PyErr_Format(PyExc_RuntimeError,
385 385 "unknown state: `%c` (%d, %d, %d)", state, mode,
386 386 size, mtime, NULL);
387 387 Py_DECREF(t);
388 388 return NULL;
389 389 }
390 390
391 391 return t;
392 392 }
393 393
394 394 /* This will never change since it's bound to V1, unlike `dirstate_item_new` */
395 395 static PyObject *dirstate_item_from_v1_meth(PyTypeObject *subtype,
396 396 PyObject *args)
397 397 {
398 398 /* We do all the initialization here and not in a tp_init function because
399 399 * dirstate_item is immutable. */
400 400 char state;
401 401 int size, mode, mtime;
402 402 if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime)) {
403 403 return NULL;
404 404 }
405 405 return (PyObject *)dirstate_item_from_v1_data(state, mode, size, mtime);
406 406 };
407 407
408 408 static PyObject *dirstate_item_from_v2_meth(PyTypeObject *subtype,
409 409 PyObject *args)
410 410 {
411 411 dirstateItemObject *t =
412 412 PyObject_New(dirstateItemObject, &dirstateItemType);
413 413 if (!t) {
414 414 return NULL;
415 415 }
416 416 if (!PyArg_ParseTuple(args, "iiii", &t->flags, &t->size, &t->mtime_s,
417 417 &t->mtime_ns)) {
418 418 return NULL;
419 419 }
420 420 if (t->flags & dirstate_flag_expected_state_is_modified) {
421 421 t->flags &= ~(dirstate_flag_expected_state_is_modified |
422 422 dirstate_flag_has_meaningful_data |
423 dirstate_flag_has_file_mtime);
423 dirstate_flag_has_mtime);
424 424 }
425 425 if (t->flags & dirstate_flag_mtime_second_ambiguous) {
426 426 /* The current code is not able to do the more subtle comparison
427 427 * that the MTIME_SECOND_AMBIGUOUS requires. So we ignore the
428 428 * mtime */
429 429 t->flags &= ~(dirstate_flag_mtime_second_ambiguous |
430 430 dirstate_flag_has_meaningful_data |
431 dirstate_flag_has_file_mtime);
431 dirstate_flag_has_mtime);
432 432 }
433 433 t->mode = 0;
434 434 if (t->flags & dirstate_flag_has_meaningful_data) {
435 435 if (t->flags & dirstate_flag_mode_exec_perm) {
436 436 t->mode = 0755;
437 437 } else {
438 438 t->mode = 0644;
439 439 }
440 440 if (t->flags & dirstate_flag_mode_is_symlink) {
441 441 t->mode |= S_IFLNK;
442 442 } else {
443 443 t->mode |= S_IFREG;
444 444 }
445 445 }
446 446 return (PyObject *)t;
447 447 };
448 448
449 449 /* This means the next status call will have to actually check its content
450 450 to make sure it is correct. */
451 451 static PyObject *dirstate_item_set_possibly_dirty(dirstateItemObject *self)
452 452 {
453 self->flags &= ~dirstate_flag_has_file_mtime;
453 self->flags &= ~dirstate_flag_has_mtime;
454 454 Py_RETURN_NONE;
455 455 }
456 456
457 457 /* See docstring of the python implementation for details */
458 458 static PyObject *dirstate_item_set_clean(dirstateItemObject *self,
459 459 PyObject *args)
460 460 {
461 461 int size, mode, mtime_s, mtime_ns;
462 462 if (!PyArg_ParseTuple(args, "ii(ii)", &mode, &size, &mtime_s,
463 463 &mtime_ns)) {
464 464 return NULL;
465 465 }
466 466 self->flags = dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
467 467 dirstate_flag_has_meaningful_data |
468 dirstate_flag_has_file_mtime;
468 dirstate_flag_has_mtime;
469 469 self->mode = mode;
470 470 self->size = size;
471 471 self->mtime_s = mtime_s;
472 472 self->mtime_ns = mtime_ns;
473 473 Py_RETURN_NONE;
474 474 }
475 475
476 476 static PyObject *dirstate_item_set_tracked(dirstateItemObject *self)
477 477 {
478 478 self->flags |= dirstate_flag_wc_tracked;
479 self->flags &= ~dirstate_flag_has_file_mtime;
479 self->flags &= ~dirstate_flag_has_mtime;
480 480 Py_RETURN_NONE;
481 481 }
482 482
483 483 static PyObject *dirstate_item_set_untracked(dirstateItemObject *self)
484 484 {
485 485 self->flags &= ~dirstate_flag_wc_tracked;
486 486 self->mode = 0;
487 487 self->size = 0;
488 488 self->mtime_s = 0;
489 489 self->mtime_ns = 0;
490 490 Py_RETURN_NONE;
491 491 }
492 492
493 493 static PyObject *dirstate_item_drop_merge_data(dirstateItemObject *self)
494 494 {
495 495 if (self->flags & dirstate_flag_p2_info) {
496 496 self->flags &= ~(dirstate_flag_p2_info |
497 497 dirstate_flag_has_meaningful_data |
498 dirstate_flag_has_file_mtime);
498 dirstate_flag_has_mtime);
499 499 self->mode = 0;
500 500 self->size = 0;
501 501 self->mtime_s = 0;
502 502 self->mtime_ns = 0;
503 503 }
504 504 Py_RETURN_NONE;
505 505 }
506 506 static PyMethodDef dirstate_item_methods[] = {
507 507 {"v2_data", (PyCFunction)dirstate_item_v2_data, METH_NOARGS,
508 508 "return data suitable for v2 serialization"},
509 509 {"v1_state", (PyCFunction)dirstate_item_v1_state, METH_NOARGS,
510 510 "return a \"state\" suitable for v1 serialization"},
511 511 {"v1_mode", (PyCFunction)dirstate_item_v1_mode, METH_NOARGS,
512 512 "return a \"mode\" suitable for v1 serialization"},
513 513 {"v1_size", (PyCFunction)dirstate_item_v1_size, METH_NOARGS,
514 514 "return a \"size\" suitable for v1 serialization"},
515 515 {"v1_mtime", (PyCFunction)dirstate_item_v1_mtime, METH_NOARGS,
516 516 "return a \"mtime\" suitable for v1 serialization"},
517 517 {"need_delay", (PyCFunction)dirstate_item_need_delay, METH_O,
518 518 "True if the stored mtime would be ambiguous with the current time"},
519 519 {"mtime_likely_equal_to", (PyCFunction)dirstate_item_mtime_likely_equal_to,
520 520 METH_O, "True if the stored mtime is likely equal to the given mtime"},
521 521 {"from_v1_data", (PyCFunction)dirstate_item_from_v1_meth,
522 522 METH_VARARGS | METH_CLASS, "build a new DirstateItem object from V1 data"},
523 523 {"from_v2_data", (PyCFunction)dirstate_item_from_v2_meth,
524 524 METH_VARARGS | METH_CLASS, "build a new DirstateItem object from V2 data"},
525 525 {"set_possibly_dirty", (PyCFunction)dirstate_item_set_possibly_dirty,
526 526 METH_NOARGS, "mark a file as \"possibly dirty\""},
527 527 {"set_clean", (PyCFunction)dirstate_item_set_clean, METH_VARARGS,
528 528 "mark a file as \"clean\""},
529 529 {"set_tracked", (PyCFunction)dirstate_item_set_tracked, METH_NOARGS,
530 530 "mark a file as \"tracked\""},
531 531 {"set_untracked", (PyCFunction)dirstate_item_set_untracked, METH_NOARGS,
532 532 "mark a file as \"untracked\""},
533 533 {"drop_merge_data", (PyCFunction)dirstate_item_drop_merge_data, METH_NOARGS,
534 534 "remove all \"merge-only\" from a DirstateItem"},
535 535 {NULL} /* Sentinel */
536 536 };
537 537
538 538 static PyObject *dirstate_item_get_mode(dirstateItemObject *self)
539 539 {
540 540 return PyInt_FromLong(dirstate_item_c_v1_mode(self));
541 541 };
542 542
543 543 static PyObject *dirstate_item_get_size(dirstateItemObject *self)
544 544 {
545 545 return PyInt_FromLong(dirstate_item_c_v1_size(self));
546 546 };
547 547
548 548 static PyObject *dirstate_item_get_mtime(dirstateItemObject *self)
549 549 {
550 550 return PyInt_FromLong(dirstate_item_c_v1_mtime(self));
551 551 };
552 552
553 553 static PyObject *dirstate_item_get_state(dirstateItemObject *self)
554 554 {
555 555 char state = dirstate_item_c_v1_state(self);
556 556 return PyBytes_FromStringAndSize(&state, 1);
557 557 };
558 558
559 559 static PyObject *dirstate_item_get_has_fallback_exec(dirstateItemObject *self)
560 560 {
561 561 if (dirstate_item_c_has_fallback_exec(self)) {
562 562 Py_RETURN_TRUE;
563 563 } else {
564 564 Py_RETURN_FALSE;
565 565 }
566 566 };
567 567
568 568 static PyObject *dirstate_item_get_fallback_exec(dirstateItemObject *self)
569 569 {
570 570 if (dirstate_item_c_has_fallback_exec(self)) {
571 571 if (self->flags & dirstate_flag_fallback_exec) {
572 572 Py_RETURN_TRUE;
573 573 } else {
574 574 Py_RETURN_FALSE;
575 575 }
576 576 } else {
577 577 Py_RETURN_NONE;
578 578 }
579 579 };
580 580
581 581 static int dirstate_item_set_fallback_exec(dirstateItemObject *self,
582 582 PyObject *value)
583 583 {
584 584 if ((value == Py_None) || (value == NULL)) {
585 585 self->flags &= ~dirstate_flag_has_fallback_exec;
586 586 } else {
587 587 self->flags |= dirstate_flag_has_fallback_exec;
588 588 if (PyObject_IsTrue(value)) {
589 589 self->flags |= dirstate_flag_fallback_exec;
590 590 } else {
591 591 self->flags &= ~dirstate_flag_fallback_exec;
592 592 }
593 593 }
594 594 return 0;
595 595 };
596 596
597 597 static PyObject *
598 598 dirstate_item_get_has_fallback_symlink(dirstateItemObject *self)
599 599 {
600 600 if (dirstate_item_c_has_fallback_symlink(self)) {
601 601 Py_RETURN_TRUE;
602 602 } else {
603 603 Py_RETURN_FALSE;
604 604 }
605 605 };
606 606
607 607 static PyObject *dirstate_item_get_fallback_symlink(dirstateItemObject *self)
608 608 {
609 609 if (dirstate_item_c_has_fallback_symlink(self)) {
610 610 if (self->flags & dirstate_flag_fallback_symlink) {
611 611 Py_RETURN_TRUE;
612 612 } else {
613 613 Py_RETURN_FALSE;
614 614 }
615 615 } else {
616 616 Py_RETURN_NONE;
617 617 }
618 618 };
619 619
620 620 static int dirstate_item_set_fallback_symlink(dirstateItemObject *self,
621 621 PyObject *value)
622 622 {
623 623 if ((value == Py_None) || (value == NULL)) {
624 624 self->flags &= ~dirstate_flag_has_fallback_symlink;
625 625 } else {
626 626 self->flags |= dirstate_flag_has_fallback_symlink;
627 627 if (PyObject_IsTrue(value)) {
628 628 self->flags |= dirstate_flag_fallback_symlink;
629 629 } else {
630 630 self->flags &= ~dirstate_flag_fallback_symlink;
631 631 }
632 632 }
633 633 return 0;
634 634 };
635 635
636 636 static PyObject *dirstate_item_get_tracked(dirstateItemObject *self)
637 637 {
638 638 if (dirstate_item_c_tracked(self)) {
639 639 Py_RETURN_TRUE;
640 640 } else {
641 641 Py_RETURN_FALSE;
642 642 }
643 643 };
644 644 static PyObject *dirstate_item_get_p1_tracked(dirstateItemObject *self)
645 645 {
646 646 if (self->flags & dirstate_flag_p1_tracked) {
647 647 Py_RETURN_TRUE;
648 648 } else {
649 649 Py_RETURN_FALSE;
650 650 }
651 651 };
652 652
653 653 static PyObject *dirstate_item_get_added(dirstateItemObject *self)
654 654 {
655 655 if (dirstate_item_c_added(self)) {
656 656 Py_RETURN_TRUE;
657 657 } else {
658 658 Py_RETURN_FALSE;
659 659 }
660 660 };
661 661
662 662 static PyObject *dirstate_item_get_p2_info(dirstateItemObject *self)
663 663 {
664 664 if (self->flags & dirstate_flag_wc_tracked &&
665 665 self->flags & dirstate_flag_p2_info) {
666 666 Py_RETURN_TRUE;
667 667 } else {
668 668 Py_RETURN_FALSE;
669 669 }
670 670 };
671 671
672 672 static PyObject *dirstate_item_get_merged(dirstateItemObject *self)
673 673 {
674 674 if (dirstate_item_c_merged(self)) {
675 675 Py_RETURN_TRUE;
676 676 } else {
677 677 Py_RETURN_FALSE;
678 678 }
679 679 };
680 680
681 681 static PyObject *dirstate_item_get_from_p2(dirstateItemObject *self)
682 682 {
683 683 if (dirstate_item_c_from_p2(self)) {
684 684 Py_RETURN_TRUE;
685 685 } else {
686 686 Py_RETURN_FALSE;
687 687 }
688 688 };
689 689
690 690 static PyObject *dirstate_item_get_maybe_clean(dirstateItemObject *self)
691 691 {
692 692 if (!(self->flags & dirstate_flag_wc_tracked)) {
693 693 Py_RETURN_FALSE;
694 694 } else if (!(self->flags & dirstate_flag_p1_tracked)) {
695 695 Py_RETURN_FALSE;
696 696 } else if (self->flags & dirstate_flag_p2_info) {
697 697 Py_RETURN_FALSE;
698 698 } else {
699 699 Py_RETURN_TRUE;
700 700 }
701 701 };
702 702
703 703 static PyObject *dirstate_item_get_any_tracked(dirstateItemObject *self)
704 704 {
705 705 if (dirstate_item_c_any_tracked(self)) {
706 706 Py_RETURN_TRUE;
707 707 } else {
708 708 Py_RETURN_FALSE;
709 709 }
710 710 };
711 711
712 712 static PyObject *dirstate_item_get_removed(dirstateItemObject *self)
713 713 {
714 714 if (dirstate_item_c_removed(self)) {
715 715 Py_RETURN_TRUE;
716 716 } else {
717 717 Py_RETURN_FALSE;
718 718 }
719 719 };
720 720
721 721 static PyGetSetDef dirstate_item_getset[] = {
722 722 {"mode", (getter)dirstate_item_get_mode, NULL, "mode", NULL},
723 723 {"size", (getter)dirstate_item_get_size, NULL, "size", NULL},
724 724 {"mtime", (getter)dirstate_item_get_mtime, NULL, "mtime", NULL},
725 725 {"state", (getter)dirstate_item_get_state, NULL, "state", NULL},
726 726 {"has_fallback_exec", (getter)dirstate_item_get_has_fallback_exec, NULL,
727 727 "has_fallback_exec", NULL},
728 728 {"fallback_exec", (getter)dirstate_item_get_fallback_exec,
729 729 (setter)dirstate_item_set_fallback_exec, "fallback_exec", NULL},
730 730 {"has_fallback_symlink", (getter)dirstate_item_get_has_fallback_symlink,
731 731 NULL, "has_fallback_symlink", NULL},
732 732 {"fallback_symlink", (getter)dirstate_item_get_fallback_symlink,
733 733 (setter)dirstate_item_set_fallback_symlink, "fallback_symlink", NULL},
734 734 {"tracked", (getter)dirstate_item_get_tracked, NULL, "tracked", NULL},
735 735 {"p1_tracked", (getter)dirstate_item_get_p1_tracked, NULL, "p1_tracked",
736 736 NULL},
737 737 {"added", (getter)dirstate_item_get_added, NULL, "added", NULL},
738 738 {"p2_info", (getter)dirstate_item_get_p2_info, NULL, "p2_info", NULL},
739 739 {"merged", (getter)dirstate_item_get_merged, NULL, "merged", NULL},
740 740 {"from_p2", (getter)dirstate_item_get_from_p2, NULL, "from_p2", NULL},
741 741 {"maybe_clean", (getter)dirstate_item_get_maybe_clean, NULL, "maybe_clean",
742 742 NULL},
743 743 {"any_tracked", (getter)dirstate_item_get_any_tracked, NULL, "any_tracked",
744 744 NULL},
745 745 {"removed", (getter)dirstate_item_get_removed, NULL, "removed", NULL},
746 746 {NULL} /* Sentinel */
747 747 };
748 748
749 749 PyTypeObject dirstateItemType = {
750 750 PyVarObject_HEAD_INIT(NULL, 0) /* header */
751 751 "dirstate_tuple", /* tp_name */
752 752 sizeof(dirstateItemObject), /* tp_basicsize */
753 753 0, /* tp_itemsize */
754 754 (destructor)dirstate_item_dealloc, /* tp_dealloc */
755 755 0, /* tp_print */
756 756 0, /* tp_getattr */
757 757 0, /* tp_setattr */
758 758 0, /* tp_compare */
759 759 0, /* tp_repr */
760 760 0, /* tp_as_number */
761 761 0, /* tp_as_sequence */
762 762 0, /* tp_as_mapping */
763 763 0, /* tp_hash */
764 764 0, /* tp_call */
765 765 0, /* tp_str */
766 766 0, /* tp_getattro */
767 767 0, /* tp_setattro */
768 768 0, /* tp_as_buffer */
769 769 Py_TPFLAGS_DEFAULT, /* tp_flags */
770 770 "dirstate tuple", /* tp_doc */
771 771 0, /* tp_traverse */
772 772 0, /* tp_clear */
773 773 0, /* tp_richcompare */
774 774 0, /* tp_weaklistoffset */
775 775 0, /* tp_iter */
776 776 0, /* tp_iternext */
777 777 dirstate_item_methods, /* tp_methods */
778 778 0, /* tp_members */
779 779 dirstate_item_getset, /* tp_getset */
780 780 0, /* tp_base */
781 781 0, /* tp_dict */
782 782 0, /* tp_descr_get */
783 783 0, /* tp_descr_set */
784 784 0, /* tp_dictoffset */
785 785 0, /* tp_init */
786 786 0, /* tp_alloc */
787 787 dirstate_item_new, /* tp_new */
788 788 };
789 789
790 790 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
791 791 {
792 792 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
793 793 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
794 794 char state, *cur, *str, *cpos;
795 795 int mode, size, mtime;
796 796 unsigned int flen, pos = 40;
797 797 Py_ssize_t len = 40;
798 798 Py_ssize_t readlen;
799 799
800 800 if (!PyArg_ParseTuple(
801 801 args, PY23("O!O!s#:parse_dirstate", "O!O!y#:parse_dirstate"),
802 802 &PyDict_Type, &dmap, &PyDict_Type, &cmap, &str, &readlen)) {
803 803 goto quit;
804 804 }
805 805
806 806 len = readlen;
807 807
808 808 /* read parents */
809 809 if (len < 40) {
810 810 PyErr_SetString(PyExc_ValueError,
811 811 "too little data for parents");
812 812 goto quit;
813 813 }
814 814
815 815 parents = Py_BuildValue(PY23("s#s#", "y#y#"), str, (Py_ssize_t)20,
816 816 str + 20, (Py_ssize_t)20);
817 817 if (!parents) {
818 818 goto quit;
819 819 }
820 820
821 821 /* read filenames */
822 822 while (pos >= 40 && pos < len) {
823 823 if (pos + 17 > len) {
824 824 PyErr_SetString(PyExc_ValueError,
825 825 "overflow in dirstate");
826 826 goto quit;
827 827 }
828 828 cur = str + pos;
829 829 /* unpack header */
830 830 state = *cur;
831 831 mode = getbe32(cur + 1);
832 832 size = getbe32(cur + 5);
833 833 mtime = getbe32(cur + 9);
834 834 flen = getbe32(cur + 13);
835 835 pos += 17;
836 836 cur += 17;
837 837 if (flen > len - pos) {
838 838 PyErr_SetString(PyExc_ValueError,
839 839 "overflow in dirstate");
840 840 goto quit;
841 841 }
842 842
843 843 entry = (PyObject *)dirstate_item_from_v1_data(state, mode,
844 844 size, mtime);
845 845 if (!entry)
846 846 goto quit;
847 847 cpos = memchr(cur, 0, flen);
848 848 if (cpos) {
849 849 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
850 850 cname = PyBytes_FromStringAndSize(
851 851 cpos + 1, flen - (cpos - cur) - 1);
852 852 if (!fname || !cname ||
853 853 PyDict_SetItem(cmap, fname, cname) == -1 ||
854 854 PyDict_SetItem(dmap, fname, entry) == -1) {
855 855 goto quit;
856 856 }
857 857 Py_DECREF(cname);
858 858 } else {
859 859 fname = PyBytes_FromStringAndSize(cur, flen);
860 860 if (!fname ||
861 861 PyDict_SetItem(dmap, fname, entry) == -1) {
862 862 goto quit;
863 863 }
864 864 }
865 865 Py_DECREF(fname);
866 866 Py_DECREF(entry);
867 867 fname = cname = entry = NULL;
868 868 pos += flen;
869 869 }
870 870
871 871 ret = parents;
872 872 Py_INCREF(ret);
873 873 quit:
874 874 Py_XDECREF(fname);
875 875 Py_XDECREF(cname);
876 876 Py_XDECREF(entry);
877 877 Py_XDECREF(parents);
878 878 return ret;
879 879 }
880 880
881 881 /*
882 882 * Efficiently pack a dirstate object into its on-disk format.
883 883 */
884 884 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
885 885 {
886 886 PyObject *packobj = NULL;
887 887 PyObject *map, *copymap, *pl, *mtime_unset = NULL;
888 888 Py_ssize_t nbytes, pos, l;
889 889 PyObject *k, *v = NULL, *pn;
890 890 char *p, *s;
891 891 int now_s;
892 892 int now_ns;
893 893
894 894 if (!PyArg_ParseTuple(args, "O!O!O!(ii):pack_dirstate", &PyDict_Type,
895 895 &map, &PyDict_Type, &copymap, &PyTuple_Type, &pl,
896 896 &now_s, &now_ns)) {
897 897 return NULL;
898 898 }
899 899
900 900 if (PyTuple_Size(pl) != 2) {
901 901 PyErr_SetString(PyExc_TypeError, "expected 2-element tuple");
902 902 return NULL;
903 903 }
904 904
905 905 /* Figure out how much we need to allocate. */
906 906 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
907 907 PyObject *c;
908 908 if (!PyBytes_Check(k)) {
909 909 PyErr_SetString(PyExc_TypeError, "expected string key");
910 910 goto bail;
911 911 }
912 912 nbytes += PyBytes_GET_SIZE(k) + 17;
913 913 c = PyDict_GetItem(copymap, k);
914 914 if (c) {
915 915 if (!PyBytes_Check(c)) {
916 916 PyErr_SetString(PyExc_TypeError,
917 917 "expected string key");
918 918 goto bail;
919 919 }
920 920 nbytes += PyBytes_GET_SIZE(c) + 1;
921 921 }
922 922 }
923 923
924 924 packobj = PyBytes_FromStringAndSize(NULL, nbytes);
925 925 if (packobj == NULL) {
926 926 goto bail;
927 927 }
928 928
929 929 p = PyBytes_AS_STRING(packobj);
930 930
931 931 pn = PyTuple_GET_ITEM(pl, 0);
932 932 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
933 933 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
934 934 goto bail;
935 935 }
936 936 memcpy(p, s, l);
937 937 p += 20;
938 938 pn = PyTuple_GET_ITEM(pl, 1);
939 939 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
940 940 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
941 941 goto bail;
942 942 }
943 943 memcpy(p, s, l);
944 944 p += 20;
945 945
946 946 for (pos = 0; PyDict_Next(map, &pos, &k, &v);) {
947 947 dirstateItemObject *tuple;
948 948 char state;
949 949 int mode, size, mtime;
950 950 Py_ssize_t len, l;
951 951 PyObject *o;
952 952 char *t;
953 953
954 954 if (!dirstate_tuple_check(v)) {
955 955 PyErr_SetString(PyExc_TypeError,
956 956 "expected a dirstate tuple");
957 957 goto bail;
958 958 }
959 959 tuple = (dirstateItemObject *)v;
960 960
961 961 state = dirstate_item_c_v1_state(tuple);
962 962 mode = dirstate_item_c_v1_mode(tuple);
963 963 size = dirstate_item_c_v1_size(tuple);
964 964 mtime = dirstate_item_c_v1_mtime(tuple);
965 965 if (state == 'n' && tuple->mtime_s == now_s) {
966 966 /* See pure/parsers.py:pack_dirstate for why we do
967 967 * this. */
968 968 mtime = -1;
969 969 mtime_unset = (PyObject *)dirstate_item_from_v1_data(
970 970 state, mode, size, mtime);
971 971 if (!mtime_unset) {
972 972 goto bail;
973 973 }
974 974 if (PyDict_SetItem(map, k, mtime_unset) == -1) {
975 975 goto bail;
976 976 }
977 977 Py_DECREF(mtime_unset);
978 978 mtime_unset = NULL;
979 979 }
980 980 *p++ = state;
981 981 putbe32((uint32_t)mode, p);
982 982 putbe32((uint32_t)size, p + 4);
983 983 putbe32((uint32_t)mtime, p + 8);
984 984 t = p + 12;
985 985 p += 16;
986 986 len = PyBytes_GET_SIZE(k);
987 987 memcpy(p, PyBytes_AS_STRING(k), len);
988 988 p += len;
989 989 o = PyDict_GetItem(copymap, k);
990 990 if (o) {
991 991 *p++ = '\0';
992 992 l = PyBytes_GET_SIZE(o);
993 993 memcpy(p, PyBytes_AS_STRING(o), l);
994 994 p += l;
995 995 len += l + 1;
996 996 }
997 997 putbe32((uint32_t)len, t);
998 998 }
999 999
1000 1000 pos = p - PyBytes_AS_STRING(packobj);
1001 1001 if (pos != nbytes) {
1002 1002 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
1003 1003 (long)pos, (long)nbytes);
1004 1004 goto bail;
1005 1005 }
1006 1006
1007 1007 return packobj;
1008 1008 bail:
1009 1009 Py_XDECREF(mtime_unset);
1010 1010 Py_XDECREF(packobj);
1011 1011 Py_XDECREF(v);
1012 1012 return NULL;
1013 1013 }
1014 1014
1015 1015 #define BUMPED_FIX 1
1016 1016 #define USING_SHA_256 2
1017 1017 #define FM1_HEADER_SIZE (4 + 8 + 2 + 2 + 1 + 1 + 1)
1018 1018
1019 1019 static PyObject *readshas(const char *source, unsigned char num,
1020 1020 Py_ssize_t hashwidth)
1021 1021 {
1022 1022 int i;
1023 1023 PyObject *list = PyTuple_New(num);
1024 1024 if (list == NULL) {
1025 1025 return NULL;
1026 1026 }
1027 1027 for (i = 0; i < num; i++) {
1028 1028 PyObject *hash = PyBytes_FromStringAndSize(source, hashwidth);
1029 1029 if (hash == NULL) {
1030 1030 Py_DECREF(list);
1031 1031 return NULL;
1032 1032 }
1033 1033 PyTuple_SET_ITEM(list, i, hash);
1034 1034 source += hashwidth;
1035 1035 }
1036 1036 return list;
1037 1037 }
1038 1038
1039 1039 static PyObject *fm1readmarker(const char *databegin, const char *dataend,
1040 1040 uint32_t *msize)
1041 1041 {
1042 1042 const char *data = databegin;
1043 1043 const char *meta;
1044 1044
1045 1045 double mtime;
1046 1046 int16_t tz;
1047 1047 uint16_t flags;
1048 1048 unsigned char nsuccs, nparents, nmetadata;
1049 1049 Py_ssize_t hashwidth = 20;
1050 1050
1051 1051 PyObject *prec = NULL, *parents = NULL, *succs = NULL;
1052 1052 PyObject *metadata = NULL, *ret = NULL;
1053 1053 int i;
1054 1054
1055 1055 if (data + FM1_HEADER_SIZE > dataend) {
1056 1056 goto overflow;
1057 1057 }
1058 1058
1059 1059 *msize = getbe32(data);
1060 1060 data += 4;
1061 1061 mtime = getbefloat64(data);
1062 1062 data += 8;
1063 1063 tz = getbeint16(data);
1064 1064 data += 2;
1065 1065 flags = getbeuint16(data);
1066 1066 data += 2;
1067 1067
1068 1068 if (flags & USING_SHA_256) {
1069 1069 hashwidth = 32;
1070 1070 }
1071 1071
1072 1072 nsuccs = (unsigned char)(*data++);
1073 1073 nparents = (unsigned char)(*data++);
1074 1074 nmetadata = (unsigned char)(*data++);
1075 1075
1076 1076 if (databegin + *msize > dataend) {
1077 1077 goto overflow;
1078 1078 }
1079 1079 dataend = databegin + *msize; /* narrow down to marker size */
1080 1080
1081 1081 if (data + hashwidth > dataend) {
1082 1082 goto overflow;
1083 1083 }
1084 1084 prec = PyBytes_FromStringAndSize(data, hashwidth);
1085 1085 data += hashwidth;
1086 1086 if (prec == NULL) {
1087 1087 goto bail;
1088 1088 }
1089 1089
1090 1090 if (data + nsuccs * hashwidth > dataend) {
1091 1091 goto overflow;
1092 1092 }
1093 1093 succs = readshas(data, nsuccs, hashwidth);
1094 1094 if (succs == NULL) {
1095 1095 goto bail;
1096 1096 }
1097 1097 data += nsuccs * hashwidth;
1098 1098
1099 1099 if (nparents == 1 || nparents == 2) {
1100 1100 if (data + nparents * hashwidth > dataend) {
1101 1101 goto overflow;
1102 1102 }
1103 1103 parents = readshas(data, nparents, hashwidth);
1104 1104 if (parents == NULL) {
1105 1105 goto bail;
1106 1106 }
1107 1107 data += nparents * hashwidth;
1108 1108 } else {
1109 1109 parents = Py_None;
1110 1110 Py_INCREF(parents);
1111 1111 }
1112 1112
1113 1113 if (data + 2 * nmetadata > dataend) {
1114 1114 goto overflow;
1115 1115 }
1116 1116 meta = data + (2 * nmetadata);
1117 1117 metadata = PyTuple_New(nmetadata);
1118 1118 if (metadata == NULL) {
1119 1119 goto bail;
1120 1120 }
1121 1121 for (i = 0; i < nmetadata; i++) {
1122 1122 PyObject *tmp, *left = NULL, *right = NULL;
1123 1123 Py_ssize_t leftsize = (unsigned char)(*data++);
1124 1124 Py_ssize_t rightsize = (unsigned char)(*data++);
1125 1125 if (meta + leftsize + rightsize > dataend) {
1126 1126 goto overflow;
1127 1127 }
1128 1128 left = PyBytes_FromStringAndSize(meta, leftsize);
1129 1129 meta += leftsize;
1130 1130 right = PyBytes_FromStringAndSize(meta, rightsize);
1131 1131 meta += rightsize;
1132 1132 tmp = PyTuple_New(2);
1133 1133 if (!left || !right || !tmp) {
1134 1134 Py_XDECREF(left);
1135 1135 Py_XDECREF(right);
1136 1136 Py_XDECREF(tmp);
1137 1137 goto bail;
1138 1138 }
1139 1139 PyTuple_SET_ITEM(tmp, 0, left);
1140 1140 PyTuple_SET_ITEM(tmp, 1, right);
1141 1141 PyTuple_SET_ITEM(metadata, i, tmp);
1142 1142 }
1143 1143 ret = Py_BuildValue("(OOHO(di)O)", prec, succs, flags, metadata, mtime,
1144 1144 (int)tz * 60, parents);
1145 1145 goto bail; /* return successfully */
1146 1146
1147 1147 overflow:
1148 1148 PyErr_SetString(PyExc_ValueError, "overflow in obsstore");
1149 1149 bail:
1150 1150 Py_XDECREF(prec);
1151 1151 Py_XDECREF(succs);
1152 1152 Py_XDECREF(metadata);
1153 1153 Py_XDECREF(parents);
1154 1154 return ret;
1155 1155 }
1156 1156
1157 1157 static PyObject *fm1readmarkers(PyObject *self, PyObject *args)
1158 1158 {
1159 1159 const char *data, *dataend;
1160 1160 Py_ssize_t datalen, offset, stop;
1161 1161 PyObject *markers = NULL;
1162 1162
1163 1163 if (!PyArg_ParseTuple(args, PY23("s#nn", "y#nn"), &data, &datalen,
1164 1164 &offset, &stop)) {
1165 1165 return NULL;
1166 1166 }
1167 1167 if (offset < 0) {
1168 1168 PyErr_SetString(PyExc_ValueError,
1169 1169 "invalid negative offset in fm1readmarkers");
1170 1170 return NULL;
1171 1171 }
1172 1172 if (stop > datalen) {
1173 1173 PyErr_SetString(
1174 1174 PyExc_ValueError,
1175 1175 "stop longer than data length in fm1readmarkers");
1176 1176 return NULL;
1177 1177 }
1178 1178 dataend = data + datalen;
1179 1179 data += offset;
1180 1180 markers = PyList_New(0);
1181 1181 if (!markers) {
1182 1182 return NULL;
1183 1183 }
1184 1184 while (offset < stop) {
1185 1185 uint32_t msize;
1186 1186 int error;
1187 1187 PyObject *record = fm1readmarker(data, dataend, &msize);
1188 1188 if (!record) {
1189 1189 goto bail;
1190 1190 }
1191 1191 error = PyList_Append(markers, record);
1192 1192 Py_DECREF(record);
1193 1193 if (error) {
1194 1194 goto bail;
1195 1195 }
1196 1196 data += msize;
1197 1197 offset += msize;
1198 1198 }
1199 1199 return markers;
1200 1200 bail:
1201 1201 Py_DECREF(markers);
1202 1202 return NULL;
1203 1203 }
1204 1204
1205 1205 static char parsers_doc[] = "Efficient content parsing.";
1206 1206
1207 1207 PyObject *encodedir(PyObject *self, PyObject *args);
1208 1208 PyObject *pathencode(PyObject *self, PyObject *args);
1209 1209 PyObject *lowerencode(PyObject *self, PyObject *args);
1210 1210 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs);
1211 1211
1212 1212 static PyMethodDef methods[] = {
1213 1213 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
1214 1214 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1215 1215 {"parse_index2", (PyCFunction)parse_index2, METH_VARARGS | METH_KEYWORDS,
1216 1216 "parse a revlog index\n"},
1217 1217 {"isasciistr", isasciistr, METH_VARARGS, "check if an ASCII string\n"},
1218 1218 {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
1219 1219 {"asciiupper", asciiupper, METH_VARARGS, "uppercase an ASCII string\n"},
1220 1220 {"dict_new_presized", dict_new_presized, METH_VARARGS,
1221 1221 "construct a dict with an expected size\n"},
1222 1222 {"make_file_foldmap", make_file_foldmap, METH_VARARGS,
1223 1223 "make file foldmap\n"},
1224 1224 {"jsonescapeu8fast", jsonescapeu8fast, METH_VARARGS,
1225 1225 "escape a UTF-8 byte string to JSON (fast path)\n"},
1226 1226 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
1227 1227 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
1228 1228 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
1229 1229 {"fm1readmarkers", fm1readmarkers, METH_VARARGS,
1230 1230 "parse v1 obsolete markers\n"},
1231 1231 {NULL, NULL}};
1232 1232
1233 1233 void dirs_module_init(PyObject *mod);
1234 1234 void manifest_module_init(PyObject *mod);
1235 1235 void revlog_module_init(PyObject *mod);
1236 1236
1237 1237 static const int version = 20;
1238 1238
1239 1239 static void module_init(PyObject *mod)
1240 1240 {
1241 1241 PyModule_AddIntConstant(mod, "version", version);
1242 1242
1243 1243 /* This module constant has two purposes. First, it lets us unit test
1244 1244 * the ImportError raised without hard-coding any error text. This
1245 1245 * means we can change the text in the future without breaking tests,
1246 1246 * even across changesets without a recompile. Second, its presence
1247 1247 * can be used to determine whether the version-checking logic is
1248 1248 * present, which also helps in testing across changesets without a
1249 1249 * recompile. Note that this means the pure-Python version of parsers
1250 1250 * should not have this module constant. */
1251 1251 PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
1252 1252
1253 1253 dirs_module_init(mod);
1254 1254 manifest_module_init(mod);
1255 1255 revlog_module_init(mod);
1256 1256
1257 1257 if (PyType_Ready(&dirstateItemType) < 0) {
1258 1258 return;
1259 1259 }
1260 1260 Py_INCREF(&dirstateItemType);
1261 1261 PyModule_AddObject(mod, "DirstateItem", (PyObject *)&dirstateItemType);
1262 1262 }
1263 1263
1264 1264 static int check_python_version(void)
1265 1265 {
1266 1266 PyObject *sys = PyImport_ImportModule("sys"), *ver;
1267 1267 long hexversion;
1268 1268 if (!sys) {
1269 1269 return -1;
1270 1270 }
1271 1271 ver = PyObject_GetAttrString(sys, "hexversion");
1272 1272 Py_DECREF(sys);
1273 1273 if (!ver) {
1274 1274 return -1;
1275 1275 }
1276 1276 hexversion = PyInt_AsLong(ver);
1277 1277 Py_DECREF(ver);
1278 1278 /* sys.hexversion is a 32-bit number by default, so the -1 case
1279 1279 * should only occur in unusual circumstances (e.g. if sys.hexversion
1280 1280 * is manually set to an invalid value). */
1281 1281 if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
1282 1282 PyErr_Format(PyExc_ImportError,
1283 1283 "%s: The Mercurial extension "
1284 1284 "modules were compiled with Python " PY_VERSION
1285 1285 ", but "
1286 1286 "Mercurial is currently using Python with "
1287 1287 "sys.hexversion=%ld: "
1288 1288 "Python %s\n at: %s",
1289 1289 versionerrortext, hexversion, Py_GetVersion(),
1290 1290 Py_GetProgramFullPath());
1291 1291 return -1;
1292 1292 }
1293 1293 return 0;
1294 1294 }
1295 1295
1296 1296 #ifdef IS_PY3K
1297 1297 static struct PyModuleDef parsers_module = {PyModuleDef_HEAD_INIT, "parsers",
1298 1298 parsers_doc, -1, methods};
1299 1299
1300 1300 PyMODINIT_FUNC PyInit_parsers(void)
1301 1301 {
1302 1302 PyObject *mod;
1303 1303
1304 1304 if (check_python_version() == -1)
1305 1305 return NULL;
1306 1306 mod = PyModule_Create(&parsers_module);
1307 1307 module_init(mod);
1308 1308 return mod;
1309 1309 }
1310 1310 #else
1311 1311 PyMODINIT_FUNC initparsers(void)
1312 1312 {
1313 1313 PyObject *mod;
1314 1314
1315 1315 if (check_python_version() == -1) {
1316 1316 return;
1317 1317 }
1318 1318 mod = Py_InitModule3("parsers", methods, parsers_doc);
1319 1319 module_init(mod);
1320 1320 }
1321 1321 #endif
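The hunk above leaves the dirstate-v1 compatibility layer intact: dirstate_item_c_v1_state() collapses the v2 flag combinations back into the old one-letter states used by the v1 format. A rough Python transcription of that mapping, assuming the constants from util.h (an illustration, not Mercurial's pure-Python module):

    # Transcription of dirstate_item_c_v1_state(): derive the v1 state letter
    # from the three tracking-related flag bits.
    WC_TRACKED, P1_TRACKED, P2_INFO = 1 << 0, 1 << 1, 1 << 2

    def v1_state(flags):
        if not (flags & WC_TRACKED) and flags & (P1_TRACKED | P2_INFO):
            return b'r'  # removed: known to a parent, gone from the working copy
        if flags & WC_TRACKED and flags & P1_TRACKED and flags & P2_INFO:
            return b'm'  # merged: tracked everywhere, with p2 information
        if (flags & (WC_TRACKED | P1_TRACKED | P2_INFO)) == WC_TRACKED:
            return b'a'  # added: only the working copy knows about it
        return b'n'      # normal otherwise

    assert v1_state(P1_TRACKED) == b'r'
    assert v1_state(WC_TRACKED | P1_TRACKED | P2_INFO) == b'm'
    assert v1_state(WC_TRACKED) == b'a'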
@@ -1,92 +1,92 b''
1 1 /*
2 2 util.h - utility functions for interfacing with the various python APIs.
3 3
4 4 This software may be used and distributed according to the terms of
5 5 the GNU General Public License, incorporated herein by reference.
6 6 */
7 7
8 8 #ifndef _HG_UTIL_H_
9 9 #define _HG_UTIL_H_
10 10
11 11 #include "compat.h"
12 12
13 13 #if PY_MAJOR_VERSION >= 3
14 14 #define IS_PY3K
15 15 #endif
16 16
17 17 /* helper to switch things like string literal depending on Python version */
18 18 #ifdef IS_PY3K
19 19 #define PY23(py2, py3) py3
20 20 #else
21 21 #define PY23(py2, py3) py2
22 22 #endif
23 23
24 24 /* clang-format off */
25 25 typedef struct {
26 26 PyObject_HEAD
27 27 int flags;
28 28 int mode;
29 29 int size;
30 30 int mtime_s;
31 31 int mtime_ns;
32 32 } dirstateItemObject;
33 33 /* clang-format on */
34 34
35 35 static const int dirstate_flag_wc_tracked = 1;
36 36 static const int dirstate_flag_p1_tracked = 1 << 1;
37 37 static const int dirstate_flag_p2_info = 1 << 2;
38 38 static const int dirstate_flag_has_meaningful_data = 1 << 3;
39 static const int dirstate_flag_has_file_mtime = 1 << 4;
40 static const int dirstate_flag_has_directory_mtime = 1 << 5;
39 static const int dirstate_flag_has_mtime = 1 << 4;
40 static const int dirstate_flag_directory = 1 << 5;
41 41 static const int dirstate_flag_mode_exec_perm = 1 << 6;
42 42 static const int dirstate_flag_mode_is_symlink = 1 << 7;
43 43 static const int dirstate_flag_expected_state_is_modified = 1 << 8;
44 44 static const int dirstate_flag_all_unknown_recorded = 1 << 9;
45 45 static const int dirstate_flag_all_ignored_recorded = 1 << 10;
46 46 static const int dirstate_flag_fallback_exec = 1 << 11;
47 47 static const int dirstate_flag_has_fallback_exec = 1 << 12;
48 48 static const int dirstate_flag_fallback_symlink = 1 << 13;
49 49 static const int dirstate_flag_has_fallback_symlink = 1 << 14;
50 50 static const int dirstate_flag_mtime_second_ambiguous = 1 << 15;
51 51
52 52 extern PyTypeObject dirstateItemType;
53 53 #define dirstate_tuple_check(op) (Py_TYPE(op) == &dirstateItemType)
54 54
55 55 #ifndef MIN
56 56 #define MIN(a, b) (((a) < (b)) ? (a) : (b))
57 57 #endif
58 58 /* VC9 doesn't include bool and lacks stdbool.h based on my searching */
59 59 #if defined(_MSC_VER) || __STDC_VERSION__ < 199901L
60 60 #define true 1
61 61 #define false 0
62 62 typedef unsigned char bool;
63 63 #else
64 64 #include <stdbool.h>
65 65 #endif
66 66
67 67 static inline PyObject *_dict_new_presized(Py_ssize_t expected_size)
68 68 {
69 69 /* _PyDict_NewPresized expects a minused parameter, but it actually
70 70 creates a dictionary that's the nearest power of two bigger than the
71 71 parameter. For example, with the initial minused = 1000, the
72 72 dictionary created has size 1024. Of course in a lot of cases that
73 73 can be greater than the maximum load factor Python's dict object
74 74 expects (= 2/3), so as soon as we cross the threshold we'll resize
75 75 anyway. So create a dictionary that's at least 3/2 the size. */
76 76 return _PyDict_NewPresized(((1 + expected_size) / 2) * 3);
77 77 }
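The comment above is the whole idea behind _dict_new_presized: CPython dicts resize once they are roughly 2/3 full, so the helper requests about 3/2 of the expected size up front. The arithmetic, as a tiny illustrative sketch:

    # Same arithmetic as _dict_new_presized above, in Python for readability.
    def presized_minused(expected_size):
        return ((1 + expected_size) // 2) * 3

    assert presized_minused(1000) == 1500  # at least 3/2 of the requested size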
78 78
79 79 /* Convert a PyInt or PyLong to a long. Returns false if there is an
80 80 error, in which case an exception will already have been set. */
81 81 static inline bool pylong_to_long(PyObject *pylong, long *out)
82 82 {
83 83 *out = PyLong_AsLong(pylong);
84 84 /* Fast path to avoid hitting PyErr_Occurred if the value was obviously
85 85 * not an error. */
86 86 if (*out != -1) {
87 87 return true;
88 88 }
89 89 return PyErr_Occurred() == NULL;
90 90 }
91 91
92 92 #endif /* _HG_UTIL_H_ */
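dirstate-v2 keeps only two mode-related bits, MODE_EXEC_PERM and MODE_IS_SYMLINK, and dirstate_item_from_v2_meth in parsers.c above rebuilds an approximate st_mode from them when HAS_MEANINGFUL_DATA is set. A small sketch of that reconstruction using the constants just defined (illustrative; the HAS_MEANINGFUL_DATA guard is omitted for brevity):

    import stat

    MODE_EXEC_PERM = 1 << 6    # dirstate_flag_mode_exec_perm
    MODE_IS_SYMLINK = 1 << 7   # dirstate_flag_mode_is_symlink

    def mode_from_v2_flags(flags):
        """Rebuild an approximate mode the way dirstate_item_from_v2_meth does."""
        mode = 0o755 if flags & MODE_EXEC_PERM else 0o644
        mode |= stat.S_IFLNK if flags & MODE_IS_SYMLINK else stat.S_IFREG
        return mode

    assert mode_from_v2_flags(0) == (stat.S_IFREG | 0o644)
    assert mode_from_v2_flags(MODE_EXEC_PERM) == (stat.S_IFREG | 0o755)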
@@ -1,411 +1,414 b''
1 1 # v2.py - Pure-Python implementation of the dirstate-v2 file format
2 2 #
3 3 # Copyright Mercurial Contributors
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import struct
11 11
12 12 from ..thirdparty import attr
13 13 from .. import error, policy
14 14
15 15 parsers = policy.importmod('parsers')
16 16
17 17
18 18 # Must match the constant of the same name in
19 19 # `rust/hg-core/src/dirstate_tree/on_disk.rs`
20 20 TREE_METADATA_SIZE = 44
21 21 NODE_SIZE = 44
22 22
23 23
24 24 # Must match the `TreeMetadata` Rust struct in
25 25 # `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there.
26 26 #
27 27 # * 4 bytes: start offset of root nodes
28 28 # * 4 bytes: number of root nodes
29 29 # * 4 bytes: total number of nodes in the tree that have an entry
30 30 # * 4 bytes: total number of nodes in the tree that have a copy source
31 31 # * 4 bytes: number of bytes in the data file that are not used anymore
32 32 # * 4 bytes: unused
33 33 # * 20 bytes: SHA-1 hash of ignore patterns
34 34 TREE_METADATA = struct.Struct('>LLLLL4s20s')
35 35
36 36
37 37 # Must match the `Node` Rust struct in
38 38 # `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there.
39 39 #
40 40 # * 4 bytes: start offset of full path
41 41 # * 2 bytes: length of the full path
42 42 # * 2 bytes: length within the full path before its "base name"
43 43 # * 4 bytes: start offset of the copy source if any, or zero for no copy source
44 44 # * 2 bytes: length of the copy source if any, or unused
45 45 # * 4 bytes: start offset of child nodes
46 46 # * 4 bytes: number of child nodes
47 47 # * 4 bytes: number of descendant nodes that have an entry
48 48 # * 4 bytes: number of descendant nodes that have a "tracked" state
49 49 # * 1 byte: flags
50 50 # * 4 bytes: expected size
51 51 # * 4 bytes: mtime seconds
52 52 # * 4 bytes: mtime nanoseconds
53 53 NODE = struct.Struct('>LHHLHLLLLHlll')
54 54
55 55
56 56 assert TREE_METADATA_SIZE == TREE_METADATA.size
57 57 assert NODE_SIZE == NODE.size
58 58
59 # match constant in mercurial/pure/parsers.py
60 DIRSTATE_V2_DIRECTORY = 1 << 5
61
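To see where the new DIRSTATE_V2_DIRECTORY bit ends up on disk, here is a hedged round-trip through the NODE struct defined above. The format string and the 44-byte size come from this file; the field values are made up for illustration:

    import struct

    NODE = struct.Struct('>LHHLHLLLLHlll')  # same layout as above, 44 bytes
    DIRSTATE_V2_DIRECTORY = 1 << 5

    # A directory node: no entry, so size/mtime stay zero and only the
    # DIRECTORY flag is set (what Node.pack() below emits when entry is None).
    packed = NODE.pack(
        0,     # path start
        7,     # path length, e.g. b"dir/sub"
        4,     # basename starts after b"dir/"
        0, 0,  # no copy source
        0, 0,  # no children in this toy example
        0, 0,  # no descendants with entry / tracked descendants
        DIRSTATE_V2_DIRECTORY,  # flags
        0, 0, 0,  # size, mtime_s, mtime_ns
    )
    assert len(packed) == 44
    assert NODE.unpack(packed)[9] == DIRSTATE_V2_DIRECTORY  # flags field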
59 62
60 63 def parse_dirstate(map, copy_map, data, tree_metadata):
61 64 """parse a full v2-dirstate from a binary data into dictionnaries:
62 65
63 66 - map: a {path: entry} mapping that will be filled
64 67 - copy_map: a {path: copy-source} mapping that will be filled
65 68 - data: a binary blob that contains v2 node data
66 69 - tree_metadata: a binary blob of the top-level node (from the docket)
67 70 """
68 71 (
69 72 root_nodes_start,
70 73 root_nodes_len,
71 74 _nodes_with_entry_count,
72 75 _nodes_with_copy_source_count,
73 76 _unreachable_bytes,
74 77 _unused,
75 78 _ignore_patterns_hash,
76 79 ) = TREE_METADATA.unpack(tree_metadata)
77 80 parse_nodes(map, copy_map, data, root_nodes_start, root_nodes_len)
78 81
79 82
80 83 def parse_nodes(map, copy_map, data, start, len):
81 84 """parse <len> nodes from <data> starting at offset <start>
82 85
83 86 This is used by parse_dirstate to recursively fill `map` and `copy_map`.
84 87
85 88 All directory-specific information is ignored and does not need any
86 processing (HAS_DIRECTORY_MTIME, ALL_UNKNOWN_RECORDED, ALL_IGNORED_RECORDED)
89 processing (DIRECTORY, ALL_UNKNOWN_RECORDED, ALL_IGNORED_RECORDED)
87 90 """
88 91 for i in range(len):
89 92 node_start = start + NODE_SIZE * i
90 93 node_bytes = slice_with_len(data, node_start, NODE_SIZE)
91 94 (
92 95 path_start,
93 96 path_len,
94 97 _basename_start,
95 98 copy_source_start,
96 99 copy_source_len,
97 100 children_start,
98 101 children_count,
99 102 _descendants_with_entry_count,
100 103 _tracked_descendants_count,
101 104 flags,
102 105 size,
103 106 mtime_s,
104 107 mtime_ns,
105 108 ) = NODE.unpack(node_bytes)
106 109
107 110 # Parse child nodes of this node recursively
108 111 parse_nodes(map, copy_map, data, children_start, children_count)
109 112
110 113 item = parsers.DirstateItem.from_v2_data(flags, size, mtime_s, mtime_ns)
111 114 if not item.any_tracked:
112 115 continue
113 116 path = slice_with_len(data, path_start, path_len)
114 117 map[path] = item
115 118 if copy_source_start:
116 119 copy_map[path] = slice_with_len(
117 120 data, copy_source_start, copy_source_len
118 121 )
119 122
120 123
121 124 def slice_with_len(data, start, len):
122 125 return data[start : start + len]
123 126
124 127
125 128 @attr.s
126 129 class Node(object):
127 130 path = attr.ib()
128 131 entry = attr.ib()
129 132 parent = attr.ib(default=None)
130 133 children_count = attr.ib(default=0)
131 134 children_offset = attr.ib(default=0)
132 135 descendants_with_entry = attr.ib(default=0)
133 136 tracked_descendants = attr.ib(default=0)
134 137
135 138 def pack(self, copy_map, paths_offset):
136 139 path = self.path
137 140 copy = copy_map.get(path)
138 141 entry = self.entry
139 142
140 143 path_start = paths_offset
141 144 path_len = len(path)
142 145 basename_start = path.rfind(b'/') + 1 # 0 if rfind returns -1
143 146 if copy is not None:
144 147 copy_source_start = paths_offset + len(path)
145 148 copy_source_len = len(copy)
146 149 else:
147 150 copy_source_start = 0
148 151 copy_source_len = 0
149 152 if entry is not None:
150 153 flags, size, mtime_s, mtime_ns = entry.v2_data()
151 154 else:
152 155 # There are no mtime-cached directories in the Python implementation
153 flags = 0
156 flags = DIRSTATE_V2_DIRECTORY
154 157 size = 0
155 158 mtime_s = 0
156 159 mtime_ns = 0
157 160 return NODE.pack(
158 161 path_start,
159 162 path_len,
160 163 basename_start,
161 164 copy_source_start,
162 165 copy_source_len,
163 166 self.children_offset,
164 167 self.children_count,
165 168 self.descendants_with_entry,
166 169 self.tracked_descendants,
167 170 flags,
168 171 size,
169 172 mtime_s,
170 173 mtime_ns,
171 174 )
172 175
173 176
174 177 def pack_dirstate(map, copy_map, now):
175 178 """
176 179 Pack `map` and `copy_map` into the dirstate v2 binary format and return
177 180 the bytearray.
178 181 `now` is a timestamp of the current filesystem time used to detect race
179 182 conditions in writing the dirstate to disk, see inline comment.
180 183
181 184 The on-disk format expects a tree-like structure where the leaves are
182 185 written first (and sorted per-directory), going up levels until the root
183 186 node and writing that one to the docket. See more details on the on-disk
184 187 format in `mercurial/helptext/internals/dirstate-v2`.
185 188
186 189 Since both `map` and `copy_map` are flat dicts we need to figure out the
187 190 hierarchy. This algorithm does so without having to build the entire tree
188 191 in-memory: it only keeps the minimum number of nodes around to satisfy the
189 192 format.
190 193
191 194 # Algorithm explanation
192 195
193 196 This explanation does not talk about the different counters for tracked
194 197 descendants and storing the copies, but that work is pretty simple once this
195 198 algorithm is in place.
196 199
197 200 ## Building a subtree
198 201
199 202 First, sort `map`: this makes it so the leaves of the tree are contiguous
200 203 per directory (i.e. a/b/c and a/b/d will be next to each other in the list),
201 204 and enables us to use the ordering of folders to have a "cursor" of the
202 205 current folder we're in without ever going twice in the same branch of the
203 206 tree. The cursor is a node that remembers its parent and any information
204 207 relevant to the format (see the `Node` class), building the relevant part
205 208 of the tree lazily.
206 209 Then, for each file in `map`, move the cursor into the tree to the
207 210 corresponding folder of the file: for example, if the very first file
208 211 is "a/b/c", we start from `Node[""]`, create `Node["a"]` which points to
209 212 its parent `Node[""]`, then create `Node["a/b"]`, which points to its parent
210 213 `Node["a"]`. These nodes are kept around in a stack.
211 214 If the next file in `map` is in the same subtree ("a/b/d" or "a/b/e/f"), we
212 215 add it to the stack and keep looping with the same logic of creating the
213 216 tree nodes as needed. If however the next file in `map` is *not* in the same
214 217 subtree ("a/other", if we're still in the "a/b" folder), then we know that
215 218 the subtree we're in is complete.
216 219
217 220 ## Writing the subtree
218 221
219 222 We have the entire subtree in the stack, so we start writing it to disk
220 223 folder by folder. The way we write a folder is to pop the stack into a list
221 224 until the folder changes, reverse this list of direct children (to satisfy
222 225 the format requirement that children be sorted). This process repeats until
223 226 we hit the "other" subtree.
224 227
225 228 An example:
226 229 a
227 230 dir1/b
228 231 dir1/c
229 232 dir2/dir3/d
230 233 dir2/dir3/e
231 234 dir2/f
232 235
233 236 Would have us:
234 237 - add to the stack until "dir2/dir3/e"
235 238 - realize that "dir2/f" is in a different subtree
236 239 - pop "dir2/dir3/e", "dir2/dir3/d", reverse them so they're sorted and
237 240 pack them since the next entry is "dir2/dir3"
238 241 - go back up to "dir2"
239 242 - add "dir2/f" to the stack
240 243 - realize we're done with the map
241 244 - pop "dir2/f", "dir2/dir3" from the stack, reverse and pack them
242 245 - go up to the root node, do the same to write "a", "dir1" and "dir2" in
243 246 that order
244 247
245 248 ## Special case for the root node
246 249
247 250 The root node is not serialized in the format, but its information is
248 251 written to the docket. Again, see more details on the on-disk format in
249 252 `mercurial/helptext/internals/dirstate-v2`.
250 253 """
251 254 data = bytearray()
252 255 root_nodes_start = 0
253 256 root_nodes_len = 0
254 257 nodes_with_entry_count = 0
255 258 nodes_with_copy_source_count = 0
256 259 # Will always be 0 since this implementation always re-writes everything
257 260 # to disk
258 261 unreachable_bytes = 0
259 262 unused = b'\x00' * 4
260 263 # This is an optimization that's only useful for the Rust implementation
261 264 ignore_patterns_hash = b'\x00' * 20
262 265
263 266 if len(map) == 0:
264 267 tree_metadata = TREE_METADATA.pack(
265 268 root_nodes_start,
266 269 root_nodes_len,
267 270 nodes_with_entry_count,
268 271 nodes_with_copy_source_count,
269 272 unreachable_bytes,
270 273 unused,
271 274 ignore_patterns_hash,
272 275 )
273 276 return data, tree_metadata
274 277
275 278 sorted_map = sorted(map.items(), key=lambda x: x[0])
276 279
277 280 # Use a stack so that we only have to remember the nodes we currently
278 281 # need, instead of building the entire tree in memory
279 282 stack = []
280 283 current_node = Node(b"", None)
281 284 stack.append(current_node)
282 285
283 286 for index, (path, entry) in enumerate(sorted_map, 1):
284 287 if entry.need_delay(now):
285 288 # The file was last modified "simultaneously" with the current
286 289 # write to dirstate (i.e. within the same second for file-
287 290 # systems with a granularity of 1 sec). This commonly happens
288 291 # for at least a couple of files on 'update'.
289 292 # The user could change the file without changing its size
290 293 # within the same second. Invalidate the file's mtime in
291 294 # dirstate, forcing future 'status' calls to compare the
292 295 # contents of the file if the size is the same. This prevents
293 296 # mistakenly treating such files as clean.
294 297 entry.set_possibly_dirty()
295 298 nodes_with_entry_count += 1
296 299 if path in copy_map:
297 300 nodes_with_copy_source_count += 1
298 301 current_folder = get_folder(path)
299 302 current_node = move_to_correct_node_in_tree(
300 303 current_folder, current_node, stack
301 304 )
302 305
303 306 current_node.children_count += 1
304 307 # Entries from `map` are never `None`
305 308 if entry.tracked:
306 309 current_node.tracked_descendants += 1
307 310 current_node.descendants_with_entry += 1
308 311 stack.append(Node(path, entry, current_node))
309 312
310 313 should_pack = True
311 314 next_path = None
312 315 if index < len(sorted_map):
313 316 # Determine if the next entry is in the same sub-tree, if so don't
314 317 # pack yet
315 318 next_path = sorted_map[index][0]
316 319 should_pack = not get_folder(next_path).startswith(current_folder)
317 320 if should_pack:
318 321 pack_directory_children(current_node, copy_map, data, stack)
319 322 while stack and current_node.path != b"":
320 323 # Go up the tree and write until we reach the folder of the next
321 324 # entry (if any, otherwise the root)
322 325 parent = current_node.parent
323 326 in_parent_folder_of_next_entry = next_path is not None and (
324 327 get_folder(next_path).startswith(get_folder(stack[-1].path))
325 328 )
326 329 if parent is None or in_parent_folder_of_next_entry:
327 330 break
328 331 pack_directory_children(parent, copy_map, data, stack)
329 332 current_node = parent
330 333
331 334 # Special case for the root node since we don't write it to disk, only its
332 335 # children to the docket
333 336 current_node = stack.pop()
334 337 assert current_node.path == b"", current_node.path
335 338 assert len(stack) == 0, len(stack)
336 339
337 340 tree_metadata = TREE_METADATA.pack(
338 341 current_node.children_offset,
339 342 current_node.children_count,
340 343 nodes_with_entry_count,
341 344 nodes_with_copy_source_count,
342 345 unreachable_bytes,
343 346 unused,
344 347 ignore_patterns_hash,
345 348 )
346 349
347 350 return data, tree_metadata
348 351
349 352
350 353 def get_folder(path):
351 354 """
352 355 Return the folder of the path that's given, an empty string for root paths.
353 356 """
354 357 return path.rsplit(b'/', 1)[0] if b'/' in path else b''
355 358
356 359
357 360 def move_to_correct_node_in_tree(target_folder, current_node, stack):
358 361 """
359 362 Move inside the dirstate node tree to the node corresponding to
360 363 `target_folder`, creating the missing nodes along the way if needed.
361 364 """
362 365 while target_folder != current_node.path:
363 366 if target_folder.startswith(current_node.path):
364 367 # We need to go down a folder
365 368 prefix = target_folder[len(current_node.path) :].lstrip(b'/')
366 369 subfolder_name = prefix.split(b'/', 1)[0]
367 370 if current_node.path:
368 371 subfolder_path = current_node.path + b'/' + subfolder_name
369 372 else:
370 373 subfolder_path = subfolder_name
371 374 next_node = stack[-1]
372 375 if next_node.path == target_folder:
373 376 # This folder is now a file and only contains removed entries
374 377 # merge with the last node
375 378 current_node = next_node
376 379 else:
377 380 current_node.children_count += 1
378 381 current_node = Node(subfolder_path, None, current_node)
379 382 stack.append(current_node)
380 383 else:
381 384 # We need to go up a folder
382 385 current_node = current_node.parent
383 386 return current_node
384 387
385 388
386 389 def pack_directory_children(node, copy_map, data, stack):
387 390 """
388 391 Write the binary representation of the direct sorted children of `node` to
389 392 `data`
390 393 """
391 394 direct_children = []
392 395
393 396 while stack[-1].path != b"" and get_folder(stack[-1].path) == node.path:
394 397 direct_children.append(stack.pop())
395 398 if not direct_children:
396 399 raise error.ProgrammingError(b"no direct children for %r" % node.path)
397 400
398 401 # Reverse the stack to get the correct sorted order
399 402 direct_children.reverse()
400 403 packed_children = bytearray()
401 404 # Write the paths to `data`. Pack child nodes but don't write them yet
402 405 for child in direct_children:
403 406 packed = child.pack(copy_map=copy_map, paths_offset=len(data))
404 407 packed_children.extend(packed)
405 408 data.extend(child.path)
406 409 data.extend(copy_map.get(child.path, b""))
407 410 node.tracked_descendants += child.tracked_descendants
408 411 node.descendants_with_entry += child.descendants_with_entry
409 412 # Write the fixed-size child nodes all together
410 413 node.children_offset = len(data)
411 414 data.extend(packed_children)
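
Editor's illustration (not part of this change): a minimal standalone sketch of the
grouping rule described in the `pack_dirstate` docstring above. It re-implements the
`get_folder` helper and walks the docstring's example paths, printing where the cursor
leaves a completed subtree, which is the point at which that subtree's direct children
can be reversed and packed::

  def get_folder(path):
      # Same behaviour as the `get_folder` helper above.
      return path.rsplit(b'/', 1)[0] if b'/' in path else b''

  paths = sorted([b"a", b"dir1/b", b"dir1/c",
                  b"dir2/dir3/d", b"dir2/dir3/e", b"dir2/f"])
  previous_folder = b""
  for path in paths:
      folder = get_folder(path)
      if not folder.startswith(previous_folder):
          # The cursor leaves the previous subtree: everything gathered for
          # it can be reversed and packed to disk.
          print("leaving subtree %r, it can be packed" % previous_folder)
      print("stacking %r (folder %r)" % (path, folder))
      previous_folder = folder
  print("end of map: pack the remaining stack up to the root")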
@@ -1,602 +1,610 b''
1 1 The *dirstate* is what Mercurial uses internally to track
2 2 the state of files in the working directory,
3 3 such as set by commands like `hg add` and `hg rm`.
4 4 It also contains some cached data that help make `hg status` faster.
5 5 The name refers both to `.hg/dirstate` on the filesystem
6 6 and the corresponding data structure in memory while a Mercurial process
7 7 is running.
8 8
9 9 The original file format, retroactively dubbed `dirstate-v1`,
10 10 is described at https://www.mercurial-scm.org/wiki/DirState.
11 11 It is made of a flat sequence of unordered variable-size entries,
12 12 so accessing any information in it requires parsing all of it.
13 13 Similarly, saving changes requires rewriting the entire file.
14 14
15 15 The newer `dirstate-v2` file format is designed to fix these limitations
16 16 and make `hg status` faster.
17 17
18 18 User guide
19 19 ==========
20 20
21 21 Compatibility
22 22 -------------
23 23
24 24 The file format is experimental and may still change.
25 25 Different versions of Mercurial may not be compatible with each other
26 26 when working on a local repository that uses this format.
27 27 When using an incompatible version with the experimental format,
28 28 anything can happen including data corruption.
29 29
30 30 Since the dirstate is entirely local and not relevant to the wire protocol,
31 31 `dirstate-v2` does not affect compatibility with remote Mercurial versions.
32 32
33 33 When `share-safe` is enabled, different repositories sharing the same store
34 34 can use different dirstate formats.
35 35
36 36 Enabling `dirstate-v2` for new local repositories
37 37 -------------------------------------------------
38 38
39 39 When creating a new local repository such as with `hg init` or `hg clone`,
40 40 the `exp-dirstate-v2` boolean in the `format` configuration section
41 41 controls whether to use this file format.
42 42 This is disabled by default as of this writing.
43 43 To enable it for a single repository, run for example::
44 44
45 45 $ hg init my-project --config format.exp-dirstate-v2=1
46 46
47 47 Checking the format of an existing local repository
48 48 ----------------------------------------------------
49 49
50 50 The `debugformat` command prints information about
51 51 which of multiple optional formats are used in the current repository,
52 52 including `dirstate-v2`::
53 53
54 54 $ hg debugformat
55 55 format-variant repo
56 56 fncache: yes
57 57 dirstate-v2: yes
58 58 […]
59 59
60 60 Upgrading or downgrading an existing local repository
61 61 -----------------------------------------------------
62 62
63 63 The `debugupgrade` command does various upgrades or downgrades
64 64 on a local repository
65 65 based on the current Mercurial version and on configuration.
66 66 The same `format.exp-dirstate-v2` configuration is used again.
67 67
68 68 Example to upgrade::
69 69
70 70 $ hg debugupgrade --config format.exp-dirstate-v2=1
71 71
72 72 Example to downgrade to `dirstate-v1`::
73 73
74 74 $ hg debugupgrade --config format.exp-dirstate-v2=0
75 75
76 76 Both of these commands do nothing but print a list of proposed changes,
77 77 which may include changes unrelated to the dirstate.
78 78 Those other changes are controlled by their own configuration keys.
79 79 Add `--run` to a command to actually apply the proposed changes.
80 80
81 81 Backups of `.hg/requires` and `.hg/dirstate` are created
82 82 in a `.hg/upgradebackup.*` directory.
83 83 If something goes wrong, restoring those files should undo the change.
84 84
85 85 Note that upgrading affects compatibility with older versions of Mercurial
86 86 as noted above.
87 87 This can be relevant when a repository’s files are on a USB drive
88 88 or some other removable media, or shared over the network, etc.
89 89
90 90 Internal filesystem representation
91 91 ==================================
92 92
93 93 Requirements file
94 94 -----------------
95 95
96 96 The `.hg/requires` file indicates which of various optional file formats
97 97 are used by a given repository.
98 98 Mercurial aborts when seeing a requirement it does not know about,
99 99 which avoids an older version accidentally messing up a repository
100 100 that uses a format that was introduced later.
101 101 For versions that do support a format, the presence or absence of
102 102 the corresponding requirement indicates whether to use that format.
103 103
104 104 When the file contains an `exp-dirstate-v2` line,
105 105 the `dirstate-v2` format is used.
106 106 With no such line `dirstate-v1` is used.
107 107
108 108 High level description
109 109 ----------------------
110 110
111 111 Whereas `dirstate-v1` uses a single `.hg/dirstate` file,
112 112 in `dirstate-v2` that file is a "docket" file
113 113 that only contains some metadata
114 114 and points to a separate data file named `.hg/dirstate.{ID}`,
115 115 where `{ID}` is a random identifier.
116 116
117 117 This separation allows making data files append-only
118 118 and therefore safer to memory-map.
119 119 Creating a new data file (occasionally to clean up unused data)
120 120 can be done with a different ID
121 121 without disrupting another Mercurial process
122 122 that could still be using the previous data file.
123 123
124 124 Both files have a format designed to reduce the need for parsing,
125 125 by using fixed-size binary components as much as possible.
126 126 For data that is not fixed-size,
127 127 references to other parts of a file can be made by storing "pseudo-pointers":
128 128 integers counted in bytes from the start of a file.
129 129 For read-only access no data structure is needed,
130 130 only a bytes buffer (possibly memory-mapped directly from the filesystem)
131 131 with specific parts read on demand.
132 132
133 133 The data file contains "nodes" organized in a tree.
134 134 Each node represents a file or directory inside the working directory
135 135 or its parent changeset.
136 136 This tree has the same structure as the filesystem,
137 137 so a node representing a directory has child nodes representing
138 138 the files and subdirectories contained directly in that directory.
139 139
140 140 The docket file format
141 141 ----------------------
142 142
143 143 This is implemented in `rust/hg-core/src/dirstate_tree/on_disk.rs`
144 144 and `mercurial/dirstateutils/docket.py`.
145 145
146 146 Components of the docket file are found at fixed offsets,
147 147 counted in bytes from the start of the file:
148 148
149 149 * Offset 0:
150 150 The 12-byte marker string "dirstate-v2\n" ending with a newline character.
151 151 This makes it easier to tell a dirstate-v2 file from a dirstate-v1 file,
152 152 although it is not strictly necessary
153 153 since `.hg/requires` determines which format to use.
154 154
155 155 * Offset 12:
156 156 The changeset node ID on the first parent of the working directory,
157 157 as up to 32 binary bytes.
158 158 If a node ID is shorter (20 bytes for SHA-1),
159 159 it is start-aligned and the rest of the bytes are set to zero.
160 160
161 161 * Offset 44:
162 162 The changeset node ID on the second parent of the working directory,
163 163 or all zeros if there isn’t one.
164 164 Also 32 binary bytes.
165 165
166 166 * Offset 76:
167 167 Tree metadata on 44 bytes, described below.
168 168 Its separation in this documentation from the rest of the docket
169 169 reflects a detail of the current implementation.
170 170 Since tree metadata is also made of fields at fixed offsets, those could
171 171 be inlined here by adding 76 bytes to each offset.
172 172
173 173 * Offset 120:
174 174 The used size of the data file, as a 32-bit big-endian integer.
175 175 The actual size of the data file may be larger
176 176 (if another Mercurial process is appending to it
177 177 but has not updated the docket yet).
178 178 That extra data must be ignored.
179 179
180 180 * Offset 124:
181 181 The length of the data file identifier, as an 8-bit integer.
182 182
183 183 * Offset 125:
184 184 The data file identifier.
185 185
186 186 * Any additional data is currently ignored, and dropped when updating the file.
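
As a hedged illustration of the fixed offsets above (this is not the actual
`mercurial/dirstateutils/docket.py` implementation; the struct layout is derived purely
from the offsets listed here and the names are illustrative)::

  import struct

  # marker, p1, p2, tree metadata blob, used data size, identifier length
  DOCKET_HEADER = struct.Struct('>12s32s32s44sLB')

  def parse_docket(docket_bytes):
      (marker, p1, p2, tree_metadata,
       data_size, id_length) = DOCKET_HEADER.unpack_from(docket_bytes, 0)
      assert marker == b'dirstate-v2\n'
      data_file_id = docket_bytes[
          DOCKET_HEADER.size : DOCKET_HEADER.size + id_length
      ]
      # The data file is `.hg/dirstate.{data_file_id}`; only its first
      # `data_size` bytes are meaningful.
      return p1, p2, tree_metadata, data_size, data_file_id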
187 187
188 188 Tree metadata in the docket file
189 189 --------------------------------
190 190
191 191 Tree metadata is similarly made of components at fixed offsets.
192 192 These offsets are counted in bytes from the start of tree metadata,
193 193 which is 76 bytes after the start of the docket file.
194 194
195 195 This metadata can be thought of as the singular root of the tree
196 196 formed by nodes in the data file.
197 197
198 198 * Offset 0:
199 199 Pseudo-pointer to the start of root nodes,
200 200 counted in bytes from the start of the data file,
201 201 as a 32-bit big-endian integer.
202 202 These nodes describe files and directories found directly
203 203 at the root of the working directory.
204 204
205 205 * Offset 4:
206 206 Number of root nodes, as a 32-bit big-endian integer.
207 207
208 208 * Offset 8:
209 209 Total number of nodes in the entire tree that "have a dirstate entry",
210 210 as a 32-bit big-endian integer.
211 211 Those nodes represent files that would be present at all in `dirstate-v1`.
212 212 This is typically less than the total number of nodes.
213 213 This counter is used to implement `len(dirstatemap)`.
214 214
215 215 * Offset 12:
216 216 Number of nodes in the entire tree that have a copy source,
217 217 as a 32-bit big-endian integer.
218 218 At the next commit, these files are recorded
219 219 as having been copied or moved/renamed from that source.
220 220 (A move is recorded as a copy and separate removal of the source.)
221 221 This counter is used to implement `len(dirstatemap.copymap)`.
222 222
223 223 * Offset 16:
224 224 An estimation of how many bytes of the data file
225 225 (within its used size) are unused, as a 32-bit big-endian integer.
226 226 When appending to an existing data file,
227 227 some existing nodes or paths can be unreachable from the new root
228 228 but they still take up space.
229 229 This counter is used to decide when to write a new data file from scratch
230 230 instead of appending to an existing one,
231 231 in order to get rid of that unreachable data
232 232 and avoid unbounded file size growth.
233 233
234 234 * Offset 20:
235 235 These four bytes are currently ignored
236 236 and reset to zero when updating a docket file.
237 237 This is an attempt at forward compatibility:
238 238 future Mercurial versions could use this as a bit field
239 239 to indicate that a dirstate has additional data or constraints.
240 240 Finding a dirstate file with the relevant bit unset indicates that
241 241 it was written by a then-older version
242 242 which is not aware of that future change.
243 243
244 244 * Offset 24:
245 245 Either 20 zero bytes, or a SHA-1 hash as 20 binary bytes.
246 246 When present, the hash is of ignore patterns
247 247 that were used for some previous run of the `status` algorithm.
248 248
249 249 * (Offset 44: end of tree metadata)
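
A matching `struct` layout for this 44-byte blob, derived only from the offsets above
(the authoritative definitions live in `mercurial/dirstateutils/v2.py` and
`rust/hg-core/src/dirstate_tree/on_disk.rs`), might look like this sketch::

  import struct

  # 5 x 32-bit counters, 4 reserved bytes, then the 20-byte ignore hash.
  TREE_METADATA = struct.Struct('>LLLLL4s20s')
  assert TREE_METADATA.size == 44

  def parse_tree_metadata(blob):
      # `blob` is the 44-byte slice starting at offset 76 of the docket file.
      (root_nodes_start, root_nodes_len, nodes_with_entry_count,
       nodes_with_copy_source_count, unreachable_bytes, _unused,
       ignore_patterns_hash) = TREE_METADATA.unpack(blob)
      return (root_nodes_start, root_nodes_len, nodes_with_entry_count,
              nodes_with_copy_source_count, unreachable_bytes,
              ignore_patterns_hash)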
250 250
251 251 Optional hash of ignore patterns
252 252 --------------------------------
253 253
254 254 The implementation of `status` at `rust/hg-core/src/dirstate_tree/status.rs`
255 255 has been optimized such that its run time is dominated by calls
256 256 to `stat` for reading the filesystem metadata of a file or directory,
257 257 and to `readdir` for listing the contents of a directory.
258 258 In some cases the algorithm can skip calls to `readdir`
259 259 (saving significant time)
260 260 because the dirstate already contains enough of the relevant information
261 261 to build the correct `status` results.
262 262
263 263 The default configuration of `hg status` is to list unknown files
264 264 but not ignored files.
265 265 In this case, it matters for the `readdir`-skipping optimization
266 266 if a given file used to be ignored but became unknown
267 267 because `.hgignore` changed.
268 268 To detect the possibility of such a change,
269 269 the tree metadata contains an optional hash of all ignore patterns.
270 270
271 271 We define:
272 272
273 273 * "Root" ignore files as:
274 274
275 275 - `.hgignore` at the root of the repository if it exists
276 276 - And all files from `ui.ignore.*` config.
277 277
278 278 This set of files is sorted by the string representation of their path.
279 279
280 280 * The "expanded contents" of an ignore files is the byte string made
281 281 by the concatenation of its contents followed by the "expanded contents"
282 282 of other files included with `include:` or `subinclude:` directives,
283 283 in inclusion order. This definition is recursive, as included files can
284 284 themselves include more files.
285 285
286 286 This hash is defined as the SHA-1 of the concatenation (in sorted
287 287 order) of the "expanded contents" of each "root" ignore file.
288 288 (Note that computing this does not require actually concatenating
289 289 into a single contiguous byte sequence.
290 290 Instead a SHA-1 hasher object can be created
291 291 and fed separate chunks one by one.)
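
For illustration only, here is a rough sketch of that hashing scheme. Real ignore-file
handling (`syntax:` lines, `subinclude:`, pattern rooting) is more involved than this;
the sketch only follows plain `include:` lines to show the recursion and the
chunk-by-chunk hashing::

  import hashlib
  import os

  def _expand_into(hasher, path):
      with open(path, 'rb') as f:
          data = f.read()
      hasher.update(data)  # the file's own contents first
      for line in data.splitlines():
          if line.startswith(b'include:'):
              included = os.fsdecode(line[len(b'include:'):].strip())
              _expand_into(hasher,
                           os.path.join(os.path.dirname(path), included))

  def ignore_patterns_hash(root_ignore_files):
      hasher = hashlib.sha1()
      for path in sorted(root_ignore_files):
          _expand_into(hasher, path)
      return hasher.digest()  # 20 bytes, stored at offset 24 of tree metadata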
292 292
293 293 The data file format
294 294 --------------------
295 295
296 296 This is implemented in `rust/hg-core/src/dirstate_tree/on_disk.rs`
297 297 and `mercurial/dirstateutils/v2.py`.
298 298
299 299 The data file contains two types of data: paths and nodes.
300 300
301 301 Paths and nodes can be organized in any order in the file, except that sibling
302 302 nodes must be next to each other and sorted by their path.
303 303 Contiguity lets the parent refer to them all
304 304 by their count and a single pseudo-pointer,
305 305 instead of storing one pseudo-pointer per child node.
306 306 Sorting allows using binary search to find a child node with a given name
307 307 in `O(log(n))` byte sequence comparisons.
308 308
309 309 The current implementation writes paths and child nodes before a given node
310 310 for ease of figuring out the value of pseudo-pointers by the time they are to be
311 311 written, but this is not an obligation and readers must not rely on it.
312 312
313 313 A path is stored as a byte string anywhere in the file, without delimiter.
314 314 It is referred to by one or more nodes by a pseudo-pointer to its start, and its
315 315 length in bytes. Since there is no delimiter,
316 316 when a path is a substring of another the same bytes could be reused,
317 317 although the implementation does not exploit this as of this writing.
318 318
319 319 A node is stored on 44 bytes with components at fixed offsets. Paths and
320 320 child nodes relevant to a node are stored externally and referenced through
321 321 pseudo-pointers.
322 322
323 323 All integers are stored in big-endian. All pseudo-pointers are 32-bit integers
324 324 counting bytes from the start of the data file. Path lengths and positions
325 325 are 16-bit integers, also counted in bytes.
326 326
327 327 Node components are:
328 328
329 329 * Offset 0:
330 330 Pseudo-pointer to the full path of this node,
331 331 from the working directory root.
332 332
333 333 * Offset 4:
334 334 Length of the full path.
335 335
336 336 * Offset 6:
337 337 Position of the last `/` path separator within the full path,
338 338 in bytes from the start of the full path,
339 339 or zero if there isn’t one.
340 340 The part of the full path after this position is the "base name".
341 341 Since sibling nodes have the same parent, only their base name varies
342 342 and needs to be considered when doing binary search to find a given path.
343 343
344 344 * Offset 8:
345 345 Pseudo-pointer to the "copy source" path for this node,
346 346 or zero if there is no copy source.
347 347
348 348 * Offset 12:
349 349 Length of the copy source path, or zero if there isn’t one.
350 350
351 351 * Offset 14:
352 352 Pseudo-pointer to the start of child nodes.
353 353
354 354 * Offset 18:
355 355 Number of child nodes, as a 32-bit integer.
356 356 They occupy 44 times this number of bytes
357 357 (not counting space for paths, and further descendants).
358 358
359 359 * Offset 22:
360 360 Number as a 32-bit integer of descendant nodes in this subtree,
361 361 not including this node itself,
362 362 that "have a dirstate entry".
363 363 Those nodes represent files that would be present at all in `dirstate-v1`.
364 364 This is typically less than the total number of descendants.
365 365 This counter is used to implement `has_dir`.
366 366
367 367 * Offset 26:
368 368 Number as a 32-bit integer of descendant nodes in this subtree,
369 369 not including this node itself,
370 370 that represent files tracked in the working directory.
371 371 (For example, `hg rm` makes a file untracked.)
372 372 This counter is used to implement `has_tracked_dir`.
373 373
374 374 * Offset 30:
375 375 A `flags` field that packs some boolean values as bits of a 16-bit integer.
376 376 Starting from least-significant, bit masks are::
377 377
378 378 WDIR_TRACKED = 1 << 0
379 379 P1_TRACKED = 1 << 1
380 380 P2_INFO = 1 << 2
381 381 HAS_MODE_AND_SIZE = 1 << 3
382 HAS_FILE_MTIME = 1 << 4
383 HAS_DIRECTORY_MTIME = 1 << 5
382 HAS_MTIME = 1 << 4
383 DIRECTORY = 1 << 5
384 384 MODE_EXEC_PERM = 1 << 6
385 385 MODE_IS_SYMLINK = 1 << 7
386 386 EXPECTED_STATE_IS_MODIFIED = 1 << 8
387 387 ALL_UNKNOWN_RECORDED = 1 << 9
388 388 ALL_IGNORED_RECORDED = 1 << 10
389 389 HAS_FALLBACK_EXEC = 1 << 11
390 390 FALLBACK_EXEC = 1 << 12
391 391 HAS_FALLBACK_SYMLINK = 1 << 13
392 392 FALLBACK_SYMLINK = 1 << 14
393 393 MTIME_SECOND_AMBIGUOUS = 1 << 15
394 394
395 395 The meaning of each bit is described below.
396 396
397 397 Other bits are unset.
398 398 They may be assigned meaning in the future,
399 399 with the limitation that Mercurial versions that pre-date such meaning
400 400 will always reset those bits to unset when writing nodes.
401 401 (A new node is written for any mutation in its subtree,
402 402 leaving the bytes of the old node unreachable
403 403 until the data file is rewritten entirely.)
404 404
405 405 * Offset 32:
406 406 A `size` field described below, as a 32-bit integer.
407 407 Unlike in dirstate-v1, negative values are not used.
408 408
409 409 * Offset 36:
410 410 The seconds component of an `mtime` field described below,
411 411 as a 32-bit integer.
412 412 Unlike in dirstate-v1, negative values are not used.
413 413 When `mtime` is used, this is number of seconds since the Unix epoch
414 414 truncated to its lower 31 bits.
415 415
416 416 * Offset 40:
417 417 The nanoseconds component of an `mtime` field described below,
418 418 as a 32-bit integer.
419 419 When `mtime` is used,
420 420 this is the number of nanoseconds since `mtime.seconds`,
421 421 always strictly less than one billion.
422 422
423 423 This may be zero if more precision is not available.
424 424 (This can happen because of limitations in any of Mercurial, Python,
425 425 libc, the operating system, …)
426 426
427 427 When comparing two mtimes and either has this component set to zero,
428 428 the sub-second precision of both should be ignored.
429 429 False positives when checking mtime equality due to clock resolution
430 430 are always possible and the status algorithm needs to deal with them,
431 431 but having too many false negatives could be harmful too.
432 432
433 433 * (Offset 44: end of this node)
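
A hedged sketch of a matching `struct` layout, derived from the offsets above (the
authoritative definitions are the node constant in `mercurial/dirstateutils/v2.py` and
the node type in `rust/hg-core/src/dirstate_tree/on_disk.rs`)::

  import struct

  NODE = struct.Struct(
      '>'
      'L'   # pseudo-pointer to the full path
      'H'   # length of the full path
      'H'   # position of the basename within the full path
      'L'   # pseudo-pointer to the copy source path (0 if none)
      'H'   # length of the copy source path (0 if none)
      'L'   # pseudo-pointer to the first child node
      'L'   # number of child nodes
      'L'   # descendants with a dirstate entry
      'L'   # tracked descendants
      'H'   # flags bit field
      'L'   # size
      'L'   # mtime seconds
      'L'   # mtime nanoseconds
  )
  assert NODE.size == 44

  def read_sibling(data, children_start, index):
      # Sibling nodes are contiguous and sorted by path, so a parent only
      # stores `children_start` and their count.
      return NODE.unpack_from(data, children_start + index * NODE.size)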
434 434
435 435 The meaning of the boolean values packed in `flags` is:
436 436
437 437 `WDIR_TRACKED`
438 438 Set if the working directory contains a tracked file at this node’s path.
439 439 This is typically set and unset by `hg add` and `hg rm`.
440 440
441 441 `P1_TRACKED`
442 442 Set if the working directory’s first parent changeset
443 443 (whose node identifier is found in tree metadata)
444 444 contains a tracked file at this node’s path.
445 445 This is a cache to reduce manifest lookups.
446 446
447 447 `P2_INFO`
448 448 Set if the file has been involved in some merge operation.
449 449 Either because it was actually merged,
450 450 or because the version in the second parent (p2) was ahead,
451 451 or because some rename moved it there.
452 452 In either case `hg status` will want it displayed as modified.
453 453
454 454 Files that would be mentioned at all in the `dirstate-v1` file format
455 455 have a node with at least one of the above three bits set in `dirstate-v2`.
456 456 Let’s call these files "tracked anywhere",
457 457 and "untracked" the nodes with all three of these bits unset.
458 458 Untracked nodes are typically for directories:
459 459 they hold child nodes and form the tree structure.
460 460 Additional untracked nodes may also exist:
461 461 although implementations should strive to clean up nodes
462 462 that are entirely unused, some may remain.
463 463 For example, a future version of Mercurial might in some cases
464 464 add nodes for untracked files or/and ignored files in the working directory
465 465 in order to optimize `hg status`
466 466 by enabling it to skip `readdir` in more cases.
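
As a small illustration of this classification, using the bit masks listed earlier::

  WDIR_TRACKED = 1 << 0
  P1_TRACKED = 1 << 1
  P2_INFO = 1 << 2

  def tracked_anywhere(flags):
      # Such a node would have an entry in a dirstate-v1 file.
      return bool(flags & (WDIR_TRACKED | P1_TRACKED | P2_INFO))

  def is_untracked_node(flags):
      # Typically a directory node that only exists to hold its children.
      return not tracked_anywhere(flags)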
467 467
468 468 `HAS_MODE_AND_SIZE`
469 469 Must be unset for untracked nodes.
470 470 For files tracked anywhere, if this is set:
471 471 - The `size` field is the expected file size,
472 472 in bytes, truncated to its lower 31 bits.
473 473 - The expected execute permission for the file’s owner
474 474 is given by `MODE_EXEC_PERM`
475 475 - The expected file type is given by `MODE_IS_SYMLINK`:
476 476 a symbolic link if set, or a normal file if unset.
477 477 If this is unset the expected size, permission, and file type are unknown.
478 478 The `size` field is unused (set to zero).
479 479
480 `HAS_FILE_MTIME`
481 Must be unset for untracked nodes.
482 If this and `HAS_DIRECTORY_MTIME` are both unset,
483 the `mtime` field is unused (set to zero).
484 If this is set, `mtime` is the expected modification time.
480 `HAS_MTIME`
481 The node contains a "valid" last modification time in the `mtime` field.
482
485 483
486 `HAS_DIRECTORY_MTIME`
487 Must be unset for file tracked anywhere.
488 If this and `HAS_DIRECTORY_MTIME` are both unset,
489 the `mtime` field is unused (set to zero).
490 If this is set, at some point,
491 this path in the working directory was observed:
492
493 - To be a directory
494 - With the modification time given in `mtime`
495 - That time was already strictly in the past when observed,
496 meaning that later changes cannot happen in the same clock tick
497 and must cause a different modification time
498 (unless the system clock jumps back and we get unlucky,
499 which is not impossible but deemed unlikely enough).
500 - All direct children of this directory
501 (as returned by `std::fs::read_dir`)
502 either have a corresponding dirstate node,
503 or are ignored by ignore patterns whose hash is in tree metadata.
484 It means the `mtime` was already strictly in the past when observed,
485 so that later changes cannot happen in the same clock tick
486 and must cause a different modification time
487 (unless the system clock jumps back and we get unlucky,
488 which is not impossible but deemed unlikely enough).
504 489
505 490 This means that if `std::fs::symlink_metadata` later reports
506 491 the same modification time
507 492 and ignored patterns haven’t changed,
508 a run of status that is not listing ignored files
509 can skip calling `std::fs::read_dir` again for this directory,
493 we can assume the node to be unchanged on disk.
494
495 The `mtime` field can then be used to skip more expensive lookups when
496 checking the status of "tracked" nodes.
497
498 It can also be set for nodes where `DIRECTORY` is set.
499 See `DIRECTORY` documentation for details.
500
501 `DIRECTORY`
502 When set, this entry will match a directory that exists or existed on the
503 file system.
504
505 * When `HAS_MTIME` is set, a directory has been seen on the file system and
506 `mtime` matches its last modification time. However, `HAS_MTIME` not being set
507 does not indicate the lack of a directory on the file system.
508
509 * When not tracked anywhere, this node does not represent an ignored or
510 unknown file on disk.
511
512 If `HAS_MTIME` is set
513 and `mtime` matches the last modification time of the directory on disk,
514 the directory is unchanged
515 and we can skip calling `std::fs::read_dir` again for this directory,
510 516 and iterate child dirstate nodes instead.
517 (as long as `ALL_UNKNOWN_RECORDED` and `ALL_IGNORED_RECORDED` are taken
518 into account; see the sketch after these flag descriptions)
511 519
512 520 `MODE_EXEC_PERM`
513 521 Must be unset if `HAS_MODE_AND_SIZE` is unset.
514 522 If `HAS_MODE_AND_SIZE` is set,
515 523 this indicates whether the file’s owner is expected
516 524 to have execute permission.
517 525
518 526 `MODE_IS_SYMLINK`
519 527 Must be unset if `HAS_MODE_AND_SIZE` is unset.
520 528 If `HAS_MODE_AND_SIZE` is set,
521 529 this indicates whether the file is expected to be a symlink
522 530 as opposed to a normal file.
523 531
524 532 `EXPECTED_STATE_IS_MODIFIED`
525 533 Must be unset for untracked nodes.
526 534 For:
527 535 - a file tracked anywhere
528 - that has expected metadata (`HAS_MODE_AND_SIZE` and `HAS_FILE_MTIME`)
536 - that has expected metadata (`HAS_MODE_AND_SIZE` and `HAS_MTIME`)
529 537 - if that metadata matches
530 538 metadata found in the working directory with `stat`
531 539 This bit indicates the status of the file.
532 540 If set, the status is modified. If unset, it is clean.
533 541
534 542 In cases where `hg status` needs to read the contents of a file
535 543 because metadata is ambiguous, this bit lets it record the result
536 544 if the result is modified so that a future run of `hg status`
537 545 does not need to do the same again.
538 546 It is valid to never set this bit,
539 547 and consider expected metadata ambiguous if it is set.
540 548
541 549 `ALL_UNKNOWN_RECORDED`
542 550 If set, all "unknown" children existing on disk (at the time of the last
543 551 status) have been recorded and the `mtime` associated with
544 `HAS_DIRECTORY_MTIME` can be used for optimization even when "unknown" file
552 `DIRECTORY` can be used for optimization even when "unknown" files
545 553 are listed.
546 554
547 555 Note that the amount of recorded "unknown" children can still be zero if none
548 556 were present.
549 557
550 558 Also note that having this flag unset does not imply that no "unknown"
551 559 children have been recorded. Some might be present, but there is no guarantee
552 560 that it will be all of them.
553 561
554 562 `ALL_IGNORED_RECORDED`
555 563 If set, all "ignored" children existing on disk (at the time of the last
556 564 status) have been recorded and the `mtime` associated with
557 `HAS_DIRECTORY_MTIME` can be used for optimization even when "ignored" file
565 `DIRECTORY` can be used for optimization even when "ignored" files
558 566 are listed.
559 567
560 568 Note that the amount of recorded "ignored" children can still be zero if none
561 569 were present.
562 570
563 571 Also note that having this flag unset does not imply that no "ignored"
564 572 children have been recorded. Some might be present, but there is no guarantee
565 573 that it will be all of them.
566 574
567 575 `HAS_FALLBACK_EXEC`
568 576 If this flag is set, the entry carries "fallback" information for the
569 577 executable bit in the `FALLBACK_EXEC` flag.
570 578
571 579 Fallback information can be stored in the dirstate to keep track of
572 580 filesystem attributes tracked by Mercurial when the underlying file
573 581 system or operating system does not support that property (e.g.
574 582 Windows).
575 583
576 584 `FALLBACK_EXEC`
577 585 Should be ignored if `HAS_FALLBACK_EXEC` is unset. If set the file for this
578 586 entry should be considered executable if that information cannot be
579 587 extracted from the file system. If unset it should be considered
580 588 non-executable instead.
581 589
582 590 `HAS_FALLBACK_SYMLINK`
583 591 If this flag is set, the entry carries "fallback" information for symbolic
584 592 link status in the `FALLBACK_SYMLINK` flag.
585 593
586 594 Fallback information can be stored in the dirstate to keep track of
587 595 filesystem attributes tracked by Mercurial when the underlying file
588 596 system or operating system does not support that property (e.g.
589 597 Windows).
590 598
591 599 `FALLBACK_SYMLINK`
592 600 Should be ignored if `HAS_FALLBACK_SYMLINK` is unset. If set the file for
593 601 this entry should be considered a symlink if that information cannot be
594 602 extracted from the file system. If unset it should be considered a normal
595 603 file instead.
596 604
597 605 `MTIME_SECOND_AMBIGUOUS`
598 606 This flag is relevant only when `HAS_MTIME` is set. When set, the
599 607 `mtime` stored in the entry is only valid for comparison with timestamps
600 608 that have nanosecond information. If the available timestamp does not carry
601 609 nanosecond information, the `mtime` should be ignored and no optimization
602 610 can be applied.
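
To tie the directory-related flags together, the following is a rough sketch of the
`readdir`-skipping decision they enable. The real logic lives in
`rust/hg-core/src/dirstate_tree/status.rs`; the function and parameter names here are
illustrative, and the sub-second mtime comparison rules are omitted::

  HAS_MTIME = 1 << 4
  DIRECTORY = 1 << 5
  ALL_UNKNOWN_RECORDED = 1 << 9
  ALL_IGNORED_RECORDED = 1 << 10

  def can_skip_readdir(flags, cached_mtime, mtime_on_disk,
                       ignore_hash_unchanged, listing_unknown,
                       listing_ignored):
      # A directory node with a recorded mtime...
      if not (flags & DIRECTORY and flags & HAS_MTIME):
          return False
      # ...whose ignore patterns and on-disk mtime are unchanged...
      if not ignore_hash_unchanged or cached_mtime != mtime_on_disk:
          return False
      # ...and whose recorded children cover whatever status must list.
      if listing_unknown and not flags & ALL_UNKNOWN_RECORDED:
          return False
      if listing_ignored and not flags & ALL_IGNORED_RECORDED:
          return False
      return True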
@@ -1,935 +1,935 b''
1 1 # parsers.py - Python implementation of parsers.c
2 2 #
3 3 # Copyright 2009 Olivia Mackall <olivia@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import stat
11 11 import struct
12 12 import zlib
13 13
14 14 from ..node import (
15 15 nullrev,
16 16 sha1nodeconstants,
17 17 )
18 18 from ..thirdparty import attr
19 19 from .. import (
20 20 error,
21 21 pycompat,
22 22 revlogutils,
23 23 util,
24 24 )
25 25
26 26 from ..revlogutils import nodemap as nodemaputil
27 27 from ..revlogutils import constants as revlog_constants
28 28
29 29 stringio = pycompat.bytesio
30 30
31 31
32 32 _pack = struct.pack
33 33 _unpack = struct.unpack
34 34 _compress = zlib.compress
35 35 _decompress = zlib.decompress
36 36
37 37
38 38 # a special value used internally for `size` if the file comes from the other parent
39 39 FROM_P2 = -2
40 40
41 41 # a special value used internally for `size` if the file is modified/merged/added
42 42 NONNORMAL = -1
43 43
44 44 # a special value used internally for `time` if the time is ambiguous
45 45 AMBIGUOUS_TIME = -1
46 46
47 47 # Bits of the `flags` byte inside a node in the file format
48 48 DIRSTATE_V2_WDIR_TRACKED = 1 << 0
49 49 DIRSTATE_V2_P1_TRACKED = 1 << 1
50 50 DIRSTATE_V2_P2_INFO = 1 << 2
51 51 DIRSTATE_V2_HAS_MODE_AND_SIZE = 1 << 3
52 DIRSTATE_V2_HAS_FILE_MTIME = 1 << 4
53 _DIRSTATE_V2_HAS_DIRCTORY_MTIME = 1 << 5 # Unused when Rust is not available
52 DIRSTATE_V2_HAS_MTIME = 1 << 4
53 DIRSTATE_V2_DIRECTORY = 1 << 5
54 54 DIRSTATE_V2_MODE_EXEC_PERM = 1 << 6
55 55 DIRSTATE_V2_MODE_IS_SYMLINK = 1 << 7
56 56 DIRSTATE_V2_EXPECTED_STATE_IS_MODIFIED = 1 << 8
57 57 DIRSTATE_V2_ALL_UNKNOWN_RECORDED = 1 << 9
58 58 DIRSTATE_V2_ALL_IGNORED_RECORDED = 1 << 10
59 59 DIRSTATE_V2_HAS_FALLBACK_EXEC = 1 << 11
60 60 DIRSTATE_V2_FALLBACK_EXEC = 1 << 12
61 61 DIRSTATE_V2_HAS_FALLBACK_SYMLINK = 1 << 13
62 62 DIRSTATE_V2_FALLBACK_SYMLINK = 1 << 14
63 63 DIRSTATE_V2_MTIME_SECOND_AMBIGUOUS = 1 << 15
64 64
65 65
66 66 @attr.s(slots=True, init=False)
67 67 class DirstateItem(object):
68 68 """represent a dirstate entry
69 69
70 70 It holds multiple attributes
71 71
72 72 # about file tracking
73 73 - wc_tracked: is the file tracked by the working copy
74 74 - p1_tracked: is the file tracked in the working copy's first parent
75 75 - p2_info: the file has been involved in some merge operation. Either
76 76 because it was actually merged, or because the p2 version was
77 77 ahead, or because some rename moved it there. In either case
78 78 `hg status` will want it displayed as modified.
79 79
80 80 # about the file state expected from p1 manifest:
81 81 - mode: the file mode in p1
82 82 - size: the file size in p1
83 83
84 84 These values can be set to None, which means we don't have a meaningful value
85 85 to compare with. Either because we don't really care about them as their
86 86 `status` is known without having to look at the disk or because we don't
87 87 know these right now and a full comparison will be needed to find out if
88 88 the file is clean.
89 89
90 90 # about the file state on disk last time we saw it:
91 91 - mtime: the last known clean mtime for the file.
92 92
93 93 This value can be set to None if no cacheable state exists. Either because we
94 94 do not care (see previous section) or because we could not cache something
95 95 yet.
96 96 """
97 97
98 98 _wc_tracked = attr.ib()
99 99 _p1_tracked = attr.ib()
100 100 _p2_info = attr.ib()
101 101 _mode = attr.ib()
102 102 _size = attr.ib()
103 103 _mtime_s = attr.ib()
104 104 _mtime_ns = attr.ib()
105 105 _fallback_exec = attr.ib()
106 106 _fallback_symlink = attr.ib()
107 107
108 108 def __init__(
109 109 self,
110 110 wc_tracked=False,
111 111 p1_tracked=False,
112 112 p2_info=False,
113 113 has_meaningful_data=True,
114 114 has_meaningful_mtime=True,
115 115 parentfiledata=None,
116 116 fallback_exec=None,
117 117 fallback_symlink=None,
118 118 ):
119 119 self._wc_tracked = wc_tracked
120 120 self._p1_tracked = p1_tracked
121 121 self._p2_info = p2_info
122 122
123 123 self._fallback_exec = fallback_exec
124 124 self._fallback_symlink = fallback_symlink
125 125
126 126 self._mode = None
127 127 self._size = None
128 128 self._mtime_s = None
129 129 self._mtime_ns = None
130 130 if parentfiledata is None:
131 131 has_meaningful_mtime = False
132 132 has_meaningful_data = False
133 133 if has_meaningful_data:
134 134 self._mode = parentfiledata[0]
135 135 self._size = parentfiledata[1]
136 136 if has_meaningful_mtime:
137 137 self._mtime_s, self._mtime_ns = parentfiledata[2]
138 138
139 139 @classmethod
140 140 def from_v2_data(cls, flags, size, mtime_s, mtime_ns):
141 141 """Build a new DirstateItem object from V2 data"""
142 142 has_mode_size = bool(flags & DIRSTATE_V2_HAS_MODE_AND_SIZE)
143 has_meaningful_mtime = bool(flags & DIRSTATE_V2_HAS_FILE_MTIME)
143 has_meaningful_mtime = bool(flags & DIRSTATE_V2_HAS_MTIME)
144 144 if flags & DIRSTATE_V2_MTIME_SECOND_AMBIGUOUS:
145 145 # The current code is not able to do the more subtle comparison that the
146 146 # MTIME_SECOND_AMBIGUOUS requires. So we ignore the mtime
147 147 has_meaningful_mtime = False
148 148 mode = None
149 149
150 150 if flags & DIRSTATE_V2_EXPECTED_STATE_IS_MODIFIED:
151 151 # we do not have support for this flag in the code yet,
152 152 # force a lookup for this file.
153 153 has_mode_size = False
154 154 has_meaningful_mtime = False
155 155
156 156 fallback_exec = None
157 157 if flags & DIRSTATE_V2_HAS_FALLBACK_EXEC:
158 158 fallback_exec = flags & DIRSTATE_V2_FALLBACK_EXEC
159 159
160 160 fallback_symlink = None
161 161 if flags & DIRSTATE_V2_HAS_FALLBACK_SYMLINK:
162 162 fallback_symlink = flags & DIRSTATE_V2_FALLBACK_SYMLINK
163 163
164 164 if has_mode_size:
165 165 assert stat.S_IXUSR == 0o100
166 166 if flags & DIRSTATE_V2_MODE_EXEC_PERM:
167 167 mode = 0o755
168 168 else:
169 169 mode = 0o644
170 170 if flags & DIRSTATE_V2_MODE_IS_SYMLINK:
171 171 mode |= stat.S_IFLNK
172 172 else:
173 173 mode |= stat.S_IFREG
174 174 return cls(
175 175 wc_tracked=bool(flags & DIRSTATE_V2_WDIR_TRACKED),
176 176 p1_tracked=bool(flags & DIRSTATE_V2_P1_TRACKED),
177 177 p2_info=bool(flags & DIRSTATE_V2_P2_INFO),
178 178 has_meaningful_data=has_mode_size,
179 179 has_meaningful_mtime=has_meaningful_mtime,
180 180 parentfiledata=(mode, size, (mtime_s, mtime_ns)),
181 181 fallback_exec=fallback_exec,
182 182 fallback_symlink=fallback_symlink,
183 183 )
184 184
185 185 @classmethod
186 186 def from_v1_data(cls, state, mode, size, mtime):
187 187 """Build a new DirstateItem object from V1 data
188 188
189 189 Since the dirstate-v1 format is frozen, the signature of this function
190 190 is not expected to change, unlike the __init__ one.
191 191 """
192 192 if state == b'm':
193 193 return cls(wc_tracked=True, p1_tracked=True, p2_info=True)
194 194 elif state == b'a':
195 195 return cls(wc_tracked=True)
196 196 elif state == b'r':
197 197 if size == NONNORMAL:
198 198 p1_tracked = True
199 199 p2_info = True
200 200 elif size == FROM_P2:
201 201 p1_tracked = False
202 202 p2_info = True
203 203 else:
204 204 p1_tracked = True
205 205 p2_info = False
206 206 return cls(p1_tracked=p1_tracked, p2_info=p2_info)
207 207 elif state == b'n':
208 208 if size == FROM_P2:
209 209 return cls(wc_tracked=True, p2_info=True)
210 210 elif size == NONNORMAL:
211 211 return cls(wc_tracked=True, p1_tracked=True)
212 212 elif mtime == AMBIGUOUS_TIME:
213 213 return cls(
214 214 wc_tracked=True,
215 215 p1_tracked=True,
216 216 has_meaningful_mtime=False,
217 217 parentfiledata=(mode, size, (42, 0)),
218 218 )
219 219 else:
220 220 return cls(
221 221 wc_tracked=True,
222 222 p1_tracked=True,
223 223 parentfiledata=(mode, size, (mtime, 0)),
224 224 )
225 225 else:
226 226 raise RuntimeError(b'unknown state: %s' % state)
227 227
228 228 def set_possibly_dirty(self):
229 229 """Mark a file as "possibly dirty"
230 230
231 231 This means the next status call will have to actually check its content
232 232 to make sure it is correct.
233 233 """
234 234 self._mtime_s = None
235 235 self._mtime_ns = None
236 236
237 237 def set_clean(self, mode, size, mtime):
238 238 """mark a file as "clean" cancelling potential "possibly dirty call"
239 239
240 240 Note: this function is a descendant of `dirstate.normal` and is
241 241 currently expected to be called on "normal" entries only. There is no
242 242 reason for this not to change in the future as long as the code is
243 243 updated to preserve the proper state of the non-normal files.
244 244 """
245 245 self._wc_tracked = True
246 246 self._p1_tracked = True
247 247 self._mode = mode
248 248 self._size = size
249 249 self._mtime_s, self._mtime_ns = mtime
250 250
251 251 def set_tracked(self):
252 252 """mark a file as tracked in the working copy
253 253
254 254 This will ultimately be called by commands like `hg add`.
255 255 """
256 256 self._wc_tracked = True
257 257 # `set_tracked` is replacing various `normallookup` calls. So we mark
258 258 # the files as needing lookup
259 259 #
260 260 # Consider dropping this in the future in favor of something less broad.
261 261 self._mtime_s = None
262 262 self._mtime_ns = None
263 263
264 264 def set_untracked(self):
265 265 """mark a file as untracked in the working copy
266 266
267 267 This will ultimately be called by commands like `hg remove`.
268 268 """
269 269 self._wc_tracked = False
270 270 self._mode = None
271 271 self._size = None
272 272 self._mtime_s = None
273 273 self._mtime_ns = None
274 274
275 275 def drop_merge_data(self):
276 276 """remove all "merge-only" from a DirstateItem
277 277
278 278 This is to be call by the dirstatemap code when the second parent is dropped
279 279 """
280 280 if self._p2_info:
281 281 self._p2_info = False
282 282 self._mode = None
283 283 self._size = None
284 284 self._mtime_s = None
285 285 self._mtime_ns = None
286 286
287 287 @property
288 288 def mode(self):
289 289 return self.v1_mode()
290 290
291 291 @property
292 292 def size(self):
293 293 return self.v1_size()
294 294
295 295 @property
296 296 def mtime(self):
297 297 return self.v1_mtime()
298 298
299 299 def mtime_likely_equal_to(self, other_mtime):
300 300 self_sec = self._mtime_s
301 301 if self_sec is None:
302 302 return False
303 303 self_ns = self._mtime_ns
304 304 other_sec, other_ns = other_mtime
305 305 return self_sec == other_sec and (
306 306 self_ns == other_ns or self_ns == 0 or other_ns == 0
307 307 )
308 308
309 309 @property
310 310 def state(self):
311 311 """
312 312 States are:
313 313 n normal
314 314 m needs merging
315 315 r marked for removal
316 316 a marked for addition
317 317
318 318 XXX This "state" is a bit obscure and mostly a direct expression of the
319 319 dirstatev1 format. It would make sense to ultimately deprecate it in
320 320 favor of the more "semantic" attributes.
321 321 """
322 322 if not self.any_tracked:
323 323 return b'?'
324 324 return self.v1_state()
325 325
326 326 @property
327 327 def has_fallback_exec(self):
328 328 """True if "fallback" information are available for the "exec" bit
329 329
330 330 Fallback information can be stored in the dirstate to keep track of
331 331 filesystem attributes tracked by Mercurial when the underlying file
332 332 system or operating system does not support that property (e.g.
333 333 Windows).
334 334
335 335 Not all versions of the dirstate on-disk storage support preserving this
336 336 information.
337 337 """
338 338 return self._fallback_exec is not None
339 339
340 340 @property
341 341 def fallback_exec(self):
342 342 """ "fallback" information for the executable bit
343 343
344 344 True if the file should be considered executable when we cannot get
345 345 this information from the file system. False if it should be
346 346 considered non-executable.
347 347
348 348 See has_fallback_exec for details."""
349 349 return self._fallback_exec
350 350
351 351 @fallback_exec.setter
352 352 def set_fallback_exec(self, value):
353 353 """control "fallback" executable bit
354 354
355 355 Set to:
356 356 - True if the file should be considered executable,
357 357 - False if the file should be considered non-executable,
358 358 - None if we do not have valid fallback data.
359 359
360 360 See has_fallback_exec for details."""
361 361 if value is None:
362 362 self._fallback_exec = None
363 363 else:
364 364 self._fallback_exec = bool(value)
365 365
366 366 @property
367 367 def has_fallback_symlink(self):
368 368 """True if "fallback" information are available for symlink status
369 369
370 370 Fallback information can be stored in the dirstate to keep track of
371 371 filesystem attributes tracked by Mercurial when the underlying file
372 372 system or operating system does not support that property (e.g.
373 373 Windows).
374 374
375 375 Not all versions of the dirstate on-disk storage support preserving this
376 376 information."""
377 377 return self._fallback_symlink is not None
378 378
379 379 @property
380 380 def fallback_symlink(self):
381 381 """ "fallback" information for symlink status
382 382
383 383 True if the file should be considered a symlink when we cannot get
384 384 this information from the file system. False if it should be
385 385 considered a normal file instead.
386 386
387 387 See has_fallback_symlink for details."""
388 388 return self._fallback_symlink
389 389
390 390 @fallback_symlink.setter
391 391 def set_fallback_symlink(self, value):
392 392 """control "fallback" symlink status
393 393
394 394 Set to:
395 395 - True if the file should be considered a symlink,
396 396 - False if the file should be considered not a symlink,
397 397 - None if we do not have valid fallback data.
398 398
399 399 See has_fallback_symlink for details."""
400 400 if value is None:
401 401 self._fallback_symlink = None
402 402 else:
403 403 self._fallback_symlink = bool(value)
404 404
405 405 @property
406 406 def tracked(self):
407 407 """True is the file is tracked in the working copy"""
408 408 return self._wc_tracked
409 409
410 410 @property
411 411 def any_tracked(self):
412 412 """True is the file is tracked anywhere (wc or parents)"""
413 413 return self._wc_tracked or self._p1_tracked or self._p2_info
414 414
415 415 @property
416 416 def added(self):
417 417 """True if the file has been added"""
418 418 return self._wc_tracked and not (self._p1_tracked or self._p2_info)
419 419
420 420 @property
421 421 def maybe_clean(self):
422 422 """True if the file has a chance to be in the "clean" state"""
423 423 if not self._wc_tracked:
424 424 return False
425 425 elif not self._p1_tracked:
426 426 return False
427 427 elif self._p2_info:
428 428 return False
429 429 return True
430 430
431 431 @property
432 432 def p1_tracked(self):
433 433 """True if the file is tracked in the first parent manifest"""
434 434 return self._p1_tracked
435 435
436 436 @property
437 437 def p2_info(self):
438 438 """True if the file needed to merge or apply any input from p2
439 439
440 440 See the class documentation for details.
441 441 """
442 442 return self._wc_tracked and self._p2_info
443 443
444 444 @property
445 445 def removed(self):
446 446 """True if the file has been removed"""
447 447 return not self._wc_tracked and (self._p1_tracked or self._p2_info)
448 448
449 449 def v2_data(self):
450 450 """Returns (flags, mode, size, mtime) for v2 serialization"""
451 451 flags = 0
452 452 if self._wc_tracked:
453 453 flags |= DIRSTATE_V2_WDIR_TRACKED
454 454 if self._p1_tracked:
455 455 flags |= DIRSTATE_V2_P1_TRACKED
456 456 if self._p2_info:
457 457 flags |= DIRSTATE_V2_P2_INFO
458 458 if self._mode is not None and self._size is not None:
459 459 flags |= DIRSTATE_V2_HAS_MODE_AND_SIZE
460 460 if self.mode & stat.S_IXUSR:
461 461 flags |= DIRSTATE_V2_MODE_EXEC_PERM
462 462 if stat.S_ISLNK(self.mode):
463 463 flags |= DIRSTATE_V2_MODE_IS_SYMLINK
464 464 if self._mtime_s is not None:
465 flags |= DIRSTATE_V2_HAS_FILE_MTIME
465 flags |= DIRSTATE_V2_HAS_MTIME
466 466
467 467 if self._fallback_exec is not None:
468 468 flags |= DIRSTATE_V2_HAS_FALLBACK_EXEC
469 469 if self._fallback_exec:
470 470 flags |= DIRSTATE_V2_FALLBACK_EXEC
471 471
472 472 if self._fallback_symlink is not None:
473 473 flags |= DIRSTATE_V2_HAS_FALLBACK_SYMLINK
474 474 if self._fallback_symlink:
475 475 flags |= DIRSTATE_V2_FALLBACK_SYMLINK
476 476
477 477 # Note: we do not need to do anything regarding
478 478 # DIRSTATE_V2_ALL_UNKNOWN_RECORDED and DIRSTATE_V2_ALL_IGNORED_RECORDED
479 479 # since we never set DIRSTATE_V2_DIRECTORY
480 480 return (flags, self._size or 0, self._mtime_s or 0, self._mtime_ns or 0)
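
Editor's note, for illustration only: the tuple returned by `v2_data` round-trips
through `from_v2_data`. The import path assumes the pure-Python module at
`mercurial/pure/parsers.py`; the mode, size and timestamp values below are arbitrary::

  import stat

  from mercurial.pure.parsers import DirstateItem

  item = DirstateItem(
      wc_tracked=True,
      p1_tracked=True,
      parentfiledata=(stat.S_IFREG | 0o644, 42, (1600000000, 0)),
  )
  flags, size, mtime_s, mtime_ns = item.v2_data()
  restored = DirstateItem.from_v2_data(flags, size, mtime_s, mtime_ns)
  assert restored.tracked and restored.size == 42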
481 481
482 482 def v1_state(self):
483 483 """return a "state" suitable for v1 serialization"""
484 484 if not self.any_tracked:
485 485 # the object has no state to record, this is -currently-
486 486 # unsupported
487 487 raise RuntimeError('untracked item')
488 488 elif self.removed:
489 489 return b'r'
490 490 elif self._p1_tracked and self._p2_info:
491 491 return b'm'
492 492 elif self.added:
493 493 return b'a'
494 494 else:
495 495 return b'n'
496 496
497 497 def v1_mode(self):
498 498 """return a "mode" suitable for v1 serialization"""
499 499 return self._mode if self._mode is not None else 0
500 500
501 501 def v1_size(self):
502 502 """return a "size" suitable for v1 serialization"""
503 503 if not self.any_tracked:
504 504 # the object has no state to record, this is -currently-
505 505 # unsupported
506 506 raise RuntimeError('untracked item')
507 507 elif self.removed and self._p1_tracked and self._p2_info:
508 508 return NONNORMAL
509 509 elif self._p2_info:
510 510 return FROM_P2
511 511 elif self.removed:
512 512 return 0
513 513 elif self.added:
514 514 return NONNORMAL
515 515 elif self._size is None:
516 516 return NONNORMAL
517 517 else:
518 518 return self._size
519 519
520 520 def v1_mtime(self):
521 521 """return a "mtime" suitable for v1 serialization"""
522 522 if not self.any_tracked:
523 523 # the object has no state to record, this is -currently-
524 524 # unsupported
525 525 raise RuntimeError('untracked item')
526 526 elif self.removed:
527 527 return 0
528 528 elif self._mtime_s is None:
529 529 return AMBIGUOUS_TIME
530 530 elif self._p2_info:
531 531 return AMBIGUOUS_TIME
532 532 elif not self._p1_tracked:
533 533 return AMBIGUOUS_TIME
534 534 else:
535 535 return self._mtime_s
536 536
537 537 def need_delay(self, now):
538 538 """True if the stored mtime would be ambiguous with the current time"""
539 539 return self.v1_state() == b'n' and self._mtime_s == now[0]
540 540
541 541
542 542 def gettype(q):
543 543 return int(q & 0xFFFF)
544 544
545 545
546 546 class BaseIndexObject(object):
547 547 # Can I be passed to an algorithm implemented in Rust?
548 548 rust_ext_compat = 0
549 549 # Format of an index entry according to Python's `struct` language
550 550 index_format = revlog_constants.INDEX_ENTRY_V1
551 551 # Size of a C unsigned long long int, platform independent
552 552 big_int_size = struct.calcsize(b'>Q')
553 553 # Size of a C long int, platform independent
554 554 int_size = struct.calcsize(b'>i')
555 555 # An empty index entry, used as a default value to be overridden, or nullrev
556 556 null_item = (
557 557 0,
558 558 0,
559 559 0,
560 560 -1,
561 561 -1,
562 562 -1,
563 563 -1,
564 564 sha1nodeconstants.nullid,
565 565 0,
566 566 0,
567 567 revlog_constants.COMP_MODE_INLINE,
568 568 revlog_constants.COMP_MODE_INLINE,
569 569 )
570 570
571 571 @util.propertycache
572 572 def entry_size(self):
573 573 return self.index_format.size
574 574
575 575 @property
576 576 def nodemap(self):
577 577 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
578 578 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
579 579 return self._nodemap
580 580
581 581 @util.propertycache
582 582 def _nodemap(self):
583 583 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
584 584 for r in range(0, len(self)):
585 585 n = self[r][7]
586 586 nodemap[n] = r
587 587 return nodemap
588 588
589 589 def has_node(self, node):
590 590 """return True if the node exist in the index"""
591 591 return node in self._nodemap
592 592
593 593 def rev(self, node):
594 594 """return a revision for a node
595 595
596 596 If the node is unknown, raise a RevlogError"""
597 597 return self._nodemap[node]
598 598
599 599 def get_rev(self, node):
600 600 """return a revision for a node
601 601
602 602 If the node is unknown, return None"""
603 603 return self._nodemap.get(node)
604 604
605 605 def _stripnodes(self, start):
606 606 if '_nodemap' in vars(self):
607 607 for r in range(start, len(self)):
608 608 n = self[r][7]
609 609 del self._nodemap[n]
610 610
611 611 def clearcaches(self):
612 612 self.__dict__.pop('_nodemap', None)
613 613
614 614 def __len__(self):
615 615 return self._lgt + len(self._extra)
616 616
617 617 def append(self, tup):
618 618 if '_nodemap' in vars(self):
619 619 self._nodemap[tup[7]] = len(self)
620 620 data = self._pack_entry(len(self), tup)
621 621 self._extra.append(data)
622 622
623 623 def _pack_entry(self, rev, entry):
624 624 assert entry[8] == 0
625 625 assert entry[9] == 0
626 626 return self.index_format.pack(*entry[:8])
627 627
628 628 def _check_index(self, i):
629 629 if not isinstance(i, int):
630 630 raise TypeError(b"expecting int indexes")
631 631 if i < 0 or i >= len(self):
632 632 raise IndexError
633 633
634 634 def __getitem__(self, i):
635 635 if i == -1:
636 636 return self.null_item
637 637 self._check_index(i)
638 638 if i >= self._lgt:
639 639 data = self._extra[i - self._lgt]
640 640 else:
641 641 index = self._calculate_index(i)
642 642 data = self._data[index : index + self.entry_size]
643 643 r = self._unpack_entry(i, data)
644 644 if self._lgt and i == 0:
645 645 offset = revlogutils.offset_type(0, gettype(r[0]))
646 646 r = (offset,) + r[1:]
647 647 return r
648 648
649 649 def _unpack_entry(self, rev, data):
650 650 r = self.index_format.unpack(data)
651 651 r = r + (
652 652 0,
653 653 0,
654 654 revlog_constants.COMP_MODE_INLINE,
655 655 revlog_constants.COMP_MODE_INLINE,
656 656 )
657 657 return r
658 658
659 659 def pack_header(self, header):
660 660 """pack header information as binary"""
661 661 v_fmt = revlog_constants.INDEX_HEADER
662 662 return v_fmt.pack(header)
663 663
664 664 def entry_binary(self, rev):
665 665 """return the raw binary string representing a revision"""
666 666 entry = self[rev]
667 667 p = revlog_constants.INDEX_ENTRY_V1.pack(*entry[:8])
668 668 if rev == 0:
669 669 p = p[revlog_constants.INDEX_HEADER.size :]
670 670 return p
671 671
672 672
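As a reader's aid, here is a standalone sketch of how one on-disk revlog-v1 index record maps onto the tuples handled above. The ">Qiiiiii20s12x" layout is an assumption about `revlog_constants.INDEX_ENTRY_V1` (64 bytes: offset/flags, lengths, base and link revisions, parents, node, padding), and the helper name is illustrative.

    import struct

    # Assumed equivalent of revlog_constants.INDEX_ENTRY_V1 (64 bytes per entry).
    INDEX_ENTRY_V1 = struct.Struct(b">Qiiiiii20s12x")

    def decode_v1_index_entry(data):
        """Unpack one 64-byte record into the 8 fields used above.

        The pure-Python index then appends two zero sidedata fields and two
        inline compression modes so every entry has the same 12-item shape;
        field 7 is the node hash that `_nodemap` keys on.
        """
        (offset_flags, comp_len, uncomp_len, base_rev,
         link_rev, p1_rev, p2_rev, node) = INDEX_ENTRY_V1.unpack(data)
        return (offset_flags, comp_len, uncomp_len, base_rev,
                link_rev, p1_rev, p2_rev, node)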
673 673 class IndexObject(BaseIndexObject):
674 674 def __init__(self, data):
675 675 assert len(data) % self.entry_size == 0, (
676 676 len(data),
677 677 self.entry_size,
678 678 len(data) % self.entry_size,
679 679 )
680 680 self._data = data
681 681 self._lgt = len(data) // self.entry_size
682 682 self._extra = []
683 683
684 684 def _calculate_index(self, i):
685 685 return i * self.entry_size
686 686
687 687 def __delitem__(self, i):
688 688 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
689 689 raise ValueError(b"deleting slices only supports a:-1 with step 1")
690 690 i = i.start
691 691 self._check_index(i)
692 692 self._stripnodes(i)
693 693 if i < self._lgt:
694 694 self._data = self._data[: i * self.entry_size]
695 695 self._lgt = i
696 696 self._extra = []
697 697 else:
698 698 self._extra = self._extra[: i - self._lgt]
699 699
700 700
701 701 class PersistentNodeMapIndexObject(IndexObject):
702 702 """a Debug oriented class to test persistent nodemap
703 703
704 704 We need a simple python object to test API and higher level behavior. See
705 705 the Rust implementation for more serious usage. This should be used only
706 706 through the dedicated `devel.persistent-nodemap` config.
707 707 """
708 708
709 709 def nodemap_data_all(self):
710 710 """Return bytes containing a full serialization of a nodemap
711 711
712 712 The nodemap should be valid for the full set of revisions in the
713 713 index."""
714 714 return nodemaputil.persistent_data(self)
715 715
716 716 def nodemap_data_incremental(self):
717 717 """Return bytes containing a incremental update to persistent nodemap
718 718
719 719 This containst the data for an append-only update of the data provided
720 720 in the last call to `update_nodemap_data`.
721 721 """
722 722 if self._nm_root is None:
723 723 return None
724 724 docket = self._nm_docket
725 725 changed, data = nodemaputil.update_persistent_data(
726 726 self, self._nm_root, self._nm_max_idx, self._nm_docket.tip_rev
727 727 )
728 728
729 729 self._nm_root = self._nm_max_idx = self._nm_docket = None
730 730 return docket, changed, data
731 731
732 732 def update_nodemap_data(self, docket, nm_data):
733 733 """provide full block of persisted binary data for a nodemap
734 734
735 735 The data are expected to come from disk. See `nodemap_data_all` for a
736 736 produceur of such data."""
737 737 if nm_data is not None:
738 738 self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
739 739 if self._nm_root:
740 740 self._nm_docket = docket
741 741 else:
742 742 self._nm_root = self._nm_max_idx = self._nm_docket = None
743 743
744 744
745 745 class InlinedIndexObject(BaseIndexObject):
746 746 def __init__(self, data, inline=0):
747 747 self._data = data
748 748 self._lgt = self._inline_scan(None)
749 749 self._inline_scan(self._lgt)
750 750 self._extra = []
751 751
752 752 def _inline_scan(self, lgt):
753 753 off = 0
754 754 if lgt is not None:
755 755 self._offsets = [0] * lgt
756 756 count = 0
757 757 while off <= len(self._data) - self.entry_size:
758 758 start = off + self.big_int_size
759 759 (s,) = struct.unpack(
760 760 b'>i',
761 761 self._data[start : start + self.int_size],
762 762 )
763 763 if lgt is not None:
764 764 self._offsets[count] = off
765 765 count += 1
766 766 off += self.entry_size + s
767 767 if off != len(self._data):
768 768 raise ValueError(b"corrupted data")
769 769 return count
770 770
771 771 def __delitem__(self, i):
772 772 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
773 773 raise ValueError(b"deleting slices only supports a:-1 with step 1")
774 774 i = i.start
775 775 self._check_index(i)
776 776 self._stripnodes(i)
777 777 if i < self._lgt:
778 778 self._offsets = self._offsets[:i]
779 779 self._lgt = i
780 780 self._extra = []
781 781 else:
782 782 self._extra = self._extra[: i - self._lgt]
783 783
784 784 def _calculate_index(self, i):
785 785 return self._offsets[i]
786 786
787 787
788 788 def parse_index2(data, inline, revlogv2=False):
789 789 if not inline:
790 790 cls = IndexObject2 if revlogv2 else IndexObject
791 791 return cls(data), None
792 792 cls = InlinedIndexObject
793 793 return cls(data, inline), (0, data)
794 794
795 795
796 796 def parse_index_cl_v2(data):
797 797 return IndexChangelogV2(data), None
798 798
799 799
800 800 class IndexObject2(IndexObject):
801 801 index_format = revlog_constants.INDEX_ENTRY_V2
802 802
803 803 def replace_sidedata_info(
804 804 self,
805 805 rev,
806 806 sidedata_offset,
807 807 sidedata_length,
808 808 offset_flags,
809 809 compression_mode,
810 810 ):
811 811 """
812 812 Replace an existing index entry's sidedata offset and length with new
813 813 ones.
814 814 This cannot be used outside of the context of sidedata rewriting,
815 815 inside the transaction that creates the revision `rev`.
816 816 """
817 817 if rev < 0:
818 818 raise KeyError
819 819 self._check_index(rev)
820 820 if rev < self._lgt:
821 821 msg = b"cannot rewrite entries outside of this transaction"
822 822 raise KeyError(msg)
823 823 else:
824 824 entry = list(self[rev])
825 825 entry[0] = offset_flags
826 826 entry[8] = sidedata_offset
827 827 entry[9] = sidedata_length
828 828 entry[11] = compression_mode
829 829 entry = tuple(entry)
830 830 new = self._pack_entry(rev, entry)
831 831 self._extra[rev - self._lgt] = new
832 832
833 833 def _unpack_entry(self, rev, data):
834 834 data = self.index_format.unpack(data)
835 835 entry = data[:10]
836 836 data_comp = data[10] & 3
837 837 sidedata_comp = (data[10] & (3 << 2)) >> 2
838 838 return entry + (data_comp, sidedata_comp)
839 839
840 840 def _pack_entry(self, rev, entry):
841 841 data = entry[:10]
842 842 data_comp = entry[10] & 3
843 843 sidedata_comp = (entry[11] & 3) << 2
844 844 data += (data_comp | sidedata_comp,)
845 845
846 846 return self.index_format.pack(*data)
847 847
848 848 def entry_binary(self, rev):
849 849 """return the raw binary string representing a revision"""
850 850 entry = self[rev]
851 851 return self._pack_entry(rev, entry)
852 852
853 853 def pack_header(self, header):
854 854 """pack header information as binary"""
855 855 msg = 'version header should go in the docket, not the index: %d'
856 856 msg %= header
857 857 raise error.ProgrammingError(msg)
858 858
859 859
860 860 class IndexChangelogV2(IndexObject2):
861 861 index_format = revlog_constants.INDEX_ENTRY_CL_V2
862 862
863 863 def _unpack_entry(self, rev, data, r=True):
864 864 items = self.index_format.unpack(data)
865 865 entry = items[:3] + (rev, rev) + items[3:8]
866 866 data_comp = items[8] & 3
867 867 sidedata_comp = (items[8] >> 2) & 3
868 868 return entry + (data_comp, sidedata_comp)
869 869
870 870 def _pack_entry(self, rev, entry):
871 871 assert entry[3] == rev, entry[3]
872 872 assert entry[4] == rev, entry[4]
873 873 data = entry[:3] + entry[5:10]
874 874 data_comp = entry[10] & 3
875 875 sidedata_comp = (entry[11] & 3) << 2
876 876 data += (data_comp | sidedata_comp,)
877 877 return self.index_format.pack(*data)
878 878
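The `_pack_entry`/`_unpack_entry` pairs above squeeze both compression modes into a single byte: the data compression mode sits in the two low bits and the sidedata compression mode in the next two. A minimal sketch with illustrative helper names:

    def pack_comp_modes(data_comp, sidedata_comp):
        # data mode in bits 0-1, sidedata mode in bits 2-3
        return (data_comp & 3) | ((sidedata_comp & 3) << 2)

    def unpack_comp_modes(packed):
        return packed & 3, (packed >> 2) & 3

    assert unpack_comp_modes(pack_comp_modes(2, 1)) == (2, 1)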
879 879
880 880 def parse_index_devel_nodemap(data, inline):
881 881 """like parse_index2, but alway return a PersistentNodeMapIndexObject"""
882 882 return PersistentNodeMapIndexObject(data), None
883 883
884 884
885 885 def parse_dirstate(dmap, copymap, st):
886 886 parents = [st[:20], st[20:40]]
887 887 # dereference fields so they will be local in loop
888 888 format = b">cllll"
889 889 e_size = struct.calcsize(format)
890 890 pos1 = 40
891 891 l = len(st)
892 892
893 893 # the inner loop
894 894 while pos1 < l:
895 895 pos2 = pos1 + e_size
896 896 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
897 897 pos1 = pos2 + e[4]
898 898 f = st[pos2:pos1]
899 899 if b'\0' in f:
900 900 f, c = f.split(b'\0')
901 901 copymap[f] = c
902 902 dmap[f] = DirstateItem.from_v1_data(*e[:4])
903 903 return parents
904 904
905 905
906 906 def pack_dirstate(dmap, copymap, pl, now):
907 907 cs = stringio()
908 908 write = cs.write
909 909 write(b"".join(pl))
910 910 for f, e in pycompat.iteritems(dmap):
911 911 if e.need_delay(now):
912 912 # The file was last modified "simultaneously" with the current
913 913 # write to dirstate (i.e. within the same second for file-
914 914 # systems with a granularity of 1 sec). This commonly happens
915 915 # for at least a couple of files on 'update'.
916 916 # The user could change the file without changing its size
917 917 # within the same second. Invalidate the file's mtime in
918 918 # dirstate, forcing future 'status' calls to compare the
919 919 # contents of the file if the size is the same. This prevents
920 920 # mistakenly treating such files as clean.
921 921 e.set_possibly_dirty()
922 922
923 923 if f in copymap:
924 924 f = b"%s\0%s" % (f, copymap[f])
925 925 e = _pack(
926 926 b">cllll",
927 927 e.v1_state(),
928 928 e.v1_mode(),
929 929 e.v1_size(),
930 930 e.v1_mtime(),
931 931 len(f),
932 932 )
933 933 write(e)
934 934 write(f)
935 935 return cs.getvalue()
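To make the v1 wire format above concrete, here is a self-contained round-trip of a single dirstate-v1 record as written by `pack_dirstate` and read back by `parse_dirstate`: a state byte, three 32-bit integers, the filename length, then the filename (with an optional NUL-separated copy source). Helper names are illustrative.

    import struct

    V1_HEADER = struct.calcsize(b">cllll")  # 17 bytes per record header

    def pack_v1_record(state, mode, size, mtime, filename, copy_source=None):
        if copy_source is not None:
            filename = b"%s\0%s" % (filename, copy_source)
        header = struct.pack(b">cllll", state, mode, size, mtime, len(filename))
        return header + filename

    def unpack_v1_record(data, pos):
        state, mode, size, mtime, flen = struct.unpack(
            b">cllll", data[pos : pos + V1_HEADER]
        )
        name = data[pos + V1_HEADER : pos + V1_HEADER + flen]
        copy = None
        if b"\0" in name:
            name, copy = name.split(b"\0")
        return (state, mode, size, mtime, name, copy), pos + V1_HEADER + flen

    record = pack_v1_record(b"n", 0o644, 12, 0, b"dir/file.txt")
    entry, _next = unpack_v1_record(record, 0)
    assert entry[4] == b"dir/file.txt"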
@@ -1,774 +1,774 b''
1 1 //! The "version 2" disk representation of the dirstate
2 2 //!
3 3 //! See `mercurial/helptext/internals/dirstate-v2.txt`
4 4
5 5 use crate::dirstate::TruncatedTimestamp;
6 6 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
7 7 use crate::dirstate_tree::path_with_basename::WithBasename;
8 8 use crate::errors::HgError;
9 9 use crate::utils::hg_path::HgPath;
10 10 use crate::DirstateEntry;
11 11 use crate::DirstateError;
12 12 use crate::DirstateParents;
13 13 use bitflags::bitflags;
14 14 use bytes_cast::unaligned::{U16Be, U32Be};
15 15 use bytes_cast::BytesCast;
16 16 use format_bytes::format_bytes;
17 17 use std::borrow::Cow;
18 18 use std::convert::{TryFrom, TryInto};
19 19
20 20 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
21 21 /// This is a redundant sanity check more than an actual "magic number" since
22 22 /// `.hg/requires` already governs which format should be used.
23 23 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
24 24
25 25 /// Keep space for 256-bit hashes
26 26 const STORED_NODE_ID_BYTES: usize = 32;
27 27
28 28 /// … even though only 160 bits are used for now, with SHA-1
29 29 const USED_NODE_ID_BYTES: usize = 20;
30 30
31 31 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
32 32 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
33 33
34 34 /// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
35 35 const TREE_METADATA_SIZE: usize = 44;
36 36 const NODE_SIZE: usize = 44;
37 37
38 38 /// Make sure that size-affecting changes are made knowingly
39 39 #[allow(unused)]
40 40 fn static_assert_size_of() {
41 41 let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
42 42 let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
43 43 let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
44 44 }
45 45
46 46 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
47 47 #[derive(BytesCast)]
48 48 #[repr(C)]
49 49 struct DocketHeader {
50 50 marker: [u8; V2_FORMAT_MARKER.len()],
51 51 parent_1: [u8; STORED_NODE_ID_BYTES],
52 52 parent_2: [u8; STORED_NODE_ID_BYTES],
53 53
54 54 metadata: TreeMetadata,
55 55
56 56 /// Counted in bytes
57 57 data_size: Size,
58 58
59 59 uuid_size: u8,
60 60 }
61 61
62 62 pub struct Docket<'on_disk> {
63 63 header: &'on_disk DocketHeader,
64 64 uuid: &'on_disk [u8],
65 65 }
66 66
67 67 /// Fields are documented in the *Tree metadata in the docket file*
68 68 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
69 69 #[derive(BytesCast)]
70 70 #[repr(C)]
71 71 struct TreeMetadata {
72 72 root_nodes: ChildNodes,
73 73 nodes_with_entry_count: Size,
74 74 nodes_with_copy_source_count: Size,
75 75 unreachable_bytes: Size,
76 76 unused: [u8; 4],
77 77
78 78 /// See *Optional hash of ignore patterns* section of
79 79 /// `mercurial/helptext/internals/dirstate-v2.txt`
80 80 ignore_patterns_hash: IgnorePatternsHash,
81 81 }
82 82
83 83 /// Fields are documented in the *The data file format*
84 84 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
85 85 #[derive(BytesCast)]
86 86 #[repr(C)]
87 87 pub(super) struct Node {
88 88 full_path: PathSlice,
89 89
90 90 /// In bytes from `self.full_path.start`
91 91 base_name_start: PathSize,
92 92
93 93 copy_source: OptPathSlice,
94 94 children: ChildNodes,
95 95 pub(super) descendants_with_entry_count: Size,
96 96 pub(super) tracked_descendants_count: Size,
97 97 flags: U16Be,
98 98 size: U32Be,
99 99 mtime: PackedTruncatedTimestamp,
100 100 }
101 101
102 102 bitflags! {
103 103 #[repr(C)]
104 104 struct Flags: u16 {
105 105 const WDIR_TRACKED = 1 << 0;
106 106 const P1_TRACKED = 1 << 1;
107 107 const P2_INFO = 1 << 2;
108 108 const HAS_MODE_AND_SIZE = 1 << 3;
109 const HAS_FILE_MTIME = 1 << 4;
110 const HAS_DIRECTORY_MTIME = 1 << 5;
109 const HAS_MTIME = 1 << 4;
110 const DIRECTORY = 1 << 5;
111 111 const MODE_EXEC_PERM = 1 << 6;
112 112 const MODE_IS_SYMLINK = 1 << 7;
113 113 const EXPECTED_STATE_IS_MODIFIED = 1 << 8;
114 114 const ALL_UNKNOWN_RECORDED = 1 << 9;
115 115 const ALL_IGNORED_RECORDED = 1 << 10;
116 116 const HAS_FALLBACK_EXEC = 1 << 11;
117 117 const FALLBACK_EXEC = 1 << 12;
118 118 const HAS_FALLBACK_SYMLINK = 1 << 13;
119 119 const FALLBACK_SYMLINK = 1 << 14;
120 120 const MTIME_SECOND_AMBIGUOUS = 1 << 15;
121 121 }
122 122 }
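This flag rework is the point of the change: `HAS_MTIME` now says whether the mtime field is meaningful at all, and `DIRECTORY` says whether it holds a cached directory mtime (only usable together with `ALL_UNKNOWN_RECORDED`) rather than a file mtime. A small Python sketch, duplicating a few of the bit values above purely for illustration:

    WDIR_TRACKED = 1 << 0
    P1_TRACKED = 1 << 1
    HAS_MTIME = 1 << 4
    DIRECTORY = 1 << 5
    ALL_UNKNOWN_RECORDED = 1 << 9

    def mtime_is_cached_directory_mtime(flags):
        wanted = DIRECTORY | HAS_MTIME | ALL_UNKNOWN_RECORDED
        return (flags & wanted) == wanted

    def mtime_is_file_mtime(flags):
        return bool(flags & HAS_MTIME) and not flags & DIRECTORY

    # a directory node as emitted by the writer further down
    dir_node = DIRECTORY | HAS_MTIME | ALL_UNKNOWN_RECORDED
    # a tracked file with a recorded mtime
    file_node = WDIR_TRACKED | P1_TRACKED | HAS_MTIME
    assert mtime_is_cached_directory_mtime(dir_node)
    assert mtime_is_file_mtime(file_node) and not mtime_is_file_mtime(dir_node)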
123 123
124 124 /// Duration since the Unix epoch
125 125 #[derive(BytesCast, Copy, Clone)]
126 126 #[repr(C)]
127 127 struct PackedTruncatedTimestamp {
128 128 truncated_seconds: U32Be,
129 129 nanoseconds: U32Be,
130 130 }
131 131
132 132 /// Counted in bytes from the start of the file
133 133 ///
134 134 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
135 135 type Offset = U32Be;
136 136
137 137 /// Counted in number of items
138 138 ///
139 139 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
140 140 type Size = U32Be;
141 141
142 142 /// Counted in bytes
143 143 ///
144 144 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
145 145 type PathSize = U16Be;
146 146
147 147 /// A contiguous sequence of `len` times `Node`, representing the child nodes
148 148 /// of either some other node or of the repository root.
149 149 ///
150 150 /// Always sorted by ascending `full_path`, to allow binary search.
151 151 /// Since nodes with the same parent node also have the same parent path,
152 152 /// only the `base_name`s need to be compared during binary search.
153 153 #[derive(BytesCast, Copy, Clone)]
154 154 #[repr(C)]
155 155 struct ChildNodes {
156 156 start: Offset,
157 157 len: Size,
158 158 }
159 159
160 160 /// A `HgPath` of `len` bytes
161 161 #[derive(BytesCast, Copy, Clone)]
162 162 #[repr(C)]
163 163 struct PathSlice {
164 164 start: Offset,
165 165 len: PathSize,
166 166 }
167 167
168 168 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
169 169 type OptPathSlice = PathSlice;
170 170
171 171 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
172 172 ///
173 173 /// This should only happen if Mercurial is buggy or a repository is corrupted.
174 174 #[derive(Debug)]
175 175 pub struct DirstateV2ParseError;
176 176
177 177 impl From<DirstateV2ParseError> for HgError {
178 178 fn from(_: DirstateV2ParseError) -> Self {
179 179 HgError::corrupted("dirstate-v2 parse error")
180 180 }
181 181 }
182 182
183 183 impl From<DirstateV2ParseError> for crate::DirstateError {
184 184 fn from(error: DirstateV2ParseError) -> Self {
185 185 HgError::from(error).into()
186 186 }
187 187 }
188 188
189 189 impl<'on_disk> Docket<'on_disk> {
190 190 pub fn parents(&self) -> DirstateParents {
191 191 use crate::Node;
192 192 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
193 193 .unwrap()
194 194 .clone();
195 195 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
196 196 .unwrap()
197 197 .clone();
198 198 DirstateParents { p1, p2 }
199 199 }
200 200
201 201 pub fn tree_metadata(&self) -> &[u8] {
202 202 self.header.metadata.as_bytes()
203 203 }
204 204
205 205 pub fn data_size(&self) -> usize {
206 206 // This `unwrap` could only panic on a 16-bit CPU
207 207 self.header.data_size.get().try_into().unwrap()
208 208 }
209 209
210 210 pub fn data_filename(&self) -> String {
211 211 String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
212 212 }
213 213 }
214 214
215 215 pub fn read_docket(
216 216 on_disk: &[u8],
217 217 ) -> Result<Docket<'_>, DirstateV2ParseError> {
218 218 let (header, uuid) =
219 219 DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
220 220 let uuid_size = header.uuid_size as usize;
221 221 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
222 222 Ok(Docket { header, uuid })
223 223 } else {
224 224 Err(DirstateV2ParseError)
225 225 }
226 226 }
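For orientation, a hedged Python sketch of the byte layout that `read_docket` consumes, derived from the `DocketHeader` struct above: a 12-byte marker, two 32-byte parents, 44 bytes of tree metadata, a big-endian 32-bit data size, a uuid length byte, then the uuid itself. The struct format string and helper name are assumptions mirroring those fields.

    import struct

    DOCKET_HEADER = struct.Struct(">12s32s32s44sIB")  # 125 bytes

    def read_docket_py(on_disk):
        marker, p1, p2, metadata, data_size, uuid_size = DOCKET_HEADER.unpack(
            on_disk[: DOCKET_HEADER.size]
        )
        uuid = on_disk[DOCKET_HEADER.size :]
        if marker != b"dirstate-v2\n" or len(uuid) != uuid_size:
            raise ValueError("corrupted dirstate-v2 docket")
        return {
            "parents": (p1[:20], p2[:20]),  # only 160 bits used with SHA-1
            "tree_metadata": metadata,
            "data_size": data_size,
            "data_filename": b"dirstate." + uuid,
        }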
227 227
228 228 pub(super) fn read<'on_disk>(
229 229 on_disk: &'on_disk [u8],
230 230 metadata: &[u8],
231 231 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
232 232 if on_disk.is_empty() {
233 233 return Ok(DirstateMap::empty(on_disk));
234 234 }
235 235 let (meta, _) = TreeMetadata::from_bytes(metadata)
236 236 .map_err(|_| DirstateV2ParseError)?;
237 237 let dirstate_map = DirstateMap {
238 238 on_disk,
239 239 root: dirstate_map::ChildNodes::OnDisk(read_nodes(
240 240 on_disk,
241 241 meta.root_nodes,
242 242 )?),
243 243 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
244 244 nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
245 245 ignore_patterns_hash: meta.ignore_patterns_hash,
246 246 unreachable_bytes: meta.unreachable_bytes.get(),
247 247 };
248 248 Ok(dirstate_map)
249 249 }
250 250
251 251 impl Node {
252 252 pub(super) fn full_path<'on_disk>(
253 253 &self,
254 254 on_disk: &'on_disk [u8],
255 255 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
256 256 read_hg_path(on_disk, self.full_path)
257 257 }
258 258
259 259 pub(super) fn base_name_start<'on_disk>(
260 260 &self,
261 261 ) -> Result<usize, DirstateV2ParseError> {
262 262 let start = self.base_name_start.get();
263 263 if start < self.full_path.len.get() {
264 264 let start = usize::try_from(start)
265 265 // u32 -> usize, could only panic on a 16-bit CPU
266 266 .expect("dirstate-v2 base_name_start out of bounds");
267 267 Ok(start)
268 268 } else {
269 269 Err(DirstateV2ParseError)
270 270 }
271 271 }
272 272
273 273 pub(super) fn base_name<'on_disk>(
274 274 &self,
275 275 on_disk: &'on_disk [u8],
276 276 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
277 277 let full_path = self.full_path(on_disk)?;
278 278 let base_name_start = self.base_name_start()?;
279 279 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
280 280 }
281 281
282 282 pub(super) fn path<'on_disk>(
283 283 &self,
284 284 on_disk: &'on_disk [u8],
285 285 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
286 286 Ok(WithBasename::from_raw_parts(
287 287 Cow::Borrowed(self.full_path(on_disk)?),
288 288 self.base_name_start()?,
289 289 ))
290 290 }
291 291
292 292 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
293 293 self.copy_source.start.get() != 0
294 294 }
295 295
296 296 pub(super) fn copy_source<'on_disk>(
297 297 &self,
298 298 on_disk: &'on_disk [u8],
299 299 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
300 300 Ok(if self.has_copy_source() {
301 301 Some(read_hg_path(on_disk, self.copy_source)?)
302 302 } else {
303 303 None
304 304 })
305 305 }
306 306
307 307 fn flags(&self) -> Flags {
308 308 Flags::from_bits_truncate(self.flags.get())
309 309 }
310 310
311 311 fn has_entry(&self) -> bool {
312 312 self.flags().intersects(
313 313 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
314 314 )
315 315 }
316 316
317 317 pub(super) fn node_data(
318 318 &self,
319 319 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
320 320 if self.has_entry() {
321 321 Ok(dirstate_map::NodeData::Entry(self.assume_entry()?))
322 322 } else if let Some(mtime) = self.cached_directory_mtime()? {
323 323 Ok(dirstate_map::NodeData::CachedDirectory { mtime })
324 324 } else {
325 325 Ok(dirstate_map::NodeData::None)
326 326 }
327 327 }
328 328
329 329 pub(super) fn cached_directory_mtime(
330 330 &self,
331 331 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
332 // For now we do not have code to handle ALL_UNKNOWN_RECORDED, so we
333 // ignore the mtime if the flag is set.
334 if self.flags().contains(Flags::HAS_DIRECTORY_MTIME)
332 // For now we do not have code to handle the absence of
333 // ALL_UNKNOWN_RECORDED, so we ignore the mtime if the flag is
334 // unset.
335 if self.flags().contains(Flags::DIRECTORY)
336 && self.flags().contains(Flags::HAS_MTIME)
335 337 && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED)
336 338 {
337 if self.flags().contains(Flags::HAS_FILE_MTIME) {
338 Err(DirstateV2ParseError)
339 } else {
340 Ok(Some(self.mtime.try_into()?))
341 }
339 Ok(Some(self.mtime.try_into()?))
342 340 } else {
343 341 Ok(None)
344 342 }
345 343 }
346 344
347 345 fn synthesize_unix_mode(&self) -> u32 {
348 346 let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
349 347 libc::S_IFLNK
350 348 } else {
351 349 libc::S_IFREG
352 350 };
353 351 let permissions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
354 352 0o755
355 353 } else {
356 354 0o644
357 355 };
358 356 file_type | permissions
359 357 }
360 358
361 359 fn assume_entry(&self) -> Result<DirstateEntry, DirstateV2ParseError> {
362 360 // TODO: convert through raw bits instead?
363 361 let wdir_tracked = self.flags().contains(Flags::WDIR_TRACKED);
364 362 let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
365 363 let p2_info = self.flags().contains(Flags::P2_INFO);
366 364 let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
367 365 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
368 366 {
369 367 Some((self.synthesize_unix_mode(), self.size.into()))
370 368 } else {
371 369 None
372 370 };
373 let mtime = if self.flags().contains(Flags::HAS_FILE_MTIME)
371 let mtime = if self.flags().contains(Flags::HAS_MTIME)
372 && !self.flags().contains(Flags::DIRECTORY)
374 373 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
375 374 // The current code is not able to do the more subtle comparison that the
376 375 // MTIME_SECOND_AMBIGUOUS requires. So we ignore the mtime
377 376 && !self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS)
378 377 {
379 378 Some(self.mtime.try_into()?)
380 379 } else {
381 380 None
382 381 };
383 382 Ok(DirstateEntry::from_v2_data(
384 383 wdir_tracked,
385 384 p1_tracked,
386 385 p2_info,
387 386 mode_size,
388 387 mtime,
389 388 None,
390 389 None,
391 390 ))
392 391 }
393 392
394 393 pub(super) fn entry(
395 394 &self,
396 395 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
397 396 if self.has_entry() {
398 397 Ok(Some(self.assume_entry()?))
399 398 } else {
400 399 Ok(None)
401 400 }
402 401 }
403 402
404 403 pub(super) fn children<'on_disk>(
405 404 &self,
406 405 on_disk: &'on_disk [u8],
407 406 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
408 407 read_nodes(on_disk, self.children)
409 408 }
410 409
411 410 pub(super) fn to_in_memory_node<'on_disk>(
412 411 &self,
413 412 on_disk: &'on_disk [u8],
414 413 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
415 414 Ok(dirstate_map::Node {
416 415 children: dirstate_map::ChildNodes::OnDisk(
417 416 self.children(on_disk)?,
418 417 ),
419 418 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
420 419 data: self.node_data()?,
421 420 descendants_with_entry_count: self
422 421 .descendants_with_entry_count
423 422 .get(),
424 423 tracked_descendants_count: self.tracked_descendants_count.get(),
425 424 })
426 425 }
427 426
428 427 fn from_dirstate_entry(
429 428 entry: &DirstateEntry,
430 429 ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
431 430 let (
432 431 wdir_tracked,
433 432 p1_tracked,
434 433 p2_info,
435 434 mode_size_opt,
436 435 mtime_opt,
437 436 fallback_exec,
438 437 fallback_symlink,
439 438 ) = entry.v2_data();
440 439 // TODO: convert through raw flag bits instead?
441 440 let mut flags = Flags::empty();
442 441 flags.set(Flags::WDIR_TRACKED, wdir_tracked);
443 442 flags.set(Flags::P1_TRACKED, p1_tracked);
444 443 flags.set(Flags::P2_INFO, p2_info);
445 444 let size = if let Some((m, s)) = mode_size_opt {
446 445 let exec_perm = m & libc::S_IXUSR != 0;
447 446 let is_symlink = m & libc::S_IFMT == libc::S_IFLNK;
448 447 flags.set(Flags::MODE_EXEC_PERM, exec_perm);
449 448 flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
450 449 flags.insert(Flags::HAS_MODE_AND_SIZE);
451 450 s.into()
452 451 } else {
453 452 0.into()
454 453 };
455 454 let mtime = if let Some(m) = mtime_opt {
456 flags.insert(Flags::HAS_FILE_MTIME);
455 flags.insert(Flags::HAS_MTIME);
457 456 m.into()
458 457 } else {
459 458 PackedTruncatedTimestamp::null()
460 459 };
461 460 if let Some(f_exec) = fallback_exec {
462 461 flags.insert(Flags::HAS_FALLBACK_EXEC);
463 462 if f_exec {
464 463 flags.insert(Flags::FALLBACK_EXEC);
465 464 }
466 465 }
467 466 if let Some(f_symlink) = fallback_symlink {
468 467 flags.insert(Flags::HAS_FALLBACK_SYMLINK);
469 468 if f_symlink {
470 469 flags.insert(Flags::FALLBACK_SYMLINK);
471 470 }
472 471 }
473 472 (flags, size, mtime)
474 473 }
475 474 }
476 475
477 476 fn read_hg_path(
478 477 on_disk: &[u8],
479 478 slice: PathSlice,
480 479 ) -> Result<&HgPath, DirstateV2ParseError> {
481 480 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
482 481 }
483 482
484 483 fn read_nodes(
485 484 on_disk: &[u8],
486 485 slice: ChildNodes,
487 486 ) -> Result<&[Node], DirstateV2ParseError> {
488 487 read_slice(on_disk, slice.start, slice.len.get())
489 488 }
490 489
491 490 fn read_slice<T, Len>(
492 491 on_disk: &[u8],
493 492 start: Offset,
494 493 len: Len,
495 494 ) -> Result<&[T], DirstateV2ParseError>
496 495 where
497 496 T: BytesCast,
498 497 Len: TryInto<usize>,
499 498 {
500 499 // Either value falling back to `usize::MAX` simply results in an "out of
501 500 // bounds" error, since a single `&[u8]` cannot occupy the entire address space.
502 501 let start = start.get().try_into().unwrap_or(std::usize::MAX);
503 502 let len = len.try_into().unwrap_or(std::usize::MAX);
504 503 on_disk
505 504 .get(start..)
506 505 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
507 506 .map(|(slice, _rest)| slice)
508 507 .ok_or_else(|| DirstateV2ParseError)
509 508 }
510 509
511 510 pub(crate) fn for_each_tracked_path<'on_disk>(
512 511 on_disk: &'on_disk [u8],
513 512 metadata: &[u8],
514 513 mut f: impl FnMut(&'on_disk HgPath),
515 514 ) -> Result<(), DirstateV2ParseError> {
516 515 let (meta, _) = TreeMetadata::from_bytes(metadata)
517 516 .map_err(|_| DirstateV2ParseError)?;
518 517 fn recur<'on_disk>(
519 518 on_disk: &'on_disk [u8],
520 519 nodes: ChildNodes,
521 520 f: &mut impl FnMut(&'on_disk HgPath),
522 521 ) -> Result<(), DirstateV2ParseError> {
523 522 for node in read_nodes(on_disk, nodes)? {
524 523 if let Some(entry) = node.entry()? {
525 524 if entry.state().is_tracked() {
526 525 f(node.full_path(on_disk)?)
527 526 }
528 527 }
529 528 recur(on_disk, node.children, f)?
530 529 }
531 530 Ok(())
532 531 }
533 532 recur(on_disk, meta.root_nodes, &mut f)
534 533 }
535 534
536 535 /// Returns new data and metadata, together with whether that data should be
537 536 /// appended to the existing data file whose content is at
538 537 /// `dirstate_map.on_disk` (true), instead of written to a new data file
539 538 /// (false).
540 539 pub(super) fn write(
541 540 dirstate_map: &mut DirstateMap,
542 541 can_append: bool,
543 542 ) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
544 543 let append = can_append && dirstate_map.write_should_append();
545 544
546 545 // This ignores the space for paths, and for nodes without an entry.
547 546 // TODO: better estimate? Skip the `Vec` and write to a file directly?
548 547 let size_guess = std::mem::size_of::<Node>()
549 548 * dirstate_map.nodes_with_entry_count as usize;
550 549
551 550 let mut writer = Writer {
552 551 dirstate_map,
553 552 append,
554 553 out: Vec::with_capacity(size_guess),
555 554 };
556 555
557 556 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
558 557
559 558 let meta = TreeMetadata {
560 559 root_nodes,
561 560 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
562 561 nodes_with_copy_source_count: dirstate_map
563 562 .nodes_with_copy_source_count
564 563 .into(),
565 564 unreachable_bytes: dirstate_map.unreachable_bytes.into(),
566 565 unused: [0; 4],
567 566 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
568 567 };
569 568 Ok((writer.out, meta.as_bytes().to_vec(), append))
570 569 }
571 570
572 571 struct Writer<'dmap, 'on_disk> {
573 572 dirstate_map: &'dmap DirstateMap<'on_disk>,
574 573 append: bool,
575 574 out: Vec<u8>,
576 575 }
577 576
578 577 impl Writer<'_, '_> {
579 578 fn write_nodes(
580 579 &mut self,
581 580 nodes: dirstate_map::ChildNodesRef,
582 581 ) -> Result<ChildNodes, DirstateError> {
583 582 // Reuse already-written nodes if possible
584 583 if self.append {
585 584 if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
586 585 let start = self.on_disk_offset_of(nodes_slice).expect(
587 586 "dirstate-v2 OnDisk nodes not found within on_disk",
588 587 );
589 588 let len = child_nodes_len_from_usize(nodes_slice.len());
590 589 return Ok(ChildNodes { start, len });
591 590 }
592 591 }
593 592
594 593 // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
595 594 // undefined iteration order. Sort to enable binary search in the
596 595 // written file.
597 596 let nodes = nodes.sorted();
598 597 let nodes_len = nodes.len();
599 598
600 599 // First accumulate serialized nodes in a `Vec`
601 600 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
602 601 for node in nodes {
603 602 let children =
604 603 self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
605 604 let full_path = node.full_path(self.dirstate_map.on_disk)?;
606 605 let full_path = self.write_path(full_path.as_bytes());
607 606 let copy_source = if let Some(source) =
608 607 node.copy_source(self.dirstate_map.on_disk)?
609 608 {
610 609 self.write_path(source.as_bytes())
611 610 } else {
612 611 PathSlice {
613 612 start: 0.into(),
614 613 len: 0.into(),
615 614 }
616 615 };
617 616 on_disk_nodes.push(match node {
618 617 NodeRef::InMemory(path, node) => {
619 618 let (flags, size, mtime) = match &node.data {
620 619 dirstate_map::NodeData::Entry(entry) => {
621 620 Node::from_dirstate_entry(entry)
622 621 }
623 622 dirstate_map::NodeData::CachedDirectory { mtime } => (
624 623 // we currently never set a mtime if unknown files
625 624 // are present.
626 625 // So if we have a mtime for a directory, we know
627 626 // there are no unknown
628 627 // files and we
629 628 // blindly set ALL_UNKNOWN_RECORDED.
630 629 //
631 630 // We never set ALL_IGNORED_RECORDED since we
632 631 // don't track that case
633 632 // currently.
634 Flags::HAS_DIRECTORY_MTIME
633 Flags::DIRECTORY
634 | Flags::HAS_MTIME
635 635 | Flags::ALL_UNKNOWN_RECORDED,
636 636 0.into(),
637 637 (*mtime).into(),
638 638 ),
639 639 dirstate_map::NodeData::None => (
640 Flags::empty(),
640 Flags::DIRECTORY,
641 641 0.into(),
642 642 PackedTruncatedTimestamp::null(),
643 643 ),
644 644 };
645 645 Node {
646 646 children,
647 647 copy_source,
648 648 full_path,
649 649 base_name_start: u16::try_from(path.base_name_start())
650 650 // Could only panic for paths over 64 KiB
651 651 .expect("dirstate-v2 path length overflow")
652 652 .into(),
653 653 descendants_with_entry_count: node
654 654 .descendants_with_entry_count
655 655 .into(),
656 656 tracked_descendants_count: node
657 657 .tracked_descendants_count
658 658 .into(),
659 659 flags: flags.bits().into(),
660 660 size,
661 661 mtime,
662 662 }
663 663 }
664 664 NodeRef::OnDisk(node) => Node {
665 665 children,
666 666 copy_source,
667 667 full_path,
668 668 ..*node
669 669 },
670 670 })
671 671 }
672 672 // … so we can write them contiguously, after writing everything else
673 673 // they refer to.
674 674 let start = self.current_offset();
675 675 let len = child_nodes_len_from_usize(nodes_len);
676 676 self.out.extend(on_disk_nodes.as_bytes());
677 677 Ok(ChildNodes { start, len })
678 678 }
679 679
680 680 /// If the given slice of items is within `on_disk`, returns its offset
681 681 /// from the start of `on_disk`.
682 682 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
683 683 where
684 684 T: BytesCast,
685 685 {
686 686 fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
687 687 let start = slice.as_ptr() as usize;
688 688 let end = start + slice.len();
689 689 start..=end
690 690 }
691 691 let slice_addresses = address_range(slice.as_bytes());
692 692 let on_disk_addresses = address_range(self.dirstate_map.on_disk);
693 693 if on_disk_addresses.contains(slice_addresses.start())
694 694 && on_disk_addresses.contains(slice_addresses.end())
695 695 {
696 696 let offset = slice_addresses.start() - on_disk_addresses.start();
697 697 Some(offset_from_usize(offset))
698 698 } else {
699 699 None
700 700 }
701 701 }
702 702
703 703 fn current_offset(&mut self) -> Offset {
704 704 let mut offset = self.out.len();
705 705 if self.append {
706 706 offset += self.dirstate_map.on_disk.len()
707 707 }
708 708 offset_from_usize(offset)
709 709 }
710 710
711 711 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
712 712 let len = path_len_from_usize(slice.len());
713 713 // Reuse an already-written path if possible
714 714 if self.append {
715 715 if let Some(start) = self.on_disk_offset_of(slice) {
716 716 return PathSlice { start, len };
717 717 }
718 718 }
719 719 let start = self.current_offset();
720 720 self.out.extend(slice.as_bytes());
721 721 PathSlice { start, len }
722 722 }
723 723 }
724 724
725 725 fn offset_from_usize(x: usize) -> Offset {
726 726 u32::try_from(x)
727 727 // Could only panic for a dirstate file larger than 4 GiB
728 728 .expect("dirstate-v2 offset overflow")
729 729 .into()
730 730 }
731 731
732 732 fn child_nodes_len_from_usize(x: usize) -> Size {
733 733 u32::try_from(x)
734 734 // Could only panic with over 4 billion nodes
735 735 .expect("dirstate-v2 slice length overflow")
736 736 .into()
737 737 }
738 738
739 739 fn path_len_from_usize(x: usize) -> PathSize {
740 740 u16::try_from(x)
741 741 // Could only panic for paths over 64 KiB
742 742 .expect("dirstate-v2 path length overflow")
743 743 .into()
744 744 }
745 745
746 746 impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
747 747 fn from(timestamp: TruncatedTimestamp) -> Self {
748 748 Self {
749 749 truncated_seconds: timestamp.truncated_seconds().into(),
750 750 nanoseconds: timestamp.nanoseconds().into(),
751 751 }
752 752 }
753 753 }
754 754
755 755 impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
756 756 type Error = DirstateV2ParseError;
757 757
758 758 fn try_from(
759 759 timestamp: PackedTruncatedTimestamp,
760 760 ) -> Result<Self, Self::Error> {
761 761 Self::from_already_truncated(
762 762 timestamp.truncated_seconds.get(),
763 763 timestamp.nanoseconds.get(),
764 764 )
765 765 }
766 766 }
767 767 impl PackedTruncatedTimestamp {
768 768 fn null() -> Self {
769 769 Self {
770 770 truncated_seconds: 0.into(),
771 771 nanoseconds: 0.into(),
772 772 }
773 773 }
774 774 }
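Finally, the 8-byte `PackedTruncatedTimestamp` is simply two big-endian 32-bit integers (truncated seconds, then nanoseconds). A Python sketch of decoding it without `bytes_cast`, with an illustrative helper name:

    import struct

    def unpack_truncated_timestamp(data):
        """Return (truncated_seconds, nanoseconds) from 8 on-disk bytes."""
        return struct.unpack(">II", data[:8])

    assert unpack_truncated_timestamp(b"\x00\x00\x00\x01\x00\x00\x00\x02") == (1, 2)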