##// END OF EJS Templates
dirstate-v2: add a new MTIME_SECOND_AMBIGUOUS flags...
marmoute -
r49080:9205d9be default
parent child Browse files
Show More
@@ -1,1311 +1,1319 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12 #include <ctype.h>
13 13 #include <stddef.h>
14 14 #include <string.h>
15 15
16 16 #include "bitmanipulation.h"
17 17 #include "charencode.h"
18 18 #include "util.h"
19 19
20 20 #ifdef IS_PY3K
21 21 /* The mapping of Python types is meant to be temporary to get Python
22 22 * 3 to compile. We should remove this once Python 3 support is fully
23 23 * supported and proper types are used in the extensions themselves. */
24 24 #define PyInt_Check PyLong_Check
25 25 #define PyInt_FromLong PyLong_FromLong
26 26 #define PyInt_FromSsize_t PyLong_FromSsize_t
27 27 #define PyInt_AsLong PyLong_AsLong
28 28 #endif
29 29
/* Error text for a Python minor version mismatch (presumably raised by the
 * module initialisation code, which is not part of this section). */
static const char *const versionerrortext = "Python minor version mismatch";

/* Sentinel values used in the dirstate-v1 "size" field; see
 * dirstate_item_c_v1_size() and dirstate_item_from_v1_data(). */
static const int dirstate_v1_from_p2 = -2;
static const int dirstate_v1_nonnormal = -1;
/* Sentinel used in the dirstate-v1 "mtime" field when the stored mtime
 * cannot be relied upon; see dirstate_item_c_v1_mtime(). */
static const int ambiguous_time = -1;
35 35
36 36 static PyObject *dict_new_presized(PyObject *self, PyObject *args)
37 37 {
38 38 Py_ssize_t expected_size;
39 39
40 40 if (!PyArg_ParseTuple(args, "n:make_presized_dict", &expected_size)) {
41 41 return NULL;
42 42 }
43 43
44 44 return _dict_new_presized(expected_size);
45 45 }
46 46
47 47 static PyObject *dirstate_item_new(PyTypeObject *subtype, PyObject *args,
48 48 PyObject *kwds)
49 49 {
50 50 /* We do all the initialization here and not a tp_init function because
51 51 * dirstate_item is immutable. */
52 52 dirstateItemObject *t;
53 53 int wc_tracked;
54 54 int p1_tracked;
55 55 int p2_info;
56 56 int has_meaningful_data;
57 57 int has_meaningful_mtime;
58 58 int mode;
59 59 int size;
60 60 int mtime_s;
61 61 int mtime_ns;
62 62 PyObject *parentfiledata;
63 63 PyObject *fallback_exec;
64 64 PyObject *fallback_symlink;
65 65 static char *keywords_name[] = {
66 66 "wc_tracked", "p1_tracked", "p2_info",
67 67 "has_meaningful_data", "has_meaningful_mtime", "parentfiledata",
68 68 "fallback_exec", "fallback_symlink", NULL,
69 69 };
70 70 wc_tracked = 0;
71 71 p1_tracked = 0;
72 72 p2_info = 0;
73 73 has_meaningful_mtime = 1;
74 74 has_meaningful_data = 1;
75 75 parentfiledata = Py_None;
76 76 fallback_exec = Py_None;
77 77 fallback_symlink = Py_None;
78 78 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|iiiiiOOO", keywords_name,
79 79 &wc_tracked, &p1_tracked, &p2_info,
80 80 &has_meaningful_data,
81 81 &has_meaningful_mtime, &parentfiledata,
82 82 &fallback_exec, &fallback_symlink)) {
83 83 return NULL;
84 84 }
85 85 t = (dirstateItemObject *)subtype->tp_alloc(subtype, 1);
86 86 if (!t) {
87 87 return NULL;
88 88 }
89 89
90 90 t->flags = 0;
91 91 if (wc_tracked) {
92 92 t->flags |= dirstate_flag_wc_tracked;
93 93 }
94 94 if (p1_tracked) {
95 95 t->flags |= dirstate_flag_p1_tracked;
96 96 }
97 97 if (p2_info) {
98 98 t->flags |= dirstate_flag_p2_info;
99 99 }
100 100
101 101 if (fallback_exec != Py_None) {
102 102 t->flags |= dirstate_flag_has_fallback_exec;
103 103 if (PyObject_IsTrue(fallback_exec)) {
104 104 t->flags |= dirstate_flag_fallback_exec;
105 105 }
106 106 }
107 107 if (fallback_symlink != Py_None) {
108 108 t->flags |= dirstate_flag_has_fallback_symlink;
109 109 if (PyObject_IsTrue(fallback_symlink)) {
110 110 t->flags |= dirstate_flag_fallback_symlink;
111 111 }
112 112 }
113 113
114 114 if (parentfiledata != Py_None) {
115 115 if (!PyArg_ParseTuple(parentfiledata, "ii(ii)", &mode, &size,
116 116 &mtime_s, &mtime_ns)) {
117 117 return NULL;
118 118 }
119 119 } else {
120 120 has_meaningful_data = 0;
121 121 has_meaningful_mtime = 0;
122 122 }
123 123 if (has_meaningful_data) {
124 124 t->flags |= dirstate_flag_has_meaningful_data;
125 125 t->mode = mode;
126 126 t->size = size;
127 127 } else {
128 128 t->mode = 0;
129 129 t->size = 0;
130 130 }
131 131 if (has_meaningful_mtime) {
132 132 t->flags |= dirstate_flag_has_file_mtime;
133 133 t->mtime_s = mtime_s;
134 134 t->mtime_ns = mtime_ns;
135 135 } else {
136 136 t->mtime_s = 0;
137 137 t->mtime_ns = 0;
138 138 }
139 139 return (PyObject *)t;
140 140 }
141 141
142 142 static void dirstate_item_dealloc(PyObject *o)
143 143 {
144 144 PyObject_Del(o);
145 145 }
146 146
147 147 static inline bool dirstate_item_c_tracked(dirstateItemObject *self)
148 148 {
149 149 return (self->flags & dirstate_flag_wc_tracked);
150 150 }
151 151
152 152 static inline bool dirstate_item_c_any_tracked(dirstateItemObject *self)
153 153 {
154 154 const int mask = dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
155 155 dirstate_flag_p2_info;
156 156 return (self->flags & mask);
157 157 }
158 158
159 159 static inline bool dirstate_item_c_added(dirstateItemObject *self)
160 160 {
161 161 const int mask = (dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
162 162 dirstate_flag_p2_info);
163 163 const int target = dirstate_flag_wc_tracked;
164 164 return (self->flags & mask) == target;
165 165 }
166 166
167 167 static inline bool dirstate_item_c_removed(dirstateItemObject *self)
168 168 {
169 169 if (self->flags & dirstate_flag_wc_tracked) {
170 170 return false;
171 171 }
172 172 return (self->flags &
173 173 (dirstate_flag_p1_tracked | dirstate_flag_p2_info));
174 174 }
175 175
176 176 static inline bool dirstate_item_c_merged(dirstateItemObject *self)
177 177 {
178 178 return ((self->flags & dirstate_flag_wc_tracked) &&
179 179 (self->flags & dirstate_flag_p1_tracked) &&
180 180 (self->flags & dirstate_flag_p2_info));
181 181 }
182 182
183 183 static inline bool dirstate_item_c_from_p2(dirstateItemObject *self)
184 184 {
185 185 return ((self->flags & dirstate_flag_wc_tracked) &&
186 186 !(self->flags & dirstate_flag_p1_tracked) &&
187 187 (self->flags & dirstate_flag_p2_info));
188 188 }
189 189
190 190 static inline char dirstate_item_c_v1_state(dirstateItemObject *self)
191 191 {
192 192 if (dirstate_item_c_removed(self)) {
193 193 return 'r';
194 194 } else if (dirstate_item_c_merged(self)) {
195 195 return 'm';
196 196 } else if (dirstate_item_c_added(self)) {
197 197 return 'a';
198 198 } else {
199 199 return 'n';
200 200 }
201 201 }
202 202
203 203 static inline bool dirstate_item_c_has_fallback_exec(dirstateItemObject *self)
204 204 {
205 205 return (bool)self->flags & dirstate_flag_has_fallback_exec;
206 206 }
207 207
208 208 static inline bool
209 209 dirstate_item_c_has_fallback_symlink(dirstateItemObject *self)
210 210 {
211 211 return (bool)self->flags & dirstate_flag_has_fallback_symlink;
212 212 }
213 213
214 214 static inline int dirstate_item_c_v1_mode(dirstateItemObject *self)
215 215 {
216 216 if (self->flags & dirstate_flag_has_meaningful_data) {
217 217 return self->mode;
218 218 } else {
219 219 return 0;
220 220 }
221 221 }
222 222
223 223 static inline int dirstate_item_c_v1_size(dirstateItemObject *self)
224 224 {
225 225 if (!(self->flags & dirstate_flag_wc_tracked) &&
226 226 (self->flags & dirstate_flag_p2_info)) {
227 227 if (self->flags & dirstate_flag_p1_tracked) {
228 228 return dirstate_v1_nonnormal;
229 229 } else {
230 230 return dirstate_v1_from_p2;
231 231 }
232 232 } else if (dirstate_item_c_removed(self)) {
233 233 return 0;
234 234 } else if (self->flags & dirstate_flag_p2_info) {
235 235 return dirstate_v1_from_p2;
236 236 } else if (dirstate_item_c_added(self)) {
237 237 return dirstate_v1_nonnormal;
238 238 } else if (self->flags & dirstate_flag_has_meaningful_data) {
239 239 return self->size;
240 240 } else {
241 241 return dirstate_v1_nonnormal;
242 242 }
243 243 }
244 244
245 245 static inline int dirstate_item_c_v1_mtime(dirstateItemObject *self)
246 246 {
247 247 if (dirstate_item_c_removed(self)) {
248 248 return 0;
249 249 } else if (!(self->flags & dirstate_flag_has_file_mtime) ||
250 250 !(self->flags & dirstate_flag_p1_tracked) ||
251 251 !(self->flags & dirstate_flag_wc_tracked) ||
252 252 (self->flags & dirstate_flag_p2_info)) {
253 253 return ambiguous_time;
254 254 } else {
255 255 return self->mtime_s;
256 256 }
257 257 }
258 258
259 259 static PyObject *dirstate_item_v2_data(dirstateItemObject *self)
260 260 {
261 261 int flags = self->flags;
262 262 int mode = dirstate_item_c_v1_mode(self);
263 263 if ((mode & S_IXUSR) != 0) {
264 264 flags |= dirstate_flag_mode_exec_perm;
265 265 } else {
266 266 flags &= ~dirstate_flag_mode_exec_perm;
267 267 }
268 268 if (S_ISLNK(mode)) {
269 269 flags |= dirstate_flag_mode_is_symlink;
270 270 } else {
271 271 flags &= ~dirstate_flag_mode_is_symlink;
272 272 }
273 273 return Py_BuildValue("iiii", flags, self->size, self->mtime_s,
274 274 self->mtime_ns);
275 275 };
276 276
277 277 static PyObject *dirstate_item_v1_state(dirstateItemObject *self)
278 278 {
279 279 char state = dirstate_item_c_v1_state(self);
280 280 return PyBytes_FromStringAndSize(&state, 1);
281 281 };
282 282
283 283 static PyObject *dirstate_item_v1_mode(dirstateItemObject *self)
284 284 {
285 285 return PyInt_FromLong(dirstate_item_c_v1_mode(self));
286 286 };
287 287
288 288 static PyObject *dirstate_item_v1_size(dirstateItemObject *self)
289 289 {
290 290 return PyInt_FromLong(dirstate_item_c_v1_size(self));
291 291 };
292 292
293 293 static PyObject *dirstate_item_v1_mtime(dirstateItemObject *self)
294 294 {
295 295 return PyInt_FromLong(dirstate_item_c_v1_mtime(self));
296 296 };
297 297
298 298 static PyObject *dirstate_item_need_delay(dirstateItemObject *self,
299 299 PyObject *now)
300 300 {
301 301 int now_s;
302 302 int now_ns;
303 303 if (!PyArg_ParseTuple(now, "ii", &now_s, &now_ns)) {
304 304 return NULL;
305 305 }
306 306 if (dirstate_item_c_v1_state(self) == 'n' && self->mtime_s == now_s) {
307 307 Py_RETURN_TRUE;
308 308 } else {
309 309 Py_RETURN_FALSE;
310 310 }
311 311 };
312 312
313 313 static PyObject *dirstate_item_mtime_likely_equal_to(dirstateItemObject *self,
314 314 PyObject *other)
315 315 {
316 316 int other_s;
317 317 int other_ns;
318 318 if (!PyArg_ParseTuple(other, "ii", &other_s, &other_ns)) {
319 319 return NULL;
320 320 }
321 321 if ((self->flags & dirstate_flag_has_file_mtime) &&
322 322 self->mtime_s == other_s && self->mtime_ns == other_ns) {
323 323 Py_RETURN_TRUE;
324 324 } else {
325 325 Py_RETURN_FALSE;
326 326 }
327 327 };
328 328
329 329 /* This will never change since it's bound to V1
330 330 */
331 331 static inline dirstateItemObject *
332 332 dirstate_item_from_v1_data(char state, int mode, int size, int mtime)
333 333 {
334 334 dirstateItemObject *t =
335 335 PyObject_New(dirstateItemObject, &dirstateItemType);
336 336 if (!t) {
337 337 return NULL;
338 338 }
339 339 t->flags = 0;
340 340 t->mode = 0;
341 341 t->size = 0;
342 342 t->mtime_s = 0;
343 343 t->mtime_ns = 0;
344 344
345 345 if (state == 'm') {
346 346 t->flags = (dirstate_flag_wc_tracked |
347 347 dirstate_flag_p1_tracked | dirstate_flag_p2_info);
348 348 } else if (state == 'a') {
349 349 t->flags = dirstate_flag_wc_tracked;
350 350 } else if (state == 'r') {
351 351 if (size == dirstate_v1_nonnormal) {
352 352 t->flags =
353 353 dirstate_flag_p1_tracked | dirstate_flag_p2_info;
354 354 } else if (size == dirstate_v1_from_p2) {
355 355 t->flags = dirstate_flag_p2_info;
356 356 } else {
357 357 t->flags = dirstate_flag_p1_tracked;
358 358 }
359 359 } else if (state == 'n') {
360 360 if (size == dirstate_v1_from_p2) {
361 361 t->flags =
362 362 dirstate_flag_wc_tracked | dirstate_flag_p2_info;
363 363 } else if (size == dirstate_v1_nonnormal) {
364 364 t->flags =
365 365 dirstate_flag_wc_tracked | dirstate_flag_p1_tracked;
366 366 } else if (mtime == ambiguous_time) {
367 367 t->flags = (dirstate_flag_wc_tracked |
368 368 dirstate_flag_p1_tracked |
369 369 dirstate_flag_has_meaningful_data);
370 370 t->mode = mode;
371 371 t->size = size;
372 372 } else {
373 373 t->flags = (dirstate_flag_wc_tracked |
374 374 dirstate_flag_p1_tracked |
375 375 dirstate_flag_has_meaningful_data |
376 376 dirstate_flag_has_file_mtime);
377 377 t->mode = mode;
378 378 t->size = size;
379 379 t->mtime_s = mtime;
380 380 }
381 381 } else {
382 382 PyErr_Format(PyExc_RuntimeError,
383 383 "unknown state: `%c` (%d, %d, %d)", state, mode,
384 384 size, mtime, NULL);
385 385 Py_DECREF(t);
386 386 return NULL;
387 387 }
388 388
389 389 return t;
390 390 }
391 391
392 392 /* This will never change since it's bound to V1, unlike `dirstate_item_new` */
393 393 static PyObject *dirstate_item_from_v1_meth(PyTypeObject *subtype,
394 394 PyObject *args)
395 395 {
396 396 /* We do all the initialization here and not a tp_init function because
397 397 * dirstate_item is immutable. */
398 398 char state;
399 399 int size, mode, mtime;
400 400 if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime)) {
401 401 return NULL;
402 402 }
403 403 return (PyObject *)dirstate_item_from_v1_data(state, mode, size, mtime);
404 404 };
405 405
406 406 static PyObject *dirstate_item_from_v2_meth(PyTypeObject *subtype,
407 407 PyObject *args)
408 408 {
409 409 dirstateItemObject *t =
410 410 PyObject_New(dirstateItemObject, &dirstateItemType);
411 411 if (!t) {
412 412 return NULL;
413 413 }
414 414 if (!PyArg_ParseTuple(args, "iiii", &t->flags, &t->size, &t->mtime_s,
415 415 &t->mtime_ns)) {
416 416 return NULL;
417 417 }
418 418 if (t->flags & dirstate_flag_expected_state_is_modified) {
419 419 t->flags &= ~(dirstate_flag_expected_state_is_modified |
420 420 dirstate_flag_has_meaningful_data |
421 421 dirstate_flag_has_file_mtime);
422 422 }
423 if (t->flags & dirstate_flag_mtime_second_ambiguous) {
424 /* The current code is not able to do the more subtle comparison
425 * that the MTIME_SECOND_AMBIGUOUS requires. So we ignore the
426 * mtime */
427 t->flags &= ~(dirstate_flag_mtime_second_ambiguous |
428 dirstate_flag_has_meaningful_data |
429 dirstate_flag_has_file_mtime);
430 }
423 431 t->mode = 0;
424 432 if (t->flags & dirstate_flag_has_meaningful_data) {
425 433 if (t->flags & dirstate_flag_mode_exec_perm) {
426 434 t->mode = 0755;
427 435 } else {
428 436 t->mode = 0644;
429 437 }
430 438 if (t->flags & dirstate_flag_mode_is_symlink) {
431 439 t->mode |= S_IFLNK;
432 440 } else {
433 441 t->mode |= S_IFREG;
434 442 }
435 443 }
436 444 return (PyObject *)t;
437 445 };
438 446
439 447 /* This means the next status call will have to actually check its content
440 448 to make sure it is correct. */
441 449 static PyObject *dirstate_item_set_possibly_dirty(dirstateItemObject *self)
442 450 {
443 451 self->flags &= ~dirstate_flag_has_file_mtime;
444 452 Py_RETURN_NONE;
445 453 }
446 454
447 455 /* See docstring of the python implementation for details */
448 456 static PyObject *dirstate_item_set_clean(dirstateItemObject *self,
449 457 PyObject *args)
450 458 {
451 459 int size, mode, mtime_s, mtime_ns;
452 460 if (!PyArg_ParseTuple(args, "ii(ii)", &mode, &size, &mtime_s,
453 461 &mtime_ns)) {
454 462 return NULL;
455 463 }
456 464 self->flags = dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
457 465 dirstate_flag_has_meaningful_data |
458 466 dirstate_flag_has_file_mtime;
459 467 self->mode = mode;
460 468 self->size = size;
461 469 self->mtime_s = mtime_s;
462 470 self->mtime_ns = mtime_ns;
463 471 Py_RETURN_NONE;
464 472 }
465 473
466 474 static PyObject *dirstate_item_set_tracked(dirstateItemObject *self)
467 475 {
468 476 self->flags |= dirstate_flag_wc_tracked;
469 477 self->flags &= ~dirstate_flag_has_file_mtime;
470 478 Py_RETURN_NONE;
471 479 }
472 480
473 481 static PyObject *dirstate_item_set_untracked(dirstateItemObject *self)
474 482 {
475 483 self->flags &= ~dirstate_flag_wc_tracked;
476 484 self->mode = 0;
477 485 self->size = 0;
478 486 self->mtime_s = 0;
479 487 self->mtime_ns = 0;
480 488 Py_RETURN_NONE;
481 489 }
482 490
483 491 static PyObject *dirstate_item_drop_merge_data(dirstateItemObject *self)
484 492 {
485 493 if (self->flags & dirstate_flag_p2_info) {
486 494 self->flags &= ~(dirstate_flag_p2_info |
487 495 dirstate_flag_has_meaningful_data |
488 496 dirstate_flag_has_file_mtime);
489 497 self->mode = 0;
490 498 self->size = 0;
491 499 self->mtime_s = 0;
492 500 self->mtime_ns = 0;
493 501 }
494 502 Py_RETURN_NONE;
495 503 }
496 504 static PyMethodDef dirstate_item_methods[] = {
497 505 {"v2_data", (PyCFunction)dirstate_item_v2_data, METH_NOARGS,
498 506 "return data suitable for v2 serialization"},
499 507 {"v1_state", (PyCFunction)dirstate_item_v1_state, METH_NOARGS,
500 508 "return a \"state\" suitable for v1 serialization"},
501 509 {"v1_mode", (PyCFunction)dirstate_item_v1_mode, METH_NOARGS,
502 510 "return a \"mode\" suitable for v1 serialization"},
503 511 {"v1_size", (PyCFunction)dirstate_item_v1_size, METH_NOARGS,
504 512 "return a \"size\" suitable for v1 serialization"},
505 513 {"v1_mtime", (PyCFunction)dirstate_item_v1_mtime, METH_NOARGS,
506 514 "return a \"mtime\" suitable for v1 serialization"},
507 515 {"need_delay", (PyCFunction)dirstate_item_need_delay, METH_O,
508 516 "True if the stored mtime would be ambiguous with the current time"},
509 517 {"mtime_likely_equal_to", (PyCFunction)dirstate_item_mtime_likely_equal_to,
510 518 METH_O, "True if the stored mtime is likely equal to the given mtime"},
511 519 {"from_v1_data", (PyCFunction)dirstate_item_from_v1_meth,
512 520 METH_VARARGS | METH_CLASS, "build a new DirstateItem object from V1 data"},
513 521 {"from_v2_data", (PyCFunction)dirstate_item_from_v2_meth,
514 522 METH_VARARGS | METH_CLASS, "build a new DirstateItem object from V2 data"},
515 523 {"set_possibly_dirty", (PyCFunction)dirstate_item_set_possibly_dirty,
516 524 METH_NOARGS, "mark a file as \"possibly dirty\""},
517 525 {"set_clean", (PyCFunction)dirstate_item_set_clean, METH_VARARGS,
518 526 "mark a file as \"clean\""},
519 527 {"set_tracked", (PyCFunction)dirstate_item_set_tracked, METH_NOARGS,
520 528 "mark a file as \"tracked\""},
521 529 {"set_untracked", (PyCFunction)dirstate_item_set_untracked, METH_NOARGS,
522 530 "mark a file as \"untracked\""},
523 531 {"drop_merge_data", (PyCFunction)dirstate_item_drop_merge_data, METH_NOARGS,
524 532 "remove all \"merge-only\" from a DirstateItem"},
525 533 {NULL} /* Sentinel */
526 534 };
527 535
528 536 static PyObject *dirstate_item_get_mode(dirstateItemObject *self)
529 537 {
530 538 return PyInt_FromLong(dirstate_item_c_v1_mode(self));
531 539 };
532 540
533 541 static PyObject *dirstate_item_get_size(dirstateItemObject *self)
534 542 {
535 543 return PyInt_FromLong(dirstate_item_c_v1_size(self));
536 544 };
537 545
538 546 static PyObject *dirstate_item_get_mtime(dirstateItemObject *self)
539 547 {
540 548 return PyInt_FromLong(dirstate_item_c_v1_mtime(self));
541 549 };
542 550
543 551 static PyObject *dirstate_item_get_state(dirstateItemObject *self)
544 552 {
545 553 char state = dirstate_item_c_v1_state(self);
546 554 return PyBytes_FromStringAndSize(&state, 1);
547 555 };
548 556
549 557 static PyObject *dirstate_item_get_has_fallback_exec(dirstateItemObject *self)
550 558 {
551 559 if (dirstate_item_c_has_fallback_exec(self)) {
552 560 Py_RETURN_TRUE;
553 561 } else {
554 562 Py_RETURN_FALSE;
555 563 }
556 564 };
557 565
558 566 static PyObject *dirstate_item_get_fallback_exec(dirstateItemObject *self)
559 567 {
560 568 if (dirstate_item_c_has_fallback_exec(self)) {
561 569 if (self->flags & dirstate_flag_fallback_exec) {
562 570 Py_RETURN_TRUE;
563 571 } else {
564 572 Py_RETURN_FALSE;
565 573 }
566 574 } else {
567 575 Py_RETURN_NONE;
568 576 }
569 577 };
570 578
571 579 static int dirstate_item_set_fallback_exec(dirstateItemObject *self,
572 580 PyObject *value)
573 581 {
574 582 if ((value == Py_None) || (value == NULL)) {
575 583 self->flags &= ~dirstate_flag_has_fallback_exec;
576 584 } else {
577 585 self->flags |= dirstate_flag_has_fallback_exec;
578 586 if (PyObject_IsTrue(value)) {
579 587 self->flags |= dirstate_flag_fallback_exec;
580 588 } else {
581 589 self->flags &= ~dirstate_flag_fallback_exec;
582 590 }
583 591 }
584 592 return 0;
585 593 };
586 594
587 595 static PyObject *
588 596 dirstate_item_get_has_fallback_symlink(dirstateItemObject *self)
589 597 {
590 598 if (dirstate_item_c_has_fallback_symlink(self)) {
591 599 Py_RETURN_TRUE;
592 600 } else {
593 601 Py_RETURN_FALSE;
594 602 }
595 603 };
596 604
597 605 static PyObject *dirstate_item_get_fallback_symlink(dirstateItemObject *self)
598 606 {
599 607 if (dirstate_item_c_has_fallback_symlink(self)) {
600 608 if (self->flags & dirstate_flag_fallback_symlink) {
601 609 Py_RETURN_TRUE;
602 610 } else {
603 611 Py_RETURN_FALSE;
604 612 }
605 613 } else {
606 614 Py_RETURN_NONE;
607 615 }
608 616 };
609 617
610 618 static int dirstate_item_set_fallback_symlink(dirstateItemObject *self,
611 619 PyObject *value)
612 620 {
613 621 if ((value == Py_None) || (value == NULL)) {
614 622 self->flags &= ~dirstate_flag_has_fallback_symlink;
615 623 } else {
616 624 self->flags |= dirstate_flag_has_fallback_symlink;
617 625 if (PyObject_IsTrue(value)) {
618 626 self->flags |= dirstate_flag_fallback_symlink;
619 627 } else {
620 628 self->flags &= ~dirstate_flag_fallback_symlink;
621 629 }
622 630 }
623 631 return 0;
624 632 };
625 633
626 634 static PyObject *dirstate_item_get_tracked(dirstateItemObject *self)
627 635 {
628 636 if (dirstate_item_c_tracked(self)) {
629 637 Py_RETURN_TRUE;
630 638 } else {
631 639 Py_RETURN_FALSE;
632 640 }
633 641 };
634 642 static PyObject *dirstate_item_get_p1_tracked(dirstateItemObject *self)
635 643 {
636 644 if (self->flags & dirstate_flag_p1_tracked) {
637 645 Py_RETURN_TRUE;
638 646 } else {
639 647 Py_RETURN_FALSE;
640 648 }
641 649 };
642 650
643 651 static PyObject *dirstate_item_get_added(dirstateItemObject *self)
644 652 {
645 653 if (dirstate_item_c_added(self)) {
646 654 Py_RETURN_TRUE;
647 655 } else {
648 656 Py_RETURN_FALSE;
649 657 }
650 658 };
651 659
652 660 static PyObject *dirstate_item_get_p2_info(dirstateItemObject *self)
653 661 {
654 662 if (self->flags & dirstate_flag_wc_tracked &&
655 663 self->flags & dirstate_flag_p2_info) {
656 664 Py_RETURN_TRUE;
657 665 } else {
658 666 Py_RETURN_FALSE;
659 667 }
660 668 };
661 669
662 670 static PyObject *dirstate_item_get_merged(dirstateItemObject *self)
663 671 {
664 672 if (dirstate_item_c_merged(self)) {
665 673 Py_RETURN_TRUE;
666 674 } else {
667 675 Py_RETURN_FALSE;
668 676 }
669 677 };
670 678
671 679 static PyObject *dirstate_item_get_from_p2(dirstateItemObject *self)
672 680 {
673 681 if (dirstate_item_c_from_p2(self)) {
674 682 Py_RETURN_TRUE;
675 683 } else {
676 684 Py_RETURN_FALSE;
677 685 }
678 686 };
679 687
680 688 static PyObject *dirstate_item_get_maybe_clean(dirstateItemObject *self)
681 689 {
682 690 if (!(self->flags & dirstate_flag_wc_tracked)) {
683 691 Py_RETURN_FALSE;
684 692 } else if (!(self->flags & dirstate_flag_p1_tracked)) {
685 693 Py_RETURN_FALSE;
686 694 } else if (self->flags & dirstate_flag_p2_info) {
687 695 Py_RETURN_FALSE;
688 696 } else {
689 697 Py_RETURN_TRUE;
690 698 }
691 699 };
692 700
693 701 static PyObject *dirstate_item_get_any_tracked(dirstateItemObject *self)
694 702 {
695 703 if (dirstate_item_c_any_tracked(self)) {
696 704 Py_RETURN_TRUE;
697 705 } else {
698 706 Py_RETURN_FALSE;
699 707 }
700 708 };
701 709
702 710 static PyObject *dirstate_item_get_removed(dirstateItemObject *self)
703 711 {
704 712 if (dirstate_item_c_removed(self)) {
705 713 Py_RETURN_TRUE;
706 714 } else {
707 715 Py_RETURN_FALSE;
708 716 }
709 717 };
710 718
711 719 static PyGetSetDef dirstate_item_getset[] = {
712 720 {"mode", (getter)dirstate_item_get_mode, NULL, "mode", NULL},
713 721 {"size", (getter)dirstate_item_get_size, NULL, "size", NULL},
714 722 {"mtime", (getter)dirstate_item_get_mtime, NULL, "mtime", NULL},
715 723 {"state", (getter)dirstate_item_get_state, NULL, "state", NULL},
716 724 {"has_fallback_exec", (getter)dirstate_item_get_has_fallback_exec, NULL,
717 725 "has_fallback_exec", NULL},
718 726 {"fallback_exec", (getter)dirstate_item_get_fallback_exec,
719 727 (setter)dirstate_item_set_fallback_exec, "fallback_exec", NULL},
720 728 {"has_fallback_symlink", (getter)dirstate_item_get_has_fallback_symlink,
721 729 NULL, "has_fallback_symlink", NULL},
722 730 {"fallback_symlink", (getter)dirstate_item_get_fallback_symlink,
723 731 (setter)dirstate_item_set_fallback_symlink, "fallback_symlink", NULL},
724 732 {"tracked", (getter)dirstate_item_get_tracked, NULL, "tracked", NULL},
725 733 {"p1_tracked", (getter)dirstate_item_get_p1_tracked, NULL, "p1_tracked",
726 734 NULL},
727 735 {"added", (getter)dirstate_item_get_added, NULL, "added", NULL},
728 736 {"p2_info", (getter)dirstate_item_get_p2_info, NULL, "p2_info", NULL},
729 737 {"merged", (getter)dirstate_item_get_merged, NULL, "merged", NULL},
730 738 {"from_p2", (getter)dirstate_item_get_from_p2, NULL, "from_p2", NULL},
731 739 {"maybe_clean", (getter)dirstate_item_get_maybe_clean, NULL, "maybe_clean",
732 740 NULL},
733 741 {"any_tracked", (getter)dirstate_item_get_any_tracked, NULL, "any_tracked",
734 742 NULL},
735 743 {"removed", (getter)dirstate_item_get_removed, NULL, "removed", NULL},
736 744 {NULL} /* Sentinel */
737 745 };
738 746
739 747 PyTypeObject dirstateItemType = {
740 748 PyVarObject_HEAD_INIT(NULL, 0) /* header */
741 749 "dirstate_tuple", /* tp_name */
742 750 sizeof(dirstateItemObject), /* tp_basicsize */
743 751 0, /* tp_itemsize */
744 752 (destructor)dirstate_item_dealloc, /* tp_dealloc */
745 753 0, /* tp_print */
746 754 0, /* tp_getattr */
747 755 0, /* tp_setattr */
748 756 0, /* tp_compare */
749 757 0, /* tp_repr */
750 758 0, /* tp_as_number */
751 759 0, /* tp_as_sequence */
752 760 0, /* tp_as_mapping */
753 761 0, /* tp_hash */
754 762 0, /* tp_call */
755 763 0, /* tp_str */
756 764 0, /* tp_getattro */
757 765 0, /* tp_setattro */
758 766 0, /* tp_as_buffer */
759 767 Py_TPFLAGS_DEFAULT, /* tp_flags */
760 768 "dirstate tuple", /* tp_doc */
761 769 0, /* tp_traverse */
762 770 0, /* tp_clear */
763 771 0, /* tp_richcompare */
764 772 0, /* tp_weaklistoffset */
765 773 0, /* tp_iter */
766 774 0, /* tp_iternext */
767 775 dirstate_item_methods, /* tp_methods */
768 776 0, /* tp_members */
769 777 dirstate_item_getset, /* tp_getset */
770 778 0, /* tp_base */
771 779 0, /* tp_dict */
772 780 0, /* tp_descr_get */
773 781 0, /* tp_descr_set */
774 782 0, /* tp_dictoffset */
775 783 0, /* tp_init */
776 784 0, /* tp_alloc */
777 785 dirstate_item_new, /* tp_new */
778 786 };
779 787
780 788 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
781 789 {
782 790 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
783 791 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
784 792 char state, *cur, *str, *cpos;
785 793 int mode, size, mtime;
786 794 unsigned int flen, pos = 40;
787 795 Py_ssize_t len = 40;
788 796 Py_ssize_t readlen;
789 797
790 798 if (!PyArg_ParseTuple(
791 799 args, PY23("O!O!s#:parse_dirstate", "O!O!y#:parse_dirstate"),
792 800 &PyDict_Type, &dmap, &PyDict_Type, &cmap, &str, &readlen)) {
793 801 goto quit;
794 802 }
795 803
796 804 len = readlen;
797 805
798 806 /* read parents */
799 807 if (len < 40) {
800 808 PyErr_SetString(PyExc_ValueError,
801 809 "too little data for parents");
802 810 goto quit;
803 811 }
804 812
805 813 parents = Py_BuildValue(PY23("s#s#", "y#y#"), str, (Py_ssize_t)20,
806 814 str + 20, (Py_ssize_t)20);
807 815 if (!parents) {
808 816 goto quit;
809 817 }
810 818
811 819 /* read filenames */
812 820 while (pos >= 40 && pos < len) {
813 821 if (pos + 17 > len) {
814 822 PyErr_SetString(PyExc_ValueError,
815 823 "overflow in dirstate");
816 824 goto quit;
817 825 }
818 826 cur = str + pos;
819 827 /* unpack header */
820 828 state = *cur;
821 829 mode = getbe32(cur + 1);
822 830 size = getbe32(cur + 5);
823 831 mtime = getbe32(cur + 9);
824 832 flen = getbe32(cur + 13);
825 833 pos += 17;
826 834 cur += 17;
827 835 if (flen > len - pos) {
828 836 PyErr_SetString(PyExc_ValueError,
829 837 "overflow in dirstate");
830 838 goto quit;
831 839 }
832 840
833 841 entry = (PyObject *)dirstate_item_from_v1_data(state, mode,
834 842 size, mtime);
835 843 if (!entry)
836 844 goto quit;
837 845 cpos = memchr(cur, 0, flen);
838 846 if (cpos) {
839 847 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
840 848 cname = PyBytes_FromStringAndSize(
841 849 cpos + 1, flen - (cpos - cur) - 1);
842 850 if (!fname || !cname ||
843 851 PyDict_SetItem(cmap, fname, cname) == -1 ||
844 852 PyDict_SetItem(dmap, fname, entry) == -1) {
845 853 goto quit;
846 854 }
847 855 Py_DECREF(cname);
848 856 } else {
849 857 fname = PyBytes_FromStringAndSize(cur, flen);
850 858 if (!fname ||
851 859 PyDict_SetItem(dmap, fname, entry) == -1) {
852 860 goto quit;
853 861 }
854 862 }
855 863 Py_DECREF(fname);
856 864 Py_DECREF(entry);
857 865 fname = cname = entry = NULL;
858 866 pos += flen;
859 867 }
860 868
861 869 ret = parents;
862 870 Py_INCREF(ret);
863 871 quit:
864 872 Py_XDECREF(fname);
865 873 Py_XDECREF(cname);
866 874 Py_XDECREF(entry);
867 875 Py_XDECREF(parents);
868 876 return ret;
869 877 }
870 878
871 879 /*
872 880 * Efficiently pack a dirstate object into its on-disk format.
873 881 */
874 882 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
875 883 {
876 884 PyObject *packobj = NULL;
877 885 PyObject *map, *copymap, *pl, *mtime_unset = NULL;
878 886 Py_ssize_t nbytes, pos, l;
879 887 PyObject *k, *v = NULL, *pn;
880 888 char *p, *s;
881 889 int now_s;
882 890 int now_ns;
883 891
884 892 if (!PyArg_ParseTuple(args, "O!O!O!(ii):pack_dirstate", &PyDict_Type,
885 893 &map, &PyDict_Type, &copymap, &PyTuple_Type, &pl,
886 894 &now_s, &now_ns)) {
887 895 return NULL;
888 896 }
889 897
890 898 if (PyTuple_Size(pl) != 2) {
891 899 PyErr_SetString(PyExc_TypeError, "expected 2-element tuple");
892 900 return NULL;
893 901 }
894 902
895 903 /* Figure out how much we need to allocate. */
896 904 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
897 905 PyObject *c;
898 906 if (!PyBytes_Check(k)) {
899 907 PyErr_SetString(PyExc_TypeError, "expected string key");
900 908 goto bail;
901 909 }
902 910 nbytes += PyBytes_GET_SIZE(k) + 17;
903 911 c = PyDict_GetItem(copymap, k);
904 912 if (c) {
905 913 if (!PyBytes_Check(c)) {
906 914 PyErr_SetString(PyExc_TypeError,
907 915 "expected string key");
908 916 goto bail;
909 917 }
910 918 nbytes += PyBytes_GET_SIZE(c) + 1;
911 919 }
912 920 }
913 921
914 922 packobj = PyBytes_FromStringAndSize(NULL, nbytes);
915 923 if (packobj == NULL) {
916 924 goto bail;
917 925 }
918 926
919 927 p = PyBytes_AS_STRING(packobj);
920 928
921 929 pn = PyTuple_GET_ITEM(pl, 0);
922 930 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
923 931 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
924 932 goto bail;
925 933 }
926 934 memcpy(p, s, l);
927 935 p += 20;
928 936 pn = PyTuple_GET_ITEM(pl, 1);
929 937 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
930 938 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
931 939 goto bail;
932 940 }
933 941 memcpy(p, s, l);
934 942 p += 20;
935 943
936 944 for (pos = 0; PyDict_Next(map, &pos, &k, &v);) {
937 945 dirstateItemObject *tuple;
938 946 char state;
939 947 int mode, size, mtime;
940 948 Py_ssize_t len, l;
941 949 PyObject *o;
942 950 char *t;
943 951
944 952 if (!dirstate_tuple_check(v)) {
945 953 PyErr_SetString(PyExc_TypeError,
946 954 "expected a dirstate tuple");
947 955 goto bail;
948 956 }
949 957 tuple = (dirstateItemObject *)v;
950 958
951 959 state = dirstate_item_c_v1_state(tuple);
952 960 mode = dirstate_item_c_v1_mode(tuple);
953 961 size = dirstate_item_c_v1_size(tuple);
954 962 mtime = dirstate_item_c_v1_mtime(tuple);
955 963 if (state == 'n' && tuple->mtime_s == now_s) {
956 964 /* See pure/parsers.py:pack_dirstate for why we do
957 965 * this. */
958 966 mtime = -1;
959 967 mtime_unset = (PyObject *)dirstate_item_from_v1_data(
960 968 state, mode, size, mtime);
961 969 if (!mtime_unset) {
962 970 goto bail;
963 971 }
964 972 if (PyDict_SetItem(map, k, mtime_unset) == -1) {
965 973 goto bail;
966 974 }
967 975 Py_DECREF(mtime_unset);
968 976 mtime_unset = NULL;
969 977 }
970 978 *p++ = state;
971 979 putbe32((uint32_t)mode, p);
972 980 putbe32((uint32_t)size, p + 4);
973 981 putbe32((uint32_t)mtime, p + 8);
974 982 t = p + 12;
975 983 p += 16;
976 984 len = PyBytes_GET_SIZE(k);
977 985 memcpy(p, PyBytes_AS_STRING(k), len);
978 986 p += len;
979 987 o = PyDict_GetItem(copymap, k);
980 988 if (o) {
981 989 *p++ = '\0';
982 990 l = PyBytes_GET_SIZE(o);
983 991 memcpy(p, PyBytes_AS_STRING(o), l);
984 992 p += l;
985 993 len += l + 1;
986 994 }
987 995 putbe32((uint32_t)len, t);
988 996 }
989 997
990 998 pos = p - PyBytes_AS_STRING(packobj);
991 999 if (pos != nbytes) {
992 1000 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
993 1001 (long)pos, (long)nbytes);
994 1002 goto bail;
995 1003 }
996 1004
997 1005 return packobj;
998 1006 bail:
999 1007 Py_XDECREF(mtime_unset);
1000 1008 Py_XDECREF(packobj);
1001 1009 Py_XDECREF(v);
1002 1010 return NULL;
1003 1011 }
1004 1012
1005 1013 #define BUMPED_FIX 1
1006 1014 #define USING_SHA_256 2
1007 1015 #define FM1_HEADER_SIZE (4 + 8 + 2 + 2 + 1 + 1 + 1)
1008 1016
1009 1017 static PyObject *readshas(const char *source, unsigned char num,
1010 1018 Py_ssize_t hashwidth)
1011 1019 {
1012 1020 int i;
1013 1021 PyObject *list = PyTuple_New(num);
1014 1022 if (list == NULL) {
1015 1023 return NULL;
1016 1024 }
1017 1025 for (i = 0; i < num; i++) {
1018 1026 PyObject *hash = PyBytes_FromStringAndSize(source, hashwidth);
1019 1027 if (hash == NULL) {
1020 1028 Py_DECREF(list);
1021 1029 return NULL;
1022 1030 }
1023 1031 PyTuple_SET_ITEM(list, i, hash);
1024 1032 source += hashwidth;
1025 1033 }
1026 1034 return list;
1027 1035 }
1028 1036
1029 1037 static PyObject *fm1readmarker(const char *databegin, const char *dataend,
1030 1038 uint32_t *msize)
1031 1039 {
1032 1040 const char *data = databegin;
1033 1041 const char *meta;
1034 1042
1035 1043 double mtime;
1036 1044 int16_t tz;
1037 1045 uint16_t flags;
1038 1046 unsigned char nsuccs, nparents, nmetadata;
1039 1047 Py_ssize_t hashwidth = 20;
1040 1048
1041 1049 PyObject *prec = NULL, *parents = NULL, *succs = NULL;
1042 1050 PyObject *metadata = NULL, *ret = NULL;
1043 1051 int i;
1044 1052
1045 1053 if (data + FM1_HEADER_SIZE > dataend) {
1046 1054 goto overflow;
1047 1055 }
1048 1056
1049 1057 *msize = getbe32(data);
1050 1058 data += 4;
1051 1059 mtime = getbefloat64(data);
1052 1060 data += 8;
1053 1061 tz = getbeint16(data);
1054 1062 data += 2;
1055 1063 flags = getbeuint16(data);
1056 1064 data += 2;
1057 1065
1058 1066 if (flags & USING_SHA_256) {
1059 1067 hashwidth = 32;
1060 1068 }
1061 1069
1062 1070 nsuccs = (unsigned char)(*data++);
1063 1071 nparents = (unsigned char)(*data++);
1064 1072 nmetadata = (unsigned char)(*data++);
1065 1073
1066 1074 if (databegin + *msize > dataend) {
1067 1075 goto overflow;
1068 1076 }
1069 1077 dataend = databegin + *msize; /* narrow down to marker size */
1070 1078
1071 1079 if (data + hashwidth > dataend) {
1072 1080 goto overflow;
1073 1081 }
1074 1082 prec = PyBytes_FromStringAndSize(data, hashwidth);
1075 1083 data += hashwidth;
1076 1084 if (prec == NULL) {
1077 1085 goto bail;
1078 1086 }
1079 1087
1080 1088 if (data + nsuccs * hashwidth > dataend) {
1081 1089 goto overflow;
1082 1090 }
1083 1091 succs = readshas(data, nsuccs, hashwidth);
1084 1092 if (succs == NULL) {
1085 1093 goto bail;
1086 1094 }
1087 1095 data += nsuccs * hashwidth;
1088 1096
1089 1097 if (nparents == 1 || nparents == 2) {
1090 1098 if (data + nparents * hashwidth > dataend) {
1091 1099 goto overflow;
1092 1100 }
1093 1101 parents = readshas(data, nparents, hashwidth);
1094 1102 if (parents == NULL) {
1095 1103 goto bail;
1096 1104 }
1097 1105 data += nparents * hashwidth;
1098 1106 } else {
1099 1107 parents = Py_None;
1100 1108 Py_INCREF(parents);
1101 1109 }
1102 1110
1103 1111 if (data + 2 * nmetadata > dataend) {
1104 1112 goto overflow;
1105 1113 }
1106 1114 meta = data + (2 * nmetadata);
1107 1115 metadata = PyTuple_New(nmetadata);
1108 1116 if (metadata == NULL) {
1109 1117 goto bail;
1110 1118 }
1111 1119 for (i = 0; i < nmetadata; i++) {
1112 1120 PyObject *tmp, *left = NULL, *right = NULL;
1113 1121 Py_ssize_t leftsize = (unsigned char)(*data++);
1114 1122 Py_ssize_t rightsize = (unsigned char)(*data++);
1115 1123 if (meta + leftsize + rightsize > dataend) {
1116 1124 goto overflow;
1117 1125 }
1118 1126 left = PyBytes_FromStringAndSize(meta, leftsize);
1119 1127 meta += leftsize;
1120 1128 right = PyBytes_FromStringAndSize(meta, rightsize);
1121 1129 meta += rightsize;
1122 1130 tmp = PyTuple_New(2);
1123 1131 if (!left || !right || !tmp) {
1124 1132 Py_XDECREF(left);
1125 1133 Py_XDECREF(right);
1126 1134 Py_XDECREF(tmp);
1127 1135 goto bail;
1128 1136 }
1129 1137 PyTuple_SET_ITEM(tmp, 0, left);
1130 1138 PyTuple_SET_ITEM(tmp, 1, right);
1131 1139 PyTuple_SET_ITEM(metadata, i, tmp);
1132 1140 }
1133 1141 ret = Py_BuildValue("(OOHO(di)O)", prec, succs, flags, metadata, mtime,
1134 1142 (int)tz * 60, parents);
1135 1143 goto bail; /* return successfully */
1136 1144
1137 1145 overflow:
1138 1146 PyErr_SetString(PyExc_ValueError, "overflow in obsstore");
1139 1147 bail:
1140 1148 Py_XDECREF(prec);
1141 1149 Py_XDECREF(succs);
1142 1150 Py_XDECREF(metadata);
1143 1151 Py_XDECREF(parents);
1144 1152 return ret;
1145 1153 }
1146 1154
1147 1155 static PyObject *fm1readmarkers(PyObject *self, PyObject *args)
1148 1156 {
1149 1157 const char *data, *dataend;
1150 1158 Py_ssize_t datalen, offset, stop;
1151 1159 PyObject *markers = NULL;
1152 1160
1153 1161 if (!PyArg_ParseTuple(args, PY23("s#nn", "y#nn"), &data, &datalen,
1154 1162 &offset, &stop)) {
1155 1163 return NULL;
1156 1164 }
1157 1165 if (offset < 0) {
1158 1166 PyErr_SetString(PyExc_ValueError,
1159 1167 "invalid negative offset in fm1readmarkers");
1160 1168 return NULL;
1161 1169 }
1162 1170 if (stop > datalen) {
1163 1171 PyErr_SetString(
1164 1172 PyExc_ValueError,
1165 1173 "stop longer than data length in fm1readmarkers");
1166 1174 return NULL;
1167 1175 }
1168 1176 dataend = data + datalen;
1169 1177 data += offset;
1170 1178 markers = PyList_New(0);
1171 1179 if (!markers) {
1172 1180 return NULL;
1173 1181 }
1174 1182 while (offset < stop) {
1175 1183 uint32_t msize;
1176 1184 int error;
1177 1185 PyObject *record = fm1readmarker(data, dataend, &msize);
1178 1186 if (!record) {
1179 1187 goto bail;
1180 1188 }
1181 1189 error = PyList_Append(markers, record);
1182 1190 Py_DECREF(record);
1183 1191 if (error) {
1184 1192 goto bail;
1185 1193 }
1186 1194 data += msize;
1187 1195 offset += msize;
1188 1196 }
1189 1197 return markers;
1190 1198 bail:
1191 1199 Py_DECREF(markers);
1192 1200 return NULL;
1193 1201 }
1194 1202
1195 1203 static char parsers_doc[] = "Efficient content parsing.";
1196 1204
1197 1205 PyObject *encodedir(PyObject *self, PyObject *args);
1198 1206 PyObject *pathencode(PyObject *self, PyObject *args);
1199 1207 PyObject *lowerencode(PyObject *self, PyObject *args);
1200 1208 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs);
1201 1209
/* Method table of the "parsers" extension module: each entry maps the
 * Python-visible name to its C implementation, calling convention and
 * docstring. The {NULL, NULL} entry is the terminating sentinel. */
1202 1210 static PyMethodDef methods[] = {
1203 1211 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
1204 1212 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
/* parse_index2 also accepts keyword arguments, hence the cast. */
1205 1213 {"parse_index2", (PyCFunction)parse_index2, METH_VARARGS | METH_KEYWORDS,
1206 1214 "parse a revlog index\n"},
1207 1215 {"isasciistr", isasciistr, METH_VARARGS, "check if an ASCII string\n"},
1208 1216 {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
1209 1217 {"asciiupper", asciiupper, METH_VARARGS, "uppercase an ASCII string\n"},
1210 1218 {"dict_new_presized", dict_new_presized, METH_VARARGS,
1211 1219 "construct a dict with an expected size\n"},
1212 1220 {"make_file_foldmap", make_file_foldmap, METH_VARARGS,
1213 1221 "make file foldmap\n"},
1214 1222 {"jsonescapeu8fast", jsonescapeu8fast, METH_VARARGS,
1215 1223 "escape a UTF-8 byte string to JSON (fast path)\n"},
1216 1224 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
1217 1225 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
1218 1226 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
1219 1227 {"fm1readmarkers", fm1readmarkers, METH_VARARGS,
1220 1228 "parse v1 obsolete markers\n"},
1221 1229 {NULL, NULL}};
1222 1230
1223 1231 void dirs_module_init(PyObject *mod);
1224 1232 void manifest_module_init(PyObject *mod);
1225 1233 void revlog_module_init(PyObject *mod);
1226 1234
1227 1235 static const int version = 20;
1228 1236
1229 1237 static void module_init(PyObject *mod)
1230 1238 {
1231 1239 PyModule_AddIntConstant(mod, "version", version);
1232 1240
1233 1241 /* This module constant has two purposes. First, it lets us unit test
1234 1242 * the ImportError raised without hard-coding any error text. This
1235 1243 * means we can change the text in the future without breaking tests,
1236 1244 * even across changesets without a recompile. Second, its presence
1237 1245 * can be used to determine whether the version-checking logic is
1238 1246 * present, which also helps in testing across changesets without a
1239 1247 * recompile. Note that this means the pure-Python version of parsers
1240 1248 * should not have this module constant. */
1241 1249 PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
1242 1250
1243 1251 dirs_module_init(mod);
1244 1252 manifest_module_init(mod);
1245 1253 revlog_module_init(mod);
1246 1254
1247 1255 if (PyType_Ready(&dirstateItemType) < 0) {
1248 1256 return;
1249 1257 }
1250 1258 Py_INCREF(&dirstateItemType);
1251 1259 PyModule_AddObject(mod, "DirstateItem", (PyObject *)&dirstateItemType);
1252 1260 }
1253 1261
1254 1262 static int check_python_version(void)
1255 1263 {
1256 1264 PyObject *sys = PyImport_ImportModule("sys"), *ver;
1257 1265 long hexversion;
1258 1266 if (!sys) {
1259 1267 return -1;
1260 1268 }
1261 1269 ver = PyObject_GetAttrString(sys, "hexversion");
1262 1270 Py_DECREF(sys);
1263 1271 if (!ver) {
1264 1272 return -1;
1265 1273 }
1266 1274 hexversion = PyInt_AsLong(ver);
1267 1275 Py_DECREF(ver);
1268 1276 /* sys.hexversion is a 32-bit number by default, so the -1 case
1269 1277 * should only occur in unusual circumstances (e.g. if sys.hexversion
1270 1278 * is manually set to an invalid value). */
1271 1279 if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
1272 1280 PyErr_Format(PyExc_ImportError,
1273 1281 "%s: The Mercurial extension "
1274 1282 "modules were compiled with Python " PY_VERSION
1275 1283 ", but "
1276 1284 "Mercurial is currently using Python with "
1277 1285 "sys.hexversion=%ld: "
1278 1286 "Python %s\n at: %s",
1279 1287 versionerrortext, hexversion, Py_GetVersion(),
1280 1288 Py_GetProgramFullPath());
1281 1289 return -1;
1282 1290 }
1283 1291 return 0;
1284 1292 }
1285 1293
1286 1294 #ifdef IS_PY3K
1287 1295 static struct PyModuleDef parsers_module = {PyModuleDef_HEAD_INIT, "parsers",
1288 1296 parsers_doc, -1, methods};
1289 1297
1290 1298 PyMODINIT_FUNC PyInit_parsers(void)
1291 1299 {
1292 1300 PyObject *mod;
1293 1301
1294 1302 if (check_python_version() == -1)
1295 1303 return NULL;
1296 1304 mod = PyModule_Create(&parsers_module);
1297 1305 module_init(mod);
1298 1306 return mod;
1299 1307 }
1300 1308 #else
1301 1309 PyMODINIT_FUNC initparsers(void)
1302 1310 {
1303 1311 PyObject *mod;
1304 1312
1305 1313 if (check_python_version() == -1) {
1306 1314 return;
1307 1315 }
1308 1316 mod = Py_InitModule3("parsers", methods, parsers_doc);
1309 1317 module_init(mod);
1310 1318 }
1311 1319 #endif
@@ -1,91 +1,92 b''
1 1 /*
2 2 util.h - utility functions for interfacing with the various python APIs.
3 3
4 4 This software may be used and distributed according to the terms of
5 5 the GNU General Public License, incorporated herein by reference.
6 6 */
7 7
8 8 #ifndef _HG_UTIL_H_
9 9 #define _HG_UTIL_H_
10 10
11 11 #include "compat.h"
12 12
13 13 #if PY_MAJOR_VERSION >= 3
14 14 #define IS_PY3K
15 15 #endif
16 16
17 17 /* helper to switch things like string literal depending on Python version */
18 18 #ifdef IS_PY3K
19 19 #define PY23(py2, py3) py3
20 20 #else
21 21 #define PY23(py2, py3) py2
22 22 #endif
23 23
24 24 /* clang-format off */
/* Instance layout of the DirstateItem Python type (dirstateItemType):
 * one entry of the dirstate, with plain int fields after the object header. */
25 25 typedef struct {
26 26 PyObject_HEAD
27 27 int flags; /* presumably a bitwise OR of dirstate_flag_* values -- TODO confirm */
28 28 int mode; /* presumably the file mode bits -- TODO confirm */
29 29 int size; /* presumably the file size in bytes -- TODO confirm */
30 30 int mtime_s; /* presumably the seconds part of the mtime -- TODO confirm */
31 31 int mtime_ns; /* presumably the nanoseconds part of the mtime -- TODO confirm */
32 32 } dirstateItemObject;
33 33 /* clang-format on */
34 34
/* Single-bit masks, one per boolean property of a dirstate item
 * (presumably the values combined in dirstateItemObject.flags).
 *
 * NOTE(review): the dirstate-v2 format documentation lists
 * HAS_FALLBACK_EXEC = 1 << 11, FALLBACK_EXEC = 1 << 12,
 * HAS_FALLBACK_SYMLINK = 1 << 13 and FALLBACK_SYMLINK = 1 << 14, i.e. the
 * opposite pairing of the four fallback bits defined below -- confirm
 * which side is authoritative. */
35 35 static const int dirstate_flag_wc_tracked = 1;
36 36 static const int dirstate_flag_p1_tracked = 1 << 1;
37 37 static const int dirstate_flag_p2_info = 1 << 2;
38 38 static const int dirstate_flag_has_meaningful_data = 1 << 3;
39 39 static const int dirstate_flag_has_file_mtime = 1 << 4;
40 40 static const int dirstate_flag_has_directory_mtime = 1 << 5;
41 41 static const int dirstate_flag_mode_exec_perm = 1 << 6;
42 42 static const int dirstate_flag_mode_is_symlink = 1 << 7;
43 43 static const int dirstate_flag_expected_state_is_modified = 1 << 8;
44 44 static const int dirstate_flag_all_unknown_recorded = 1 << 9;
45 45 static const int dirstate_flag_all_ignored_recorded = 1 << 10;
46 46 static const int dirstate_flag_fallback_exec = 1 << 11;
47 47 static const int dirstate_flag_has_fallback_exec = 1 << 12;
48 48 static const int dirstate_flag_fallback_symlink = 1 << 13;
49 49 static const int dirstate_flag_has_fallback_symlink = 1 << 14;
50 static const int dirstate_flag_mtime_second_ambiguous = 1 << 15;
50 51
51 52 extern PyTypeObject dirstateItemType;
52 53 #define dirstate_tuple_check(op) (Py_TYPE(op) == &dirstateItemType)
53 54
54 55 #ifndef MIN
55 56 #define MIN(a, b) (((a) < (b)) ? (a) : (b))
56 57 #endif
57 58 /* VC9 doesn't include bool and lacks stdbool.h based on my searching */
58 59 #if defined(_MSC_VER) || __STDC_VERSION__ < 199901L
59 60 #define true 1
60 61 #define false 0
61 62 typedef unsigned char bool;
62 63 #else
63 64 #include <stdbool.h>
64 65 #endif
65 66
66 67 static inline PyObject *_dict_new_presized(Py_ssize_t expected_size)
67 68 {
68 69 /* _PyDict_NewPresized expects a minused parameter, but it actually
69 70 creates a dictionary that's the nearest power of two bigger than the
70 71 parameter. For example, with the initial minused = 1000, the
71 72 dictionary created has size 1024. Of course in a lot of cases that
72 73 can be greater than the maximum load factor Python's dict object
73 74 expects (= 2/3), so as soon as we cross the threshold we'll resize
74 75 anyway. So create a dictionary that's at least 3/2 the size. */
75 76 return _PyDict_NewPresized(((1 + expected_size) / 2) * 3);
76 77 }
77 78
78 79 /* Convert a PyInt or PyLong to a long. Returns false if there is an
79 80 error, in which case an exception will already have been set. */
80 81 static inline bool pylong_to_long(PyObject *pylong, long *out)
81 82 {
82 83 *out = PyLong_AsLong(pylong);
83 84 /* Fast path to avoid hitting PyErr_Occurred if the value was obviously
84 85 * not an error. */
85 86 if (*out != -1) {
86 87 return true;
87 88 }
88 89 return PyErr_Occurred() == NULL;
89 90 }
90 91
91 92 #endif /* _HG_UTIL_H_ */
@@ -1,594 +1,602 b''
1 1 The *dirstate* is what Mercurial uses internally to track
2 2 the state of files in the working directory,
3 3 such as set by commands like `hg add` and `hg rm`.
4 4 It also contains some cached data that help make `hg status` faster.
5 5 The name refers both to `.hg/dirstate` on the filesystem
6 6 and the corresponding data structure in memory while a Mercurial process
7 7 is running.
8 8
9 9 The original file format, retroactively dubbed `dirstate-v1`,
10 10 is described at https://www.mercurial-scm.org/wiki/DirState.
11 11 It is made of a flat sequence of unordered variable-size entries,
12 12 so accessing any information in it requires parsing all of it.
13 13 Similarly, saving changes requires rewriting the entire file.
14 14
15 15 The newer `dirstate-v2` file format is designed to fix these limitations
16 16 and make `hg status` faster.
17 17
18 18 User guide
19 19 ==========
20 20
21 21 Compatibility
22 22 -------------
23 23
24 24 The file format is experimental and may still change.
25 25 Different versions of Mercurial may not be compatible with each other
26 26 when working on a local repository that uses this format.
27 27 When using an incompatible version with the experimental format,
28 28 anything can happen including data corruption.
29 29
30 30 Since the dirstate is entirely local and not relevant to the wire protocol,
31 31 `dirstate-v2` does not affect compatibility with remote Mercurial versions.
32 32
33 33 When `share-safe` is enabled, different repositories sharing the same store
34 34 can use different dirstate formats.
35 35
36 36 Enabling `dirstate-v2` for new local repositories
37 37 -------------------------------------------------
38 38
39 39 When creating a new local repository such as with `hg init` or `hg clone`,
40 40 the `exp-dirstate-v2` boolean in the `format` configuration section
41 41 controls whether to use this file format.
42 42 This is disabled by default as of this writing.
43 43 To enable it for a single repository, run for example::
44 44
45 45 $ hg init my-project --config format.exp-dirstate-v2=1
46 46
47 47 Checking the format of an existing local repository
48 48 ---------------------------------------------------
49 49
50 50 The `debugformat` command prints information about
51 51 which of multiple optional formats are used in the current repository,
52 52 including `dirstate-v2`::
53 53
54 54 $ hg debugformat
55 55 format-variant repo
56 56 fncache: yes
57 57 dirstate-v2: yes
58 58 […]
59 59
60 60 Upgrading or downgrading an existing local repository
61 61 -----------------------------------------------------
62 62
63 63 The `debugupgrade` command does various upgrades or downgrades
64 64 on a local repository
65 65 based on the current Mercurial version and on configuration.
66 66 The same `format.exp-dirstate-v2` configuration is used again.
67 67
68 68 Example to upgrade::
69 69
70 70 $ hg debugupgrade --config format.exp-dirstate-v2=1
71 71
72 72 Example to downgrade to `dirstate-v1`::
73 73
74 74 $ hg debugupgrade --config format.exp-dirstate-v2=0
75 75
76 76 Both of these commands do nothing but print a list of proposed changes,
77 77 which may include changes unrelated to the dirstate.
78 78 Those other changes are controlled by their own configuration keys.
79 79 Add `--run` to a command to actually apply the proposed changes.
80 80
81 81 Backups of `.hg/requires` and `.hg/dirstate` are created
82 82 in a `.hg/upgradebackup.*` directory.
83 83 If something goes wrong, restoring those files should undo the change.
84 84
85 85 Note that upgrading affects compatibility with older versions of Mercurial
86 86 as noted above.
87 87 This can be relevant when a repository’s files are on a USB drive
88 88 or some other removable media, or shared over the network, etc.
89 89
90 90 Internal filesystem representation
91 91 ==================================
92 92
93 93 Requirements file
94 94 -----------------
95 95
96 96 The `.hg/requires` file indicates which of various optional file formats
97 97 are used by a given repository.
98 98 Mercurial aborts when seeing a requirement it does not know about,
99 99 which prevents older versions from accidentally messing up a repository
100 100 that uses a format that was introduced later.
101 101 For versions that do support a format, the presence or absence of
102 102 the corresponding requirement indicates whether to use that format.
103 103
104 104 When the file contains a `exp-dirstate-v2` line,
105 105 the `dirstate-v2` format is used.
106 106 With no such line `dirstate-v1` is used.
107 107
108 108 High level description
109 109 ----------------------
110 110
111 111 Whereas `dirstate-v1` uses a single `.hg/dirstate` file,
112 112 in `dirstate-v2` that file is a "docket" file
113 113 that only contains some metadata
114 114 and points to a separate data file named `.hg/dirstate.{ID}`,
115 115 where `{ID}` is a random identifier.
116 116
117 117 This separation allows making data files append-only
118 118 and therefore safer to memory-map.
119 119 Creating a new data file (occasionally to clean up unused data)
120 120 can be done with a different ID
121 121 without disrupting another Mercurial process
122 122 that could still be using the previous data file.
123 123
124 124 Both files have a format designed to reduce the need for parsing,
125 125 by using fixed-size binary components as much as possible.
126 126 For data that is not fixed-size,
127 127 references to other parts of a file can be made by storing "pseudo-pointers":
128 128 integers counted in bytes from the start of a file.
129 129 For read-only access no data structure is needed,
130 130 only a bytes buffer (possibly memory-mapped directly from the filesystem)
131 131 with specific parts read on demand.
132 132
133 133 The data file contains "nodes" organized in a tree.
134 134 Each node represents a file or directory inside the working directory
135 135 or its parent changeset.
136 136 This tree has the same structure as the filesystem,
137 137 so a node representing a directory has child nodes representing
138 138 the files and subdirectories contained directly in that directory.
139 139
140 140 The docket file format
141 141 ----------------------
142 142
143 143 This is implemented in `rust/hg-core/src/dirstate_tree/on_disk.rs`
144 144 and `mercurial/dirstateutils/docket.py`.
145 145
146 146 Components of the docket file are found at fixed offsets,
147 147 counted in bytes from the start of the file:
148 148
149 149 * Offset 0:
150 150 The 12-bytes marker string "dirstate-v2\n" ending with a newline character.
151 151 This makes it easier to tell a dirstate-v2 file from a dirstate-v1 file,
152 152 although it is not strictly necessary
153 153 since `.hg/requires` determines which format to use.
154 154
155 155 * Offset 12:
156 156 The changeset node ID on the first parent of the working directory,
157 157 as up to 32 binary bytes.
158 158 If a node ID is shorter (20 bytes for SHA-1),
159 159 it is start-aligned and the rest of the bytes are set to zero.
160 160
161 161 * Offset 44:
162 162 The changeset node ID on the second parent of the working directory,
163 163 or all zeros if there isn’t one.
164 164 Also 32 binary bytes.
165 165
166 166 * Offset 76:
167 167 Tree metadata on 44 bytes, described below.
168 168 Its separation in this documentation from the rest of the docket
169 169 reflects a detail of the current implementation.
170 170 Since tree metadata is also made of fields at fixed offsets, those could
171 171 be inlined here by adding 76 bytes to each offset.
172 172
173 173 * Offset 120:
174 174 The used size of the data file, as a 32-bit big-endian integer.
175 175 The actual size of the data file may be larger
176 176 (if another Mercurial process is appending to it
177 177 but has not updated the docket yet).
178 178 That extra data must be ignored.
179 179
180 180 * Offset 124:
181 181 The length of the data file identifier, as an 8-bit integer.
182 182
183 183 * Offset 125:
184 184 The data file identifier.
185 185
186 186 * Any additional data is currently ignored, and dropped when updating the file.
187 187
188 188 Tree metadata in the docket file
189 189 --------------------------------
190 190
191 191 Tree metadata is similarly made of components at fixed offsets.
192 192 These offsets are counted in bytes from the start of tree metadata,
193 193 which is 76 bytes after the start of the docket file.
194 194
195 195 This metadata can be thought of as the singular root of the tree
196 196 formed by nodes in the data file.
197 197
198 198 * Offset 0:
199 199 Pseudo-pointer to the start of root nodes,
200 200 counted in bytes from the start of the data file,
201 201 as a 32-bit big-endian integer.
202 202 These nodes describe files and directories found directly
203 203 at the root of the working directory.
204 204
205 205 * Offset 4:
206 206 Number of root nodes, as a 32-bit big-endian integer.
207 207
208 208 * Offset 8:
209 209 Total number of nodes in the entire tree that "have a dirstate entry",
210 210 as a 32-bit big-endian integer.
211 211 Those nodes represent files that would be present at all in `dirstate-v1`.
212 212 This is typically less than the total number of nodes.
213 213 This counter is used to implement `len(dirstatemap)`.
214 214
215 215 * Offset 12:
216 216 Number of nodes in the entire tree that have a copy source,
217 217 as a 32-bit big-endian integer.
218 218 At the next commit, these files are recorded
219 219 as having been copied or moved/renamed from that source.
220 220 (A move is recorded as a copy and separate removal of the source.)
221 221 This counter is used to implement `len(dirstatemap.copymap)`.
222 222
223 223 * Offset 16:
224 224 An estimation of how many bytes of the data file
225 225 (within its used size) are unused, as a 32-bit big-endian integer.
226 226 When appending to an existing data file,
227 227 some existing nodes or paths can be unreachable from the new root
228 228 but they still take up space.
229 229 This counter is used to decide when to write a new data file from scratch
230 230 instead of appending to an existing one,
231 231 in order to get rid of that unreachable data
232 232 and avoid unbounded file size growth.
233 233
234 234 * Offset 20:
235 235 These four bytes are currently ignored
236 236 and reset to zero when updating a docket file.
237 237 This is an attempt at forward compatibility:
238 238 future Mercurial versions could use this as a bit field
239 239 to indicate that a dirstate has additional data or constraints.
240 240 Finding a dirstate file with the relevant bit unset indicates that
241 241 it was written by a then-older version
242 242 which is not aware of that future change.
243 243
244 244 * Offset 24:
245 245 Either 20 zero bytes, or a SHA-1 hash as 20 binary bytes.
246 246 When present, the hash is of ignore patterns
247 247 that were used for some previous run of the `status` algorithm.
248 248
249 249 * (Offset 44: end of tree metadata)
250 250
251 251 Optional hash of ignore patterns
252 252 --------------------------------
253 253
254 254 The implementation of `status` at `rust/hg-core/src/dirstate_tree/status.rs`
255 255 has been optimized such that its run time is dominated by calls
256 256 to `stat` for reading the filesystem metadata of a file or directory,
257 257 and to `readdir` for listing the contents of a directory.
258 258 In some cases the algorithm can skip calls to `readdir`
259 259 (saving significant time)
260 260 because the dirstate already contains enough of the relevant information
261 261 to build the correct `status` results.
262 262
263 263 The default configuration of `hg status` is to list unknown files
264 264 but not ignored files.
265 265 In this case, it matters for the `readdir`-skipping optimization
266 266 if a given file used to be ignored but became unknown
267 267 because `.hgignore` changed.
268 268 To detect the possibility of such a change,
269 269 the tree metadata contains an optional hash of all ignore patterns.
270 270
271 271 We define:
272 272
273 273 * "Root" ignore files as:
274 274
275 275 - `.hgignore` at the root of the repository if it exists
276 276 - And all files from `ui.ignore.*` config.
277 277
278 278 This set of files is sorted by the string representation of their path.
279 279
280 280 * The "expanded contents" of an ignore file is the byte string made
281 281 by the concatenation of its contents followed by the "expanded contents"
282 282 of other files included with `include:` or `subinclude:` directives,
283 283 in inclusion order. This definition is recursive, as included files can
284 284 themselves include more files.
285 285
286 286 This hash is defined as the SHA-1 of the concatenation (in sorted
287 287 order) of the "expanded contents" of each "root" ignore file.
288 288 (Note that computing this does not require actually concatenating
289 289 into a single contiguous byte sequence.
290 290 Instead a SHA-1 hasher object can be created
291 291 and fed separate chunks one by one.)
292 292
293 293 The data file format
294 294 --------------------
295 295
296 296 This is implemented in `rust/hg-core/src/dirstate_tree/on_disk.rs`
297 297 and `mercurial/dirstateutils/v2.py`.
298 298
299 299 The data file contains two types of data: paths and nodes.
300 300
301 301 Paths and nodes can be organized in any order in the file, except that sibling
302 302 nodes must be next to each other and sorted by their path.
303 303 Contiguity lets the parent refer to them all
304 304 by their count and a single pseudo-pointer,
305 305 instead of storing one pseudo-pointer per child node.
306 306 Sorting allows using binary search to find a child node with a given name
307 307 in `O(log(n))` byte sequence comparisons.
308 308
309 309 The current implementation writes paths and child nodes before a given node
310 310 for ease of figuring out the value of pseudo-pointers by the time they are to be
311 311 written, but this is not an obligation and readers must not rely on it.
312 312
313 313 A path is stored as a byte string anywhere in the file, without delimiter.
314 314 It is referred to by one or more nodes by a pseudo-pointer to its start, and its
315 315 length in bytes. Since there is no delimiter,
316 316 when a path is a substring of another the same bytes could be reused,
317 317 although the implementation does not exploit this as of this writing.
318 318
319 319 A node is stored on 43 bytes with components at fixed offsets. Paths and
320 320 child nodes relevant to a node are stored externally and referenced through
321 321 pseudo-pointers.
322 322
323 323 All integers are stored in big-endian. All pseudo-pointers are 32-bit integers
324 324 counting bytes from the start of the data file. Path lengths and positions
325 325 are 16-bit integers, also counted in bytes.
326 326
327 327 Node components are:
328 328
329 329 * Offset 0:
330 330 Pseudo-pointer to the full path of this node,
331 331 from the working directory root.
332 332
333 333 * Offset 4:
334 334 Length of the full path.
335 335
336 336 * Offset 6:
337 337 Position of the last `/` path separator within the full path,
338 338 in bytes from the start of the full path,
339 339 or zero if there isn’t one.
340 340 The part of the full path after this position is the "base name".
341 341 Since sibling nodes have the same parent, only their base names vary
342 342 and need to be considered when doing binary search to find a given path.
343 343
344 344 * Offset 8:
345 345 Pseudo-pointer to the "copy source" path for this node,
346 346 or zero if there is no copy source.
347 347
348 348 * Offset 12:
349 349 Length of the copy source path, or zero if there isn’t one.
350 350
351 351 * Offset 14:
352 352 Pseudo-pointer to the start of child nodes.
353 353
354 354 * Offset 18:
355 355 Number of child nodes, as a 32-bit integer.
356 356 They occupy 43 times this number of bytes
357 357 (not counting space for paths, and further descendants).
358 358
359 359 * Offset 22:
360 360 Number as a 32-bit integer of descendant nodes in this subtree,
361 361 not including this node itself,
362 362 that "have a dirstate entry".
363 363 Those nodes represent files that would be present at all in `dirstate-v1`.
364 364 This is typically less than the total number of descendants.
365 365 This counter is used to implement `has_dir`.
366 366
367 367 * Offset 26:
368 368 Number as a 32-bit integer of descendant nodes in this subtree,
369 369 not including this node itself,
370 370 that represent files tracked in the working directory.
371 371 (For example, `hg rm` makes a file untracked.)
372 372 This counter is used to implement `has_tracked_dir`.
373 373
374 374 * Offset 30:
375 375 A `flags` field that packs some boolean values as bits of a 16-bit integer.
376 376 Starting from least-significant, bit masks are::
377 377
378 378 WDIR_TRACKED = 1 << 0
379 379 P1_TRACKED = 1 << 1
380 380 P2_INFO = 1 << 2
381 381 HAS_MODE_AND_SIZE = 1 << 3
382 382 HAS_FILE_MTIME = 1 << 4
383 383 HAS_DIRECTORY_MTIME = 1 << 5
384 384 MODE_EXEC_PERM = 1 << 6
385 385 MODE_IS_SYMLINK = 1 << 7
386 386 EXPECTED_STATE_IS_MODIFIED = 1 << 8
387 387 ALL_UNKNOWN_RECORDED = 1 << 9
388 388 ALL_IGNORED_RECORDED = 1 << 10
389 389 HAS_FALLBACK_EXEC = 1 << 11
390 390 FALLBACK_EXEC = 1 << 12
391 391 HAS_FALLBACK_SYMLINK = 1 << 13
392 392 FALLBACK_SYMLINK = 1 << 14
393 MTIME_SECOND_AMBIGUOUS = 1 << 15
393 394
394 395 The meaning of each bit is described below.
395 396
396 397 Other bits are unset.
397 398 They may be assigned meaning in the future,
398 399 with the limitation that Mercurial versions that pre-date such meaning
399 400 will always reset those bits to unset when writing nodes.
400 401 (A new node is written for any mutation in its subtree,
401 402 leaving the bytes of the old node unreachable
402 403 until the data file is rewritten entirely.)
403 404
404 405 * Offset 32:
405 406 A `size` field described below, as a 32-bit integer.
406 407 Unlike in dirstate-v1, negative values are not used.
407 408
408 409 * Offset 36:
409 410 The seconds component of an `mtime` field described below,
410 411 as a 32-bit integer.
411 412 Unlike in dirstate-v1, negative values are not used.
412 413 When `mtime` is used, this is the number of seconds since the Unix epoch
413 414 truncated to its lower 31 bits.
414 415
415 416 * Offset 40:
416 417 The nanoseconds component of an `mtime` field described below,
417 418 as a 32-bit integer.
418 419 When `mtime` is used,
419 420 this is the number of nanoseconds since `mtime.seconds`,
420 421 always strictly less than one billion.
421 422
422 423 This may be zero if more precision is not available.
423 424 (This can happen because of limitations in any of Mercurial, Python,
424 425 libc, the operating system, …)
425 426
426 427 When comparing two mtimes and either has this component set to zero,
427 428 the sub-second precision of both should be ignored.
428 429 False positives when checking mtime equality due to clock resolution
429 430 are always possible and the status algorithm needs to deal with them,
430 431 but having too many false negatives could be harmful too.
431 432
432 433 * (Offset 44: end of this node)
433 434
434 435 The meaning of the boolean values packed in `flags` is:
435 436
436 437 `WDIR_TRACKED`
437 438 Set if the working directory contains a tracked file at this node’s path.
438 439 This is typically set and unset by `hg add` and `hg rm`.
439 440
440 441 `P1_TRACKED`
441 442 Set if the working directory’s first parent changeset
442 443 (whose node identifier is found in tree metadata)
443 444 contains a tracked file at this node’s path.
444 445 This is a cache to reduce manifest lookups.
445 446
446 447 `P2_INFO`
447 448 Set if the file has been involved in some merge operation.
448 449 Either because it was actually merged,
449 450 or because the version in the second parent (p2) was ahead,
450 451 or because some rename moved it there.
451 452 In either case `hg status` will want it displayed as modified.
452 453
453 454 Files that would be mentioned at all in the `dirstate-v1` file format
454 455 have a node with at least one of the above three bits set in `dirstate-v2`.
455 456 Let’s call these files "tracked anywhere",
456 457 and "untracked" the nodes with all three of these bits unset.
457 458 Untracked nodes are typically for directories:
458 459 they hold child nodes and form the tree structure.
459 460 Additional untracked nodes may also exist.
460 461 Although implementations should strive to clean up nodes
461 462 that are entirely unused, other untracked nodes may also exist.
462 463 For example, a future version of Mercurial might in some cases
463 464 add nodes for untracked files or/and ignored files in the working directory
464 465 in order to optimize `hg status`
465 466 by enabling it to skip `readdir` in more cases.
466 467
467 468 `HAS_MODE_AND_SIZE`
468 469 Must be unset for untracked nodes.
469 470 For files tracked anywhere, if this is set:
470 471 - The `size` field is the expected file size,
471 472 in bytes, truncated to its lower 31 bits.
472 473 - The expected execute permission for the file’s owner
473 474 is given by `MODE_EXEC_PERM`
474 475 - The expected file type is given by `MODE_IS_SYMLINK`:
475 476 a symbolic link if set, or a normal file if unset.
476 477 If this is unset the expected size, permission, and file type are unknown.
477 478 The `size` field is unused (set to zero).
478 479
479 480 `HAS_FILE_MTIME`
480 481 Must be unset for untracked nodes.
481 482 If this and `HAS_DIRECTORY_MTIME` are both unset,
482 483 the `mtime` field is unused (set to zero).
483 484 If this is set, `mtime` is the expected modification time.
484 485
485 486 `HAS_DIRECTORY_MTIME`
486 487 Must be unset for files tracked anywhere.
487 488 If this and `HAS_FILE_MTIME` are both unset,
488 489 the `mtime` field is unused (set to zero).
489 490 If this is set, at some point,
490 491 this path in the working directory was observed:
491 492
492 493 - To be a directory
493 494 - With the modification time given in `mtime`
494 495 - That time was already strictly in the past when observed,
495 496 meaning that later changes cannot happen in the same clock tick
496 497 and must cause a different modification time
497 498 (unless the system clock jumps back and we get unlucky,
498 499 which is not impossible but deemed unlikely enough).
499 500 - All direct children of this directory
500 501 (as returned by `std::fs::read_dir`)
501 502 either have a corresponding dirstate node,
502 503 or are ignored by ignore patterns whose hash is in tree metadata.
503 504
504 505 This means that if `std::fs::symlink_metadata` later reports
505 506 the same modification time
506 507 and ignored patterns haven’t changed,
507 508 a run of status that is not listing ignored files
508 509 can skip calling `std::fs::read_dir` again for this directory,
509 510 and iterate child dirstate nodes instead.
510 511
511 512 `MODE_EXEC_PERM`
512 513 Must be unset if `HAS_MODE_AND_SIZE` is unset.
513 514 If `HAS_MODE_AND_SIZE` is set,
514 515 this indicates whether the file’s owner is expected
515 516 to have execute permission.
516 517
517 518 `MODE_IS_SYMLINK`
518 519 Must be unset if `HAS_MODE_AND_SIZE` is unset.
519 520 If `HAS_MODE_AND_SIZE` is set,
520 521 this indicates whether the file is expected to be a symlink
521 522 as opposed to a normal file.
522 523
523 524 `EXPECTED_STATE_IS_MODIFIED`
524 525 Must be unset for untracked nodes.
525 526 For:
526 527 - a file tracked anywhere
527 528 - that has expected metadata (`HAS_MODE_AND_SIZE` and `HAS_FILE_MTIME`)
528 529 - if that metadata matches
529 530 metadata found in the working directory with `stat`
530 531 This bit indicates the status of the file.
531 532 If set, the status is modified. If unset, it is clean.
532 533
533 534 In cases where `hg status` needs to read the contents of a file
534 535 because metadata is ambiguous, this bit lets it record the result
535 536 if the result is modified so that a future run of `hg status`
536 537 does not need to do the same again.
537 538 It is valid to never set this bit,
538 539 and consider expected metadata ambiguous if it is set.
539 540
540 541 `ALL_UNKNOWN_RECORDED`
541 542 If set, all "unknown" children existing on disk (at the time of the last
542 543 status) have been recorded and the `mtime` associated with
543 544 `HAS_DIRECTORY_MTIME` can be used for optimization even when "unknown" files
544 545 are listed.
545 546
546 547 Note that the number of recorded "unknown" children can still be zero if none
547 548 were present.
548 549
549 550 Also note that having this flag unset does not imply that no "unknown"
550 551 children have been recorded. Some might be present, but there is no guarantee
551 552 that it will be all of them.
552 553
553 554 `ALL_IGNORED_RECORDED`
554 555 If set, all "ignored" children existing on disk (at the time of the last
555 556 status) have been recorded and the `mtime` associated with
556 557 `HAS_DIRECTORY_MTIME` can be used for optimization even when "ignored" files
557 558 are listed.
558 559
559 560 Note that the number of recorded "ignored" children can still be zero if none
560 561 were present.
561 562
562 563 Also note that having this flag unset does not imply that no "ignored"
563 564 children have been recorded. Some might be present, but there is no guarantee
564 565 that it will be all of them.
565 566
566 567 `HAS_FALLBACK_EXEC`
567 568 If this flag is set, the entry carries "fallback" information for the
568 569 executable bit in the `FALLBACK_EXEC` flag.
569 570
570 571 Fallback information can be stored in the dirstate to keep track of
571 572 filesystem attribute tracked by Mercurial when the underlying file
572 573 system or operating system does not support that property, (e.g.
573 574 Windows).
574 575
575 576 `FALLBACK_EXEC`
576 577 Should be ignored if `HAS_FALLBACK_EXEC` is unset. If set the file for this
577 578 entry should be considered executable if that information cannot be
578 579 extracted from the file system. If unset it should be considered
579 580 non-executable instead.
580 581
581 582 `HAS_FALLBACK_SYMLINK`
582 583 If this flag is set, the entry carries "fallback" information for symbolic
583 584 link status in the `FALLBACK_SYMLINK` flag.
584 585
585 586 Fallback information can be stored in the dirstate to keep track of
586 587 filesystem attribute tracked by Mercurial when the underlying file
587 588 system or operating system does not support that property, (e.g.
588 589 Windows).
589 590
590 591 `FALLBACK_SYMLINK`
591 592 Should be ignored if `HAS_FALLBACK_SYMLINK` is unset. If set the file for
592 593 this entry should be considered a symlink if that information cannot be
593 594 extracted from the file system. If unset it should be considered a normal
594 595 file instead.
596
597 `MTIME_SECOND_AMBIGUOUS`
598 This flag is relevant only when `HAS_FILE_MTIME` is set. When set, the
599 `mtime` stored in the entry is only valid for comparison with timestamps
600 that have nanosecond information. If the available timestamp does not carry
601 nanosecond information, the `mtime` should be ignored and no optimisation
602 can be applied.
@@ -1,928 +1,933 b''
1 1 # parsers.py - Python implementation of parsers.c
2 2 #
3 3 # Copyright 2009 Olivia Mackall <olivia@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import stat
11 11 import struct
12 12 import zlib
13 13
14 14 from ..node import (
15 15 nullrev,
16 16 sha1nodeconstants,
17 17 )
18 18 from ..thirdparty import attr
19 19 from .. import (
20 20 error,
21 21 pycompat,
22 22 revlogutils,
23 23 util,
24 24 )
25 25
26 26 from ..revlogutils import nodemap as nodemaputil
27 27 from ..revlogutils import constants as revlog_constants
28 28
29 29 stringio = pycompat.bytesio
30 30
31 31
# Short aliases for the struct/zlib entry points used throughout this module.
_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress


# a special value used internally for `size` if the file comes from the other
# parent
FROM_P2 = -2

# a special value used internally for `size` if the file is
# modified/merged/added
NONNORMAL = -1

# a special value used internally for `time` if the time is ambiguous
AMBIGUOUS_TIME = -1

# Bits of the 16-bit `flags` field inside a node in the dirstate-v2 file
# format, starting from the least-significant bit.
DIRSTATE_V2_WDIR_TRACKED = 1 << 0
DIRSTATE_V2_P1_TRACKED = 1 << 1
DIRSTATE_V2_P2_INFO = 1 << 2
DIRSTATE_V2_HAS_MODE_AND_SIZE = 1 << 3
DIRSTATE_V2_HAS_FILE_MTIME = 1 << 4
_DIRSTATE_V2_HAS_DIRCTORY_MTIME = 1 << 5  # Unused when Rust is not available
DIRSTATE_V2_MODE_EXEC_PERM = 1 << 6
DIRSTATE_V2_MODE_IS_SYMLINK = 1 << 7
DIRSTATE_V2_EXPECTED_STATE_IS_MODIFIED = 1 << 8
DIRSTATE_V2_ALL_UNKNOWN_RECORDED = 1 << 9
DIRSTATE_V2_ALL_IGNORED_RECORDED = 1 << 10
DIRSTATE_V2_HAS_FALLBACK_EXEC = 1 << 11
DIRSTATE_V2_FALLBACK_EXEC = 1 << 12
DIRSTATE_V2_HAS_FALLBACK_SYMLINK = 1 << 13
DIRSTATE_V2_FALLBACK_SYMLINK = 1 << 14
# When set, the stored mtime may only be compared against timestamps that
# carry nanosecond information.
DIRSTATE_V2_MTIME_SECOND_AMBIGUOUS = 1 << 15
63 64
64 65
@attr.s(slots=True, init=False)
class DirstateItem(object):
    """represent a dirstate entry

    It holds multiple attributes

    # about file tracking
    - wc_tracked: is the file tracked by the working copy
    - p1_tracked: is the file tracked in working copy first parent
    - p2_info: the file has been involved in some merge operation. Either
               because it was actually merged, or because the p2 version was
               ahead, or because some rename moved it there. In either case
               `hg status` will want it displayed as modified.

    # about the file state expected from p1 manifest:
    - mode: the file mode in p1
    - size: the file size in p1

    These values can be set to None, which means we don't have a meaningful
    value to compare with. Either because we don't really care about them as
    the `status` is known without having to look at the disk or because we
    don't know these right now and a full comparison will be needed to find
    out if the file is clean.

    # about the file state on disk last time we saw it:
    - mtime: the last known clean mtime for the file.

    This value can be set to None if no cachable state exists. Either because
    we do not care (see previous section) or because we could not cache
    something yet.
    """

    _wc_tracked = attr.ib()
    _p1_tracked = attr.ib()
    _p2_info = attr.ib()
    _mode = attr.ib()
    _size = attr.ib()
    _mtime_s = attr.ib()
    _mtime_ns = attr.ib()
    _fallback_exec = attr.ib()
    _fallback_symlink = attr.ib()

    def __init__(
        self,
        wc_tracked=False,
        p1_tracked=False,
        p2_info=False,
        has_meaningful_data=True,
        has_meaningful_mtime=True,
        parentfiledata=None,
        fallback_exec=None,
        fallback_symlink=None,
    ):
        """Build an item from tracking flags and optional cached file data

        `parentfiledata`, when given, is `(mode, size, (mtime_s, mtime_ns))`.
        `has_meaningful_data` and `has_meaningful_mtime` select which parts
        of it are stored; both are forced to False when `parentfiledata` is
        None.
        """
        self._wc_tracked = wc_tracked
        self._p1_tracked = p1_tracked
        self._p2_info = p2_info

        self._fallback_exec = fallback_exec
        self._fallback_symlink = fallback_symlink

        self._mode = None
        self._size = None
        self._mtime_s = None
        self._mtime_ns = None
        if parentfiledata is None:
            has_meaningful_mtime = False
            has_meaningful_data = False
        if has_meaningful_data:
            self._mode = parentfiledata[0]
            self._size = parentfiledata[1]
        if has_meaningful_mtime:
            self._mtime_s, self._mtime_ns = parentfiledata[2]

    @classmethod
    def from_v2_data(cls, flags, size, mtime_s, mtime_ns):
        """Build a new DirstateItem object from V2 data"""
        has_mode_size = bool(flags & DIRSTATE_V2_HAS_MODE_AND_SIZE)
        has_meaningful_mtime = bool(flags & DIRSTATE_V2_HAS_FILE_MTIME)
        if flags & DIRSTATE_V2_MTIME_SECOND_AMBIGUOUS:
            # The current code is not able to do the more subtle comparison
            # that the MTIME_SECOND_AMBIGUOUS requires. So we ignore the mtime
            has_meaningful_mtime = False
        mode = None

        if flags & DIRSTATE_V2_EXPECTED_STATE_IS_MODIFIED:
            # we do not have support for this flag in the code yet,
            # force a lookup for this file.
            has_mode_size = False
            has_meaningful_mtime = False

        # Fallback values are normalised to real booleans, matching what the
        # `fallback_exec` / `fallback_symlink` setters store.
        fallback_exec = None
        if flags & DIRSTATE_V2_HAS_FALLBACK_EXEC:
            fallback_exec = bool(flags & DIRSTATE_V2_FALLBACK_EXEC)

        fallback_symlink = None
        if flags & DIRSTATE_V2_HAS_FALLBACK_SYMLINK:
            fallback_symlink = bool(flags & DIRSTATE_V2_FALLBACK_SYMLINK)

        if has_mode_size:
            # The v2 format only stores the exec bit and the symlink bit, so
            # a representative mode is synthesised from them.
            assert stat.S_IXUSR == 0o100
            if flags & DIRSTATE_V2_MODE_EXEC_PERM:
                mode = 0o755
            else:
                mode = 0o644
            if flags & DIRSTATE_V2_MODE_IS_SYMLINK:
                mode |= stat.S_IFLNK
            else:
                mode |= stat.S_IFREG
        return cls(
            wc_tracked=bool(flags & DIRSTATE_V2_WDIR_TRACKED),
            p1_tracked=bool(flags & DIRSTATE_V2_P1_TRACKED),
            p2_info=bool(flags & DIRSTATE_V2_P2_INFO),
            has_meaningful_data=has_mode_size,
            has_meaningful_mtime=has_meaningful_mtime,
            parentfiledata=(mode, size, (mtime_s, mtime_ns)),
            fallback_exec=fallback_exec,
            fallback_symlink=fallback_symlink,
        )

    @classmethod
    def from_v1_data(cls, state, mode, size, mtime):
        """Build a new DirstateItem object from V1 data

        Since the dirstate-v1 format is frozen, the signature of this function
        is not expected to change, unlike the __init__ one.
        """
        if state == b'm':
            return cls(wc_tracked=True, p1_tracked=True, p2_info=True)
        elif state == b'a':
            return cls(wc_tracked=True)
        elif state == b'r':
            if size == NONNORMAL:
                p1_tracked = True
                p2_info = True
            elif size == FROM_P2:
                p1_tracked = False
                p2_info = True
            else:
                p1_tracked = True
                p2_info = False
            return cls(p1_tracked=p1_tracked, p2_info=p2_info)
        elif state == b'n':
            if size == FROM_P2:
                return cls(wc_tracked=True, p2_info=True)
            elif size == NONNORMAL:
                return cls(wc_tracked=True, p1_tracked=True)
            elif mtime == AMBIGUOUS_TIME:
                # The mtime tuple is a placeholder: it is discarded because
                # has_meaningful_mtime is False.
                return cls(
                    wc_tracked=True,
                    p1_tracked=True,
                    has_meaningful_mtime=False,
                    parentfiledata=(mode, size, (42, 0)),
                )
            else:
                return cls(
                    wc_tracked=True,
                    p1_tracked=True,
                    parentfiledata=(mode, size, (mtime, 0)),
                )
        else:
            raise RuntimeError(b'unknown state: %s' % state)

    def set_possibly_dirty(self):
        """Mark a file as "possibly dirty"

        This means the next status call will have to actually check its content
        to make sure it is correct.
        """
        self._mtime_s = None
        self._mtime_ns = None

    def set_clean(self, mode, size, mtime):
        """mark a file as "clean" cancelling potential "possibly dirty call"

        `mtime` is a `(seconds, nanoseconds)` pair.

        Note: this function is a descendant of `dirstate.normal` and is
        currently expected to be called on "normal" entries only. There is no
        reason for this not to change in the future as long as the code is
        updated to preserve the proper state of the non-normal files.
        """
        self._wc_tracked = True
        self._p1_tracked = True
        self._mode = mode
        self._size = size
        self._mtime_s, self._mtime_ns = mtime

    def set_tracked(self):
        """mark a file as tracked in the working copy

        This will ultimately be called by command like `hg add`.
        """
        self._wc_tracked = True
        # `set_tracked` is replacing various `normallookup` call. So we mark
        # the files as needing lookup
        #
        # Consider dropping this in the future in favor of something less broad.
        self._mtime_s = None
        self._mtime_ns = None

    def set_untracked(self):
        """mark a file as untracked in the working copy

        This will ultimately be called by command like `hg remove`.
        """
        self._wc_tracked = False
        self._mode = None
        self._size = None
        self._mtime_s = None
        self._mtime_ns = None

    def drop_merge_data(self):
        """remove all "merge-only" data from a DirstateItem

        This is to be called by the dirstatemap code when the second parent is
        dropped
        """
        if self._p2_info:
            self._p2_info = False
            self._mode = None
            self._size = None
            self._mtime_s = None
            self._mtime_ns = None

    @property
    def mode(self):
        """the file mode, as serialized for dirstate-v1 (see `v1_mode`)"""
        return self.v1_mode()

    @property
    def size(self):
        """the file size, as serialized for dirstate-v1 (see `v1_size`)"""
        return self.v1_size()

    @property
    def mtime(self):
        """the file mtime, as serialized for dirstate-v1 (see `v1_mtime`)"""
        return self.v1_mtime()

    def mtime_likely_equal_to(self, other_mtime):
        """True if the stored mtime equals `other_mtime`

        `other_mtime` is a `(seconds, nanoseconds)` pair. Always False when no
        mtime is stored.
        """
        self_sec = self._mtime_s
        if self_sec is None:
            return False
        self_ns = self._mtime_ns
        other_sec, other_ns = other_mtime
        return self_sec == other_sec and self_ns == other_ns

    @property
    def state(self):
        """
        States are:
          n  normal
          m  needs merging
          r  marked for removal
          a  marked for addition

        XXX This "state" is a bit obscure and mostly a direct expression of the
        dirstatev1 format. It would make sense to ultimately deprecate it in
        favor of the more "semantic" attributes.
        """
        if not self.any_tracked:
            return b'?'
        return self.v1_state()

    @property
    def has_fallback_exec(self):
        """True if "fallback" information are available for the "exec" bit

        Fallback information can be stored in the dirstate to keep track of
        filesystem attribute tracked by Mercurial when the underlying file
        system or operating system does not support that property, (e.g.
        Windows).

        Not all version of the dirstate on-disk storage support preserving this
        information.
        """
        return self._fallback_exec is not None

    @property
    def fallback_exec(self):
        """ "fallback" information for the executable bit

        True if the file should be considered executable when we cannot get
        this information from the files system. False if it should be
        considered non-executable.

        See has_fallback_exec for details."""
        return self._fallback_exec

    @fallback_exec.setter
    def fallback_exec(self, value):
        """control "fallback" executable bit

        Set to:
        - True if the file should be considered executable,
        - False if the file should be considered non-executable,
        - None if we do not have valid fallback data.

        See has_fallback_exec for details."""
        if value is None:
            self._fallback_exec = None
        else:
            self._fallback_exec = bool(value)

    # The setter used to be bound to this name only, which left
    # `fallback_exec` itself read-only. Keep the old name as an alias of the
    # (now settable) property so existing users keep working.
    set_fallback_exec = fallback_exec

    @property
    def has_fallback_symlink(self):
        """True if "fallback" information are available for symlink status

        Fallback information can be stored in the dirstate to keep track of
        filesystem attribute tracked by Mercurial when the underlying file
        system or operating system does not support that property, (e.g.
        Windows).

        Not all version of the dirstate on-disk storage support preserving this
        information."""
        return self._fallback_symlink is not None

    @property
    def fallback_symlink(self):
        """ "fallback" information for symlink status

        True if the file should be considered a symlink when we cannot get
        this information from the files system. False if it should be
        considered a normal file.

        See has_fallback_symlink for details."""
        return self._fallback_symlink

    @fallback_symlink.setter
    def fallback_symlink(self, value):
        """control "fallback" symlink status

        Set to:
        - True if the file should be considered a symlink,
        - False if the file should be considered not a symlink,
        - None if we do not have valid fallback data.

        See has_fallback_symlink for details."""
        if value is None:
            self._fallback_symlink = None
        else:
            self._fallback_symlink = bool(value)

    # Same backward-compatibility alias as `set_fallback_exec`.
    set_fallback_symlink = fallback_symlink

    @property
    def tracked(self):
        """True if the file is tracked in the working copy"""
        return self._wc_tracked

    @property
    def any_tracked(self):
        """True if the file is tracked anywhere (wc or parents)"""
        return self._wc_tracked or self._p1_tracked or self._p2_info

    @property
    def added(self):
        """True if the file has been added"""
        return self._wc_tracked and not (self._p1_tracked or self._p2_info)

    @property
    def maybe_clean(self):
        """True if the file has a chance to be in the "clean" state"""
        if not self._wc_tracked:
            return False
        elif not self._p1_tracked:
            return False
        elif self._p2_info:
            return False
        return True

    @property
    def p1_tracked(self):
        """True if the file is tracked in the first parent manifest"""
        return self._p1_tracked

    @property
    def p2_info(self):
        """True if the file needed to merge or apply any input from p2

        See the class documentation for details.
        """
        return self._wc_tracked and self._p2_info

    @property
    def removed(self):
        """True if the file has been removed"""
        return not self._wc_tracked and (self._p1_tracked or self._p2_info)

    def v2_data(self):
        """Returns (flags, size, mtime_s, mtime_ns) for v2 serialization

        Unknown size and mtime components are serialized as zero.
        """
        flags = 0
        if self._wc_tracked:
            flags |= DIRSTATE_V2_WDIR_TRACKED
        if self._p1_tracked:
            flags |= DIRSTATE_V2_P1_TRACKED
        if self._p2_info:
            flags |= DIRSTATE_V2_P2_INFO
        if self._mode is not None and self._size is not None:
            flags |= DIRSTATE_V2_HAS_MODE_AND_SIZE
            if self.mode & stat.S_IXUSR:
                flags |= DIRSTATE_V2_MODE_EXEC_PERM
            if stat.S_ISLNK(self.mode):
                flags |= DIRSTATE_V2_MODE_IS_SYMLINK
        if self._mtime_s is not None:
            flags |= DIRSTATE_V2_HAS_FILE_MTIME

        if self._fallback_exec is not None:
            flags |= DIRSTATE_V2_HAS_FALLBACK_EXEC
            if self._fallback_exec:
                flags |= DIRSTATE_V2_FALLBACK_EXEC

        if self._fallback_symlink is not None:
            flags |= DIRSTATE_V2_HAS_FALLBACK_SYMLINK
            if self._fallback_symlink:
                flags |= DIRSTATE_V2_FALLBACK_SYMLINK

        # Note: we do not need to do anything regarding
        # DIRSTATE_V2_ALL_UNKNOWN_RECORDED and DIRSTATE_V2_ALL_IGNORED_RECORDED
        # since we never set _DIRSTATE_V2_HAS_DIRCTORY_MTIME
        return (flags, self._size or 0, self._mtime_s or 0, self._mtime_ns or 0)

    def v1_state(self):
        """return a "state" suitable for v1 serialization"""
        if not self.any_tracked:
            # the object has no state to record, this is -currently-
            # unsupported
            raise RuntimeError('untracked item')
        elif self.removed:
            return b'r'
        elif self._p1_tracked and self._p2_info:
            return b'm'
        elif self.added:
            return b'a'
        else:
            return b'n'

    def v1_mode(self):
        """return a "mode" suitable for v1 serialization"""
        return self._mode if self._mode is not None else 0

    def v1_size(self):
        """return a "size" suitable for v1 serialization"""
        if not self.any_tracked:
            # the object has no state to record, this is -currently-
            # unsupported
            raise RuntimeError('untracked item')
        elif self.removed and self._p1_tracked and self._p2_info:
            return NONNORMAL
        elif self._p2_info:
            return FROM_P2
        elif self.removed:
            return 0
        elif self.added:
            return NONNORMAL
        elif self._size is None:
            return NONNORMAL
        else:
            return self._size

    def v1_mtime(self):
        """return a "mtime" suitable for v1 serialization"""
        if not self.any_tracked:
            # the object has no state to record, this is -currently-
            # unsupported
            raise RuntimeError('untracked item')
        elif self.removed:
            return 0
        elif self._mtime_s is None:
            return AMBIGUOUS_TIME
        elif self._p2_info:
            return AMBIGUOUS_TIME
        elif not self._p1_tracked:
            return AMBIGUOUS_TIME
        else:
            return self._mtime_s

    def need_delay(self, now):
        """True if the stored mtime would be ambiguous with the current time

        `now` is indexable; only its first element (seconds) is compared.
        """
        return self.v1_state() == b'n' and self._mtime_s == now[0]
533 538
534 539
def gettype(q):
    """Return the low 16 bits of `q` as an int.

    Used on the first field of an index entry (see
    `BaseIndexObject.__getitem__`).
    """
    return int(q & 0xFFFF)
537 542
538 543
539 544 class BaseIndexObject(object):
540 545 # Can I be passed to an algorithme implemented in Rust ?
541 546 rust_ext_compat = 0
542 547 # Format of an index entry according to Python's `struct` language
543 548 index_format = revlog_constants.INDEX_ENTRY_V1
544 549 # Size of a C unsigned long long int, platform independent
545 550 big_int_size = struct.calcsize(b'>Q')
546 551 # Size of a C long int, platform independent
547 552 int_size = struct.calcsize(b'>i')
548 553 # An empty index entry, used as a default value to be overridden, or nullrev
549 554 null_item = (
550 555 0,
551 556 0,
552 557 0,
553 558 -1,
554 559 -1,
555 560 -1,
556 561 -1,
557 562 sha1nodeconstants.nullid,
558 563 0,
559 564 0,
560 565 revlog_constants.COMP_MODE_INLINE,
561 566 revlog_constants.COMP_MODE_INLINE,
562 567 )
563 568
564 569 @util.propertycache
565 570 def entry_size(self):
566 571 return self.index_format.size
567 572
568 573 @property
569 574 def nodemap(self):
570 575 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
571 576 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
572 577 return self._nodemap
573 578
574 579 @util.propertycache
575 580 def _nodemap(self):
576 581 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
577 582 for r in range(0, len(self)):
578 583 n = self[r][7]
579 584 nodemap[n] = r
580 585 return nodemap
581 586
582 587 def has_node(self, node):
583 588 """return True if the node exist in the index"""
584 589 return node in self._nodemap
585 590
586 591 def rev(self, node):
587 592 """return a revision for a node
588 593
589 594 If the node is unknown, raise a RevlogError"""
590 595 return self._nodemap[node]
591 596
592 597 def get_rev(self, node):
593 598 """return a revision for a node
594 599
595 600 If the node is unknown, return None"""
596 601 return self._nodemap.get(node)
597 602
598 603 def _stripnodes(self, start):
599 604 if '_nodemap' in vars(self):
600 605 for r in range(start, len(self)):
601 606 n = self[r][7]
602 607 del self._nodemap[n]
603 608
604 609 def clearcaches(self):
605 610 self.__dict__.pop('_nodemap', None)
606 611
607 612 def __len__(self):
608 613 return self._lgt + len(self._extra)
609 614
610 615 def append(self, tup):
611 616 if '_nodemap' in vars(self):
612 617 self._nodemap[tup[7]] = len(self)
613 618 data = self._pack_entry(len(self), tup)
614 619 self._extra.append(data)
615 620
616 621 def _pack_entry(self, rev, entry):
617 622 assert entry[8] == 0
618 623 assert entry[9] == 0
619 624 return self.index_format.pack(*entry[:8])
620 625
621 626 def _check_index(self, i):
622 627 if not isinstance(i, int):
623 628 raise TypeError(b"expecting int indexes")
624 629 if i < 0 or i >= len(self):
625 630 raise IndexError
626 631
627 632 def __getitem__(self, i):
628 633 if i == -1:
629 634 return self.null_item
630 635 self._check_index(i)
631 636 if i >= self._lgt:
632 637 data = self._extra[i - self._lgt]
633 638 else:
634 639 index = self._calculate_index(i)
635 640 data = self._data[index : index + self.entry_size]
636 641 r = self._unpack_entry(i, data)
637 642 if self._lgt and i == 0:
638 643 offset = revlogutils.offset_type(0, gettype(r[0]))
639 644 r = (offset,) + r[1:]
640 645 return r
641 646
def _unpack_entry(self, rev, data):
    """decode `data` with `index_format` and append four fixed fields

    The appended fields are `0, 0` followed by `COMP_MODE_INLINE` twice.
    """
    fields = self.index_format.unpack(data)
    inline = revlog_constants.COMP_MODE_INLINE
    return fields + (0, 0, inline, inline)
651 656
def pack_header(self, header):
    """pack header information as binary, using `INDEX_HEADER`"""
    return revlog_constants.INDEX_HEADER.pack(header)
656 661
def entry_binary(self, rev):
    """return the raw binary string representing a revision

    For revision 0 the first `INDEX_HEADER.size` bytes of the packed entry
    are left out."""
    packed = revlog_constants.INDEX_ENTRY_V1.pack(*self[rev][:8])
    if rev != 0:
        return packed
    return packed[revlog_constants.INDEX_HEADER.size :]
664 669
665 670
class IndexObject(BaseIndexObject):
    """index over a buffer made of `entry_size`-byte entries back to back"""

    def __init__(self, data):
        size = self.entry_size
        leftover = len(data) % size
        # the buffer must hold a whole number of entries
        assert leftover == 0, (len(data), size, leftover)
        self._data = data
        self._lgt = len(data) // size
        self._extra = []

    def _calculate_index(self, i):
        """byte offset of entry `i` inside `_data`"""
        return i * self.entry_size

    def __delitem__(self, i):
        """truncate the index: only `del index[start:-1]` is supported"""
        supported = isinstance(i, slice) and i.stop == -1 and i.step is None
        if not supported:
            raise ValueError(b"deleting slices only supports a:-1 with step 1")
        first = i.start
        self._check_index(first)
        self._stripnodes(first)
        if first >= self._lgt:
            # only entries appended in memory are dropped
            self._extra = self._extra[: first - self._lgt]
            return
        self._data = self._data[: first * self.entry_size]
        self._lgt = first
        self._extra = []
692 697
693 698
class PersistentNodeMapIndexObject(IndexObject):
    """a debug-oriented class to test the persistent nodemap

    We need a simple python object to test API and higher level behavior. See
    the Rust implementation for more serious usage. This should be used only
    through the dedicated `devel.persistent-nodemap` config.
    """

    def nodemap_data_all(self):
        """Return bytes containing a full serialization of a nodemap

        The nodemap should be valid for the full set of revisions in the
        index."""
        return nodemaputil.persistent_data(self)

    def nodemap_data_incremental(self):
        """Return bytes containing an incremental update to persistent nodemap

        This contains the data for an append-only update of the data provided
        in the last call to `update_nodemap_data`. Returns None when no
        nodemap root is currently held; otherwise the held state is reset
        before returning `(docket, changed, data)`.
        """
        root = self._nm_root
        if root is None:
            return None
        docket = self._nm_docket
        changed, data = nodemaputil.update_persistent_data(
            self, root, self._nm_max_idx, docket.tip_rev
        )

        self._nm_root = self._nm_max_idx = self._nm_docket = None
        return docket, changed, data

    def update_nodemap_data(self, docket, nm_data):
        """provide full block of persisted binary data for a nodemap

        The data are expected to come from disk. See `nodemap_data_all` for a
        producer of such data. Nothing is changed when `nm_data` is None."""
        if nm_data is None:
            return
        root, max_idx = nodemaputil.parse_data(nm_data)
        self._nm_root, self._nm_max_idx = root, max_idx
        if root:
            self._nm_docket = docket
        else:
            self._nm_root = self._nm_max_idx = self._nm_docket = None
736 741
737 742
class InlinedIndexObject(BaseIndexObject):
    """index whose entries are spread through one buffer

    Each entry is followed by a variable number of bytes (presumably the
    data of the revision it describes), so entry offsets have to be found
    by scanning the buffer."""

    def __init__(self, data, inline=0):
        # `inline` is accepted but not used by this class.
        self._data = data
        # two passes: count the entries, then record where each one starts
        self._lgt = self._inline_scan(None)
        self._inline_scan(self._lgt)
        self._extra = []

    def _inline_scan(self, lgt):
        """walk the buffer entry by entry and return the number of entries

        When `lgt` is not None, `self._offsets` is rebuilt as a list of `lgt`
        entry start offsets.

        Raises ValueError when an entry declares a negative length or when the
        entries do not end exactly at the end of the buffer."""
        off = 0
        if lgt is not None:
            self._offsets = [0] * lgt
        count = 0
        while off <= len(self._data) - self.entry_size:
            start = off + self.big_int_size
            (s,) = struct.unpack(
                b'>i',
                self._data[start : start + self.int_size],
            )
            if s < 0:
                # The length is decoded as a signed integer. A negative value
                # would move the cursor backward (or leave it in place when
                # it equals -entry_size, looping forever), so it can only
                # come from corrupted data.
                raise ValueError(b"corrupted data")
            if lgt is not None:
                self._offsets[count] = off
            count += 1
            off += self.entry_size + s
        if off != len(self._data):
            raise ValueError(b"corrupted data")
        return count

    def __delitem__(self, i):
        """truncate the index: only `del index[start:-1]` is supported"""
        if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
            raise ValueError(b"deleting slices only supports a:-1 with step 1")
        i = i.start
        self._check_index(i)
        self._stripnodes(i)
        if i < self._lgt:
            self._offsets = self._offsets[:i]
            self._lgt = i
            self._extra = []
        else:
            self._extra = self._extra[: i - self._lgt]

    def _calculate_index(self, i):
        """byte offset of entry `i`, as recorded by `_inline_scan`"""
        return self._offsets[i]
779 784
780 785
def parse_index2(data, inline, revlogv2=False):
    """build the index object for `data` and return `(index, cache)`

    Inline data yields an `InlinedIndexObject` and `(0, data)` as second
    item; otherwise the second item is None and the class depends on
    `revlogv2`."""
    if inline:
        return InlinedIndexObject(data, inline), (0, data)
    index_class = IndexObject2 if revlogv2 else IndexObject
    return index_class(data), None
787 792
788 793
def parse_index_cl_v2(data):
    """wrap `data` in an `IndexChangelogV2`; the second item is always None"""
    index = IndexChangelogV2(data)
    return index, None
791 796
792 797
class IndexObject2(IndexObject):
    """index using `INDEX_ENTRY_V2`, which also stores sidedata information
    and the two compression modes"""

    index_format = revlog_constants.INDEX_ENTRY_V2

    def replace_sidedata_info(
        self,
        rev,
        sidedata_offset,
        sidedata_length,
        offset_flags,
        compression_mode,
    ):
        """
        Replace an existing index entry's sidedata offset and length with new
        ones.
        This cannot be used outside of the context of sidedata rewriting,
        inside the transaction that creates the revision `rev`.
        """
        if rev < 0:
            raise KeyError
        self._check_index(rev)
        if rev < self._lgt:
            # only entries still held in `_extra` can be rewritten
            raise KeyError(b"cannot rewrite entries outside of this transaction")
        fields = list(self[rev])
        fields[0] = offset_flags
        fields[8] = sidedata_offset
        fields[9] = sidedata_length
        fields[11] = compression_mode
        self._extra[rev - self._lgt] = self._pack_entry(rev, tuple(fields))

    def _unpack_entry(self, rev, data):
        """decode an entry; the last stored field holds both compression
        modes, two bits each"""
        fields = self.index_format.unpack(data)
        modes = fields[10]
        return fields[:10] + (modes & 3, (modes >> 2) & 3)

    def _pack_entry(self, rev, entry):
        """encode an entry, merging fields 10 and 11 into one stored field"""
        modes = (entry[10] & 3) | ((entry[11] & 3) << 2)
        stored = entry[:10] + (modes,)
        return self.index_format.pack(*stored)

    def entry_binary(self, rev):
        """return the raw binary string representing a revision"""
        return self._pack_entry(rev, self[rev])

    def pack_header(self, header):
        """pack header information as binary

        Always raises ProgrammingError for this format."""
        raise error.ProgrammingError(
            'version header should go in the docket, not the index: %d'
            % header
        )
851 856
852 857
class IndexChangelogV2(IndexObject2):
    """index using `INDEX_ENTRY_CL_V2`

    Fields 3 and 4 of an entry are not stored: they always equal the
    revision number and are filled back in when unpacking."""

    index_format = revlog_constants.INDEX_ENTRY_CL_V2

    def _unpack_entry(self, rev, data, r=True):
        fields = self.index_format.unpack(data)
        head = fields[:3] + (rev, rev) + fields[3:8]
        modes = fields[8]
        return head + (modes & 3, (modes >> 2) & 3)

    def _pack_entry(self, rev, entry):
        for position in (3, 4):
            assert entry[position] == rev, entry[position]
        modes = (entry[10] & 3) | ((entry[11] & 3) << 2)
        stored = entry[:3] + entry[5:10] + (modes,)
        return self.index_format.pack(*stored)
871 876
872 877
def parse_index_devel_nodemap(data, inline):
    """like parse_index2, but always return a PersistentNodeMapIndexObject

    `inline` is not used."""
    index = PersistentNodeMapIndexObject(data)
    return index, None
876 881
877 882
def parse_dirstate(dmap, copymap, st):
    """fill `dmap` and `copymap` from the dirstate bytes `st`

    The first 40 bytes are the two parents, which are returned as a list.
    Each following record is a `>cllll` header whose last field is the
    length of the name that follows; a NUL byte in the name separates the
    file name from its copy source."""
    parents = [st[:20], st[20:40]]
    header_size = struct.calcsize(b">cllll")
    total = len(st)
    cursor = 40

    while cursor < total:
        name_start = cursor + header_size
        # a literal here is faster
        fields = _unpack(b">cllll", st[cursor:name_start])
        cursor = name_start + fields[4]
        filename = st[name_start:cursor]
        if b'\0' in filename:
            filename, source = filename.split(b'\0')
            copymap[filename] = source
        dmap[filename] = DirstateItem.from_v1_data(*fields[:4])
    return parents
897 902
898 903
def pack_dirstate(dmap, copymap, pl, now):
    """serialize the parents `pl` and every item of `dmap` and return bytes

    Items for which `need_delay(now)` is true are marked possibly dirty
    before being written."""
    buf = stringio()
    emit = buf.write
    emit(b"".join(pl))
    for filename, item in pycompat.iteritems(dmap):
        if item.need_delay(now):
            # The file was last modified "simultaneously" with the current
            # write to dirstate (i.e. within the same second for file-
            # systems with a granularity of 1 sec). This commonly happens
            # for at least a couple of files on 'update'.
            # The user could change the file without changing its size
            # within the same second. Invalidate the file's mtime in
            # dirstate, forcing future 'status' calls to compare the
            # contents of the file if the size is the same. This prevents
            # mistakenly treating such files as clean.
            item.set_possibly_dirty()

        name = filename
        if filename in copymap:
            name = b"%s\0%s" % (filename, copymap[filename])
        header = _pack(
            b">cllll",
            item.v1_state(),
            item.v1_mode(),
            item.v1_size(),
            item.v1_mtime(),
            len(name),
        )
        emit(header)
        emit(name)
    return buf.getvalue()
@@ -1,778 +1,782 b''
1 1 //! The "version 2" disk representation of the dirstate
2 2 //!
3 3 //! See `mercurial/helptext/internals/dirstate-v2.txt`
4 4
5 5 use crate::dirstate::TruncatedTimestamp;
6 6 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
7 7 use crate::dirstate_tree::path_with_basename::WithBasename;
8 8 use crate::errors::HgError;
9 9 use crate::utils::hg_path::HgPath;
10 10 use crate::DirstateEntry;
11 11 use crate::DirstateError;
12 12 use crate::DirstateParents;
13 13 use bitflags::bitflags;
14 14 use bytes_cast::unaligned::{U16Be, U32Be};
15 15 use bytes_cast::BytesCast;
16 16 use format_bytes::format_bytes;
17 17 use std::borrow::Cow;
18 18 use std::convert::{TryFrom, TryInto};
19 19
/// Added at the start of `.hg/dirstate` when the "v2" format is used.
/// This is a redundant sanity check more than an actual "magic number" since
/// `.hg/requires` already governs which format should be used.
pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";

/// Keep space for 256-bit hashes
const STORED_NODE_ID_BYTES: usize = 32;

/// … even though only 160 bits are used for now, with SHA-1
const USED_NODE_ID_BYTES: usize = 20;

/// Length in bytes of the hash of ignore patterns stored in the tree
/// metadata.
pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];

/// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
const TREE_METADATA_SIZE: usize = 44;
const NODE_SIZE: usize = 44;
37 37
/// Make sure that size-affecting changes are made knowingly
///
/// This function is never called. Each line only names an instantiation of
/// `transmute` between an on-disk struct and a byte array of its expected
/// size; `transmute` requires both types to have the same size, so a layout
/// change is expected to surface as a build error here.
#[allow(unused)]
fn static_assert_size_of() {
    let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
    // 81 = 12 (marker) + 32 (parent_1) + 32 (parent_2) + 4 (data_size)
    //    + 1 (uuid_size), i.e. every `DocketHeader` field but `metadata`.
    let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
    let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
}
45 45
// Must match `HEADER` in `mercurial/dirstateutils/docket.py`
/// Fixed-size part found at the start of the docket. `read_docket` expects
/// it to be followed by exactly `uuid_size` bytes.
#[derive(BytesCast)]
#[repr(C)]
struct DocketHeader {
    /// Expected to be equal to `V2_FORMAT_MARKER`
    marker: [u8; V2_FORMAT_MARKER.len()],
    /// Only the first `USED_NODE_ID_BYTES` bytes are read by
    /// `Docket::parents`
    parent_1: [u8; STORED_NODE_ID_BYTES],
    parent_2: [u8; STORED_NODE_ID_BYTES],

    metadata: TreeMetadata,

    /// Counted in bytes
    data_size: Size,

    /// Number of bytes following this header
    uuid_size: u8,
}
61 61
/// Parsed view of a docket, borrowing from the bytes it was read from
/// (see `read_docket`).
pub struct Docket<'on_disk> {
    header: &'on_disk DocketHeader,
    /// Bytes following the header; used to build the data file name
    uuid: &'on_disk [u8],
}
66 66
/// Fields are documented in the *Tree metadata in the docket file*
/// section of `mercurial/helptext/internals/dirstate-v2.txt`
#[derive(BytesCast)]
#[repr(C)]
struct TreeMetadata {
    /// Location of the nodes at the root of the tree
    root_nodes: ChildNodes,
    nodes_with_entry_count: Size,
    nodes_with_copy_source_count: Size,
    unreachable_bytes: Size,
    /// Written as zeros by `write`
    unused: [u8; 4],

    /// See *Optional hash of ignore patterns* section of
    /// `mercurial/helptext/internals/dirstate-v2.txt`
    ignore_patterns_hash: IgnorePatternsHash,
}
82 82
/// Fields are documented in the *The data file format*
/// section of `mercurial/helptext/internals/dirstate-v2.txt`
#[derive(BytesCast)]
#[repr(C)]
pub(super) struct Node {
    full_path: PathSlice,

    /// In bytes from `self.full_path.start`
    base_name_start: PathSize,

    /// No copy source when `copy_source.start` is zero
    copy_source: OptPathSlice,
    children: ChildNodes,
    pub(super) descendants_with_entry_count: Size,
    pub(super) tracked_descendants_count: Size,
    /// Raw bits of `Flags`
    flags: U16Be,
    size: U32Be,
    /// Either a file mtime or a directory mtime, depending on `flags`
    mtime: PackedTruncatedTimestamp,
}
101 101
bitflags! {
    // Bits stored in `Node::flags`. Unknown bits are dropped when reading
    // (`Flags::from_bits_truncate`).
    #[repr(C)]
    struct Flags: u16 {
        // A node has an entry when any of these three is set.
        const WDIR_TRACKED = 1 << 0;
        const P1_TRACKED = 1 << 1;
        const P2_INFO = 1 << 2;
        // The stored size (and the mode bits below) are meaningful.
        const HAS_MODE_AND_SIZE = 1 << 3;
        // `mtime` is a file mtime / a directory mtime. Having both set at
        // once is rejected as a parse error when reading a directory mtime.
        const HAS_FILE_MTIME = 1 << 4;
        const HAS_DIRECTORY_MTIME = 1 << 5;
        const MODE_EXEC_PERM = 1 << 6;
        const MODE_IS_SYMLINK = 1 << 7;
        // When set, stored mode, size and mtime are ignored on read.
        const EXPECTED_STATE_IS_MODIFIED = 1 << 8;
        // Always written together with HAS_DIRECTORY_MTIME, and required
        // for a directory mtime to be used on read.
        const ALL_UNKNOWN_RECORDED = 1 << 9;
        // Never set by the writer in this file.
        const ALL_IGNORED_RECORDED = 1 << 10;
        const HAS_FALLBACK_EXEC = 1 << 11;
        const FALLBACK_EXEC = 1 << 12;
        const HAS_FALLBACK_SYMLINK = 1 << 13;
        const FALLBACK_SYMLINK = 1 << 14;
        // When set, the stored file mtime is ignored on read.
        const MTIME_SECOND_AMBIGUOUS = 1 << 15;
    }
}
122 123
/// Duration since the Unix epoch
///
/// On-disk counterpart of `TruncatedTimestamp`, stored as two big-endian
/// 32-bit integers.
#[derive(BytesCast, Copy, Clone)]
#[repr(C)]
struct PackedTruncatedTimestamp {
    truncated_seconds: U32Be,
    nanoseconds: U32Be,
}
130 131
/// Counted in bytes from the start of the file
///
/// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
type Offset = U32Be;

/// Counted in number of items
///
/// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
/// NOTE(review): `DocketHeader::data_size` and
/// `TreeMetadata::unreachable_bytes` also use this type for what looks like
/// byte counts.
type Size = U32Be;

/// Counted in bytes
///
/// NOTE: we choose not to support file names/paths longer than 64 KiB.
type PathSize = U16Be;
145 146
/// A contiguous sequence of `len` times `Node`, representing the child nodes
/// of either some other node or of the repository root.
///
/// Always sorted by ascending `full_path`, to allow binary search.
/// Since nodes with the same parent nodes also have the same parent path,
/// only the `base_name`s need to be compared during binary search.
#[derive(BytesCast, Copy, Clone)]
#[repr(C)]
struct ChildNodes {
    /// Where the first node starts
    start: Offset,
    /// Number of nodes, not bytes
    len: Size,
}
158 159
/// A `HgPath` of `len` bytes
#[derive(BytesCast, Copy, Clone)]
#[repr(C)]
struct PathSlice {
    /// Where the path bytes start
    start: Offset,
    len: PathSize,
}

/// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
type OptPathSlice = PathSlice;
169 170
/// Unexpected file format found in `.hg/dirstate` with the "v2" format.
///
/// This should only happen if Mercurial is buggy or a repository is corrupted.
///
/// The type carries no detail about what was wrong; it converts into
/// `HgError` and `DirstateError`.
#[derive(Debug)]
pub struct DirstateV2ParseError;
175 176
176 177 impl From<DirstateV2ParseError> for HgError {
177 178 fn from(_: DirstateV2ParseError) -> Self {
178 179 HgError::corrupted("dirstate-v2 parse error")
179 180 }
180 181 }
181 182
182 183 impl From<DirstateV2ParseError> for crate::DirstateError {
183 184 fn from(error: DirstateV2ParseError) -> Self {
184 185 HgError::from(error).into()
185 186 }
186 187 }
187 188
188 189 impl<'on_disk> Docket<'on_disk> {
189 190 pub fn parents(&self) -> DirstateParents {
190 191 use crate::Node;
191 192 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
192 193 .unwrap()
193 194 .clone();
194 195 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
195 196 .unwrap()
196 197 .clone();
197 198 DirstateParents { p1, p2 }
198 199 }
199 200
200 201 pub fn tree_metadata(&self) -> &[u8] {
201 202 self.header.metadata.as_bytes()
202 203 }
203 204
204 205 pub fn data_size(&self) -> usize {
205 206 // This `unwrap` could only panic on a 16-bit CPU
206 207 self.header.data_size.get().try_into().unwrap()
207 208 }
208 209
209 210 pub fn data_filename(&self) -> String {
210 211 String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
211 212 }
212 213 }
213 214
214 215 pub fn read_docket(
215 216 on_disk: &[u8],
216 217 ) -> Result<Docket<'_>, DirstateV2ParseError> {
217 218 let (header, uuid) =
218 219 DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
219 220 let uuid_size = header.uuid_size as usize;
220 221 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
221 222 Ok(Docket { header, uuid })
222 223 } else {
223 224 Err(DirstateV2ParseError)
224 225 }
225 226 }
226 227
227 228 pub(super) fn read<'on_disk>(
228 229 on_disk: &'on_disk [u8],
229 230 metadata: &[u8],
230 231 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
231 232 if on_disk.is_empty() {
232 233 return Ok(DirstateMap::empty(on_disk));
233 234 }
234 235 let (meta, _) = TreeMetadata::from_bytes(metadata)
235 236 .map_err(|_| DirstateV2ParseError)?;
236 237 let dirstate_map = DirstateMap {
237 238 on_disk,
238 239 root: dirstate_map::ChildNodes::OnDisk(read_nodes(
239 240 on_disk,
240 241 meta.root_nodes,
241 242 )?),
242 243 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
243 244 nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
244 245 ignore_patterns_hash: meta.ignore_patterns_hash,
245 246 unreachable_bytes: meta.unreachable_bytes.get(),
246 247 };
247 248 Ok(dirstate_map)
248 249 }
249 250
250 251 impl Node {
251 252 pub(super) fn full_path<'on_disk>(
252 253 &self,
253 254 on_disk: &'on_disk [u8],
254 255 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
255 256 read_hg_path(on_disk, self.full_path)
256 257 }
257 258
258 259 pub(super) fn base_name_start<'on_disk>(
259 260 &self,
260 261 ) -> Result<usize, DirstateV2ParseError> {
261 262 let start = self.base_name_start.get();
262 263 if start < self.full_path.len.get() {
263 264 let start = usize::try_from(start)
264 265 // u32 -> usize, could only panic on a 16-bit CPU
265 266 .expect("dirstate-v2 base_name_start out of bounds");
266 267 Ok(start)
267 268 } else {
268 269 Err(DirstateV2ParseError)
269 270 }
270 271 }
271 272
272 273 pub(super) fn base_name<'on_disk>(
273 274 &self,
274 275 on_disk: &'on_disk [u8],
275 276 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
276 277 let full_path = self.full_path(on_disk)?;
277 278 let base_name_start = self.base_name_start()?;
278 279 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
279 280 }
280 281
281 282 pub(super) fn path<'on_disk>(
282 283 &self,
283 284 on_disk: &'on_disk [u8],
284 285 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
285 286 Ok(WithBasename::from_raw_parts(
286 287 Cow::Borrowed(self.full_path(on_disk)?),
287 288 self.base_name_start()?,
288 289 ))
289 290 }
290 291
291 292 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
292 293 self.copy_source.start.get() != 0
293 294 }
294 295
295 296 pub(super) fn copy_source<'on_disk>(
296 297 &self,
297 298 on_disk: &'on_disk [u8],
298 299 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
299 300 Ok(if self.has_copy_source() {
300 301 Some(read_hg_path(on_disk, self.copy_source)?)
301 302 } else {
302 303 None
303 304 })
304 305 }
305 306
306 307 fn flags(&self) -> Flags {
307 308 Flags::from_bits_truncate(self.flags.get())
308 309 }
309 310
310 311 fn has_entry(&self) -> bool {
311 312 self.flags().intersects(
312 313 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
313 314 )
314 315 }
315 316
316 317 pub(super) fn node_data(
317 318 &self,
318 319 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
319 320 if self.has_entry() {
320 321 Ok(dirstate_map::NodeData::Entry(self.assume_entry()?))
321 322 } else if let Some(mtime) = self.cached_directory_mtime()? {
322 323 Ok(dirstate_map::NodeData::CachedDirectory { mtime })
323 324 } else {
324 325 Ok(dirstate_map::NodeData::None)
325 326 }
326 327 }
327 328
328 329 pub(super) fn cached_directory_mtime(
329 330 &self,
330 331 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
331 332 // For now we do not have code to handle ALL_UNKNOWN_RECORDED, so we
332 333 // ignore the mtime if the flag is set.
333 334 if self.flags().contains(Flags::HAS_DIRECTORY_MTIME)
334 335 && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED)
335 336 {
336 337 if self.flags().contains(Flags::HAS_FILE_MTIME) {
337 338 Err(DirstateV2ParseError)
338 339 } else {
339 340 Ok(Some(self.mtime.try_into()?))
340 341 }
341 342 } else {
342 343 Ok(None)
343 344 }
344 345 }
345 346
346 347 fn synthesize_unix_mode(&self) -> u32 {
347 348 let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
348 349 libc::S_IFLNK
349 350 } else {
350 351 libc::S_IFREG
351 352 };
352 353 let permisions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
353 354 0o755
354 355 } else {
355 356 0o644
356 357 };
357 358 file_type | permisions
358 359 }
359 360
360 361 fn assume_entry(&self) -> Result<DirstateEntry, DirstateV2ParseError> {
361 362 // TODO: convert through raw bits instead?
362 363 let wdir_tracked = self.flags().contains(Flags::WDIR_TRACKED);
363 364 let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
364 365 let p2_info = self.flags().contains(Flags::P2_INFO);
365 366 let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
366 367 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
367 368 {
368 369 Some((self.synthesize_unix_mode(), self.size.into()))
369 370 } else {
370 371 None
371 372 };
372 373 let mtime = if self.flags().contains(Flags::HAS_FILE_MTIME)
373 374 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
375 // The current code is not able to do the more subtle comparison that the
376 // MTIME_SECOND_AMBIGUOUS requires. So we ignore the mtime
377 && !self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS)
374 378 {
375 379 // TODO: replace this by `self.mtime.try_into()?` to use
376 380 // sub-second precision from the file.
377 381 // We don’t do this yet because other parts of the code
378 382 // always set it to zero.
379 383 let mtime = TruncatedTimestamp::from_already_truncated(
380 384 self.mtime.truncated_seconds.get(),
381 385 0,
382 386 )?;
383 387 Some(mtime)
384 388 } else {
385 389 None
386 390 };
387 391 Ok(DirstateEntry::from_v2_data(
388 392 wdir_tracked,
389 393 p1_tracked,
390 394 p2_info,
391 395 mode_size,
392 396 mtime,
393 397 None,
394 398 None,
395 399 ))
396 400 }
397 401
398 402 pub(super) fn entry(
399 403 &self,
400 404 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
401 405 if self.has_entry() {
402 406 Ok(Some(self.assume_entry()?))
403 407 } else {
404 408 Ok(None)
405 409 }
406 410 }
407 411
408 412 pub(super) fn children<'on_disk>(
409 413 &self,
410 414 on_disk: &'on_disk [u8],
411 415 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
412 416 read_nodes(on_disk, self.children)
413 417 }
414 418
415 419 pub(super) fn to_in_memory_node<'on_disk>(
416 420 &self,
417 421 on_disk: &'on_disk [u8],
418 422 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
419 423 Ok(dirstate_map::Node {
420 424 children: dirstate_map::ChildNodes::OnDisk(
421 425 self.children(on_disk)?,
422 426 ),
423 427 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
424 428 data: self.node_data()?,
425 429 descendants_with_entry_count: self
426 430 .descendants_with_entry_count
427 431 .get(),
428 432 tracked_descendants_count: self.tracked_descendants_count.get(),
429 433 })
430 434 }
431 435
432 436 fn from_dirstate_entry(
433 437 entry: &DirstateEntry,
434 438 ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
435 439 let (
436 440 wdir_tracked,
437 441 p1_tracked,
438 442 p2_info,
439 443 mode_size_opt,
440 444 mtime_opt,
441 445 fallback_exec,
442 446 fallback_symlink,
443 447 ) = entry.v2_data();
444 448 // TODO: convert throug raw flag bits instead?
445 449 let mut flags = Flags::empty();
446 450 flags.set(Flags::WDIR_TRACKED, wdir_tracked);
447 451 flags.set(Flags::P1_TRACKED, p1_tracked);
448 452 flags.set(Flags::P2_INFO, p2_info);
449 453 let size = if let Some((m, s)) = mode_size_opt {
450 454 let exec_perm = m & libc::S_IXUSR != 0;
451 455 let is_symlink = m & libc::S_IFMT == libc::S_IFLNK;
452 456 flags.set(Flags::MODE_EXEC_PERM, exec_perm);
453 457 flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
454 458 flags.insert(Flags::HAS_MODE_AND_SIZE);
455 459 s.into()
456 460 } else {
457 461 0.into()
458 462 };
459 463 let mtime = if let Some(m) = mtime_opt {
460 464 flags.insert(Flags::HAS_FILE_MTIME);
461 465 m.into()
462 466 } else {
463 467 PackedTruncatedTimestamp::null()
464 468 };
465 469 if let Some(f_exec) = fallback_exec {
466 470 flags.insert(Flags::HAS_FALLBACK_EXEC);
467 471 if f_exec {
468 472 flags.insert(Flags::FALLBACK_EXEC);
469 473 }
470 474 }
471 475 if let Some(f_symlink) = fallback_symlink {
472 476 flags.insert(Flags::HAS_FALLBACK_SYMLINK);
473 477 if f_symlink {
474 478 flags.insert(Flags::FALLBACK_SYMLINK);
475 479 }
476 480 }
477 481 (flags, size, mtime)
478 482 }
479 483 }
480 484
481 485 fn read_hg_path(
482 486 on_disk: &[u8],
483 487 slice: PathSlice,
484 488 ) -> Result<&HgPath, DirstateV2ParseError> {
485 489 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
486 490 }
487 491
488 492 fn read_nodes(
489 493 on_disk: &[u8],
490 494 slice: ChildNodes,
491 495 ) -> Result<&[Node], DirstateV2ParseError> {
492 496 read_slice(on_disk, slice.start, slice.len.get())
493 497 }
494 498
495 499 fn read_slice<T, Len>(
496 500 on_disk: &[u8],
497 501 start: Offset,
498 502 len: Len,
499 503 ) -> Result<&[T], DirstateV2ParseError>
500 504 where
501 505 T: BytesCast,
502 506 Len: TryInto<usize>,
503 507 {
504 508 // Either `usize::MAX` would result in "out of bounds" error since a single
505 509 // `&[u8]` cannot occupy the entire addess space.
506 510 let start = start.get().try_into().unwrap_or(std::usize::MAX);
507 511 let len = len.try_into().unwrap_or(std::usize::MAX);
508 512 on_disk
509 513 .get(start..)
510 514 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
511 515 .map(|(slice, _rest)| slice)
512 516 .ok_or_else(|| DirstateV2ParseError)
513 517 }
514 518
515 519 pub(crate) fn for_each_tracked_path<'on_disk>(
516 520 on_disk: &'on_disk [u8],
517 521 metadata: &[u8],
518 522 mut f: impl FnMut(&'on_disk HgPath),
519 523 ) -> Result<(), DirstateV2ParseError> {
520 524 let (meta, _) = TreeMetadata::from_bytes(metadata)
521 525 .map_err(|_| DirstateV2ParseError)?;
522 526 fn recur<'on_disk>(
523 527 on_disk: &'on_disk [u8],
524 528 nodes: ChildNodes,
525 529 f: &mut impl FnMut(&'on_disk HgPath),
526 530 ) -> Result<(), DirstateV2ParseError> {
527 531 for node in read_nodes(on_disk, nodes)? {
528 532 if let Some(entry) = node.entry()? {
529 533 if entry.state().is_tracked() {
530 534 f(node.full_path(on_disk)?)
531 535 }
532 536 }
533 537 recur(on_disk, node.children, f)?
534 538 }
535 539 Ok(())
536 540 }
537 541 recur(on_disk, meta.root_nodes, &mut f)
538 542 }
539 543
540 544 /// Returns new data and metadata, together with whether that data should be
541 545 /// appended to the existing data file whose content is at
542 546 /// `dirstate_map.on_disk` (true), instead of written to a new data file
543 547 /// (false).
544 548 pub(super) fn write(
545 549 dirstate_map: &mut DirstateMap,
546 550 can_append: bool,
547 551 ) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
548 552 let append = can_append && dirstate_map.write_should_append();
549 553
550 554 // This ignores the space for paths, and for nodes without an entry.
551 555 // TODO: better estimate? Skip the `Vec` and write to a file directly?
552 556 let size_guess = std::mem::size_of::<Node>()
553 557 * dirstate_map.nodes_with_entry_count as usize;
554 558
555 559 let mut writer = Writer {
556 560 dirstate_map,
557 561 append,
558 562 out: Vec::with_capacity(size_guess),
559 563 };
560 564
561 565 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
562 566
563 567 let meta = TreeMetadata {
564 568 root_nodes,
565 569 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
566 570 nodes_with_copy_source_count: dirstate_map
567 571 .nodes_with_copy_source_count
568 572 .into(),
569 573 unreachable_bytes: dirstate_map.unreachable_bytes.into(),
570 574 unused: [0; 4],
571 575 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
572 576 };
573 577 Ok((writer.out, meta.as_bytes().to_vec(), append))
574 578 }
575 579
/// State kept while serializing a `DirstateMap` (see `write`).
struct Writer<'dmap, 'on_disk> {
    dirstate_map: &'dmap DirstateMap<'on_disk>,
    /// Whether nodes that are already on disk are referenced in place
    /// instead of being written again
    append: bool,
    /// Bytes produced so far
    out: Vec<u8>,
}
581 585
582 586 impl Writer<'_, '_> {
/// Serializes `nodes` and everything they refer to (descendants, paths,
/// copy sources), and returns where the nodes themselves were written.
///
/// In append mode, nodes that are still in their on-disk form are not
/// written again: their existing location is returned.
///
/// # Panics
///
/// Panics if on-disk nodes are not found within `on_disk` in append mode,
/// or if a base name position does not fit in 16 bits.
fn write_nodes(
    &mut self,
    nodes: dirstate_map::ChildNodesRef,
) -> Result<ChildNodes, DirstateError> {
    // Reuse already-written nodes if possible
    if self.append {
        if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
            let start = self.on_disk_offset_of(nodes_slice).expect(
                "dirstate-v2 OnDisk nodes not found within on_disk",
            );
            let len = child_nodes_len_from_usize(nodes_slice.len());
            return Ok(ChildNodes { start, len });
        }
    }

    // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
    // undefined iteration order. Sort to enable binary search in the
    // written file.
    let nodes = nodes.sorted();
    let nodes_len = nodes.len();

    // First accumulate serialized nodes in a `Vec`
    let mut on_disk_nodes = Vec::with_capacity(nodes_len);
    for node in nodes {
        // Children and paths are written before the node that refers to
        // them.
        let children =
            self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
        let full_path = node.full_path(self.dirstate_map.on_disk)?;
        let full_path = self.write_path(full_path.as_bytes());
        let copy_source = if let Some(source) =
            node.copy_source(self.dirstate_map.on_disk)?
        {
            self.write_path(source.as_bytes())
        } else {
            // A zero `start` stands for "no copy source"
            PathSlice {
                start: 0.into(),
                len: 0.into(),
            }
        };
        on_disk_nodes.push(match node {
            NodeRef::InMemory(path, node) => {
                let (flags, size, mtime) = match &node.data {
                    dirstate_map::NodeData::Entry(entry) => {
                        Node::from_dirstate_entry(entry)
                    }
                    dirstate_map::NodeData::CachedDirectory { mtime } => (
                        // We currently never set a mtime if unknown files
                        // are present. So if we have a mtime for a
                        // directory, we know there are no unknown files
                        // and we blindly set ALL_UNKNOWN_RECORDED.
                        //
                        // We never set ALL_IGNORED_RECORDED since we don't
                        // track that case currently.
                        Flags::HAS_DIRECTORY_MTIME
                            | Flags::ALL_UNKNOWN_RECORDED,
                        0.into(),
                        (*mtime).into(),
                    ),
                    dirstate_map::NodeData::None => (
                        Flags::empty(),
                        0.into(),
                        PackedTruncatedTimestamp::null(),
                    ),
                };
                Node {
                    children,
                    copy_source,
                    full_path,
                    base_name_start: u16::try_from(path.base_name_start())
                        // Could only panic for paths over 64 KiB
                        .expect("dirstate-v2 path length overflow")
                        .into(),
                    descendants_with_entry_count: node
                        .descendants_with_entry_count
                        .into(),
                    tracked_descendants_count: node
                        .tracked_descendants_count
                        .into(),
                    flags: flags.bits().into(),
                    size,
                    mtime,
                }
            }
            // An on-disk node keeps all of its fields except the three
            // locations, which point into the data being written.
            NodeRef::OnDisk(node) => Node {
                children,
                copy_source,
                full_path,
                ..*node
            },
        })
    }
    // … so we can write them contiguously, after writing everything else
    // they refer to.
    let start = self.current_offset();
    let len = child_nodes_len_from_usize(nodes_len);
    self.out.extend(on_disk_nodes.as_bytes());
    Ok(ChildNodes { start, len })
}
683 687
684 688 /// If the given slice of items is within `on_disk`, returns its offset
685 689 /// from the start of `on_disk`.
686 690 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
687 691 where
688 692 T: BytesCast,
689 693 {
690 694 fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
691 695 let start = slice.as_ptr() as usize;
692 696 let end = start + slice.len();
693 697 start..=end
694 698 }
695 699 let slice_addresses = address_range(slice.as_bytes());
696 700 let on_disk_addresses = address_range(self.dirstate_map.on_disk);
697 701 if on_disk_addresses.contains(slice_addresses.start())
698 702 && on_disk_addresses.contains(slice_addresses.end())
699 703 {
700 704 let offset = slice_addresses.start() - on_disk_addresses.start();
701 705 Some(offset_from_usize(offset))
702 706 } else {
703 707 None
704 708 }
705 709 }
706 710
707 711 fn current_offset(&mut self) -> Offset {
708 712 let mut offset = self.out.len();
709 713 if self.append {
710 714 offset += self.dirstate_map.on_disk.len()
711 715 }
712 716 offset_from_usize(offset)
713 717 }
714 718
715 719 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
716 720 let len = path_len_from_usize(slice.len());
717 721 // Reuse an already-written path if possible
718 722 if self.append {
719 723 if let Some(start) = self.on_disk_offset_of(slice) {
720 724 return PathSlice { start, len };
721 725 }
722 726 }
723 727 let start = self.current_offset();
724 728 self.out.extend(slice.as_bytes());
725 729 PathSlice { start, len }
726 730 }
727 731 }
728 732
729 733 fn offset_from_usize(x: usize) -> Offset {
730 734 u32::try_from(x)
731 735 // Could only panic for a dirstate file larger than 4 GiB
732 736 .expect("dirstate-v2 offset overflow")
733 737 .into()
734 738 }
735 739
736 740 fn child_nodes_len_from_usize(x: usize) -> Size {
737 741 u32::try_from(x)
738 742 // Could only panic with over 4 billion nodes
739 743 .expect("dirstate-v2 slice length overflow")
740 744 .into()
741 745 }
742 746
743 747 fn path_len_from_usize(x: usize) -> PathSize {
744 748 u16::try_from(x)
745 749 // Could only panic for paths over 64 KiB
746 750 .expect("dirstate-v2 path length overflow")
747 751 .into()
748 752 }
749 753
750 754 impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
751 755 fn from(timestamp: TruncatedTimestamp) -> Self {
752 756 Self {
753 757 truncated_seconds: timestamp.truncated_seconds().into(),
754 758 nanoseconds: timestamp.nanoseconds().into(),
755 759 }
756 760 }
757 761 }
758 762
759 763 impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
760 764 type Error = DirstateV2ParseError;
761 765
762 766 fn try_from(
763 767 timestamp: PackedTruncatedTimestamp,
764 768 ) -> Result<Self, Self::Error> {
765 769 Self::from_already_truncated(
766 770 timestamp.truncated_seconds.get(),
767 771 timestamp.nanoseconds.get(),
768 772 )
769 773 }
770 774 }
771 775 impl PackedTruncatedTimestamp {
772 776 fn null() -> Self {
773 777 Self {
774 778 truncated_seconds: 0.into(),
775 779 nanoseconds: 0.into(),
776 780 }
777 781 }
778 782 }
General Comments 0
You need to be logged in to leave comments. Login now