##// END OF EJS Templates
dirstate-v2: Separate HAS_FILE_MTIME and HAS_DIRECTORY_MTIME flags...
Simon Sapin -
r49046:f7fd629f default
parent child Browse files
Show More
@@ -1,1174 +1,1174 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12 #include <ctype.h>
13 13 #include <stddef.h>
14 14 #include <string.h>
15 15
16 16 #include "bitmanipulation.h"
17 17 #include "charencode.h"
18 18 #include "util.h"
19 19
20 20 #ifdef IS_PY3K
21 21 /* The mapping of Python types is meant to be temporary to get Python
22 22 * 3 to compile. We should remove this once Python 3 support is fully
23 23 * supported and proper types are used in the extensions themselves. */
24 24 #define PyInt_Check PyLong_Check
25 25 #define PyInt_FromLong PyLong_FromLong
26 26 #define PyInt_FromSsize_t PyLong_FromSsize_t
27 27 #define PyInt_AsLong PyLong_AsLong
28 28 #endif
29 29
/* Message text for a Python minor version mismatch (its use site is not
 * visible in this part of the file). */
static const char *const versionerrortext = "Python minor version mismatch";

/* Sentinel values reported in the v1 "size" field by
 * dirstate_item_c_v1_size() and recognised by dirstate_item_from_v1_data(). */
static const int dirstate_v1_from_p2 = -2;
static const int dirstate_v1_nonnormal = -1;
/* Sentinel v1 mtime returned when the stored mtime cannot be relied upon. */
static const int ambiguous_time = -1;
35 35
36 36 static PyObject *dict_new_presized(PyObject *self, PyObject *args)
37 37 {
38 38 Py_ssize_t expected_size;
39 39
40 40 if (!PyArg_ParseTuple(args, "n:make_presized_dict", &expected_size)) {
41 41 return NULL;
42 42 }
43 43
44 44 return _dict_new_presized(expected_size);
45 45 }
46 46
47 47 static PyObject *dirstate_item_new(PyTypeObject *subtype, PyObject *args,
48 48 PyObject *kwds)
49 49 {
50 50 /* We do all the initialization here and not a tp_init function because
51 51 * dirstate_item is immutable. */
52 52 dirstateItemObject *t;
53 53 int wc_tracked;
54 54 int p1_tracked;
55 55 int p2_info;
56 56 int has_meaningful_data;
57 57 int has_meaningful_mtime;
58 58 int mode;
59 59 int size;
60 60 int mtime;
61 61 PyObject *parentfiledata;
62 62 static char *keywords_name[] = {
63 63 "wc_tracked",
64 64 "p1_tracked",
65 65 "p2_info",
66 66 "has_meaningful_data",
67 67 "has_meaningful_mtime",
68 68 "parentfiledata",
69 69 NULL,
70 70 };
71 71 wc_tracked = 0;
72 72 p1_tracked = 0;
73 73 p2_info = 0;
74 74 has_meaningful_mtime = 1;
75 75 has_meaningful_data = 1;
76 76 parentfiledata = Py_None;
77 77 if (!PyArg_ParseTupleAndKeywords(
78 78 args, kwds, "|iiiiiO", keywords_name, &wc_tracked, &p1_tracked,
79 79 &p2_info, &has_meaningful_data, &has_meaningful_mtime,
80 80 &parentfiledata)) {
81 81 return NULL;
82 82 }
83 83 t = (dirstateItemObject *)subtype->tp_alloc(subtype, 1);
84 84 if (!t) {
85 85 return NULL;
86 86 }
87 87
88 88 t->flags = 0;
89 89 if (wc_tracked) {
90 90 t->flags |= dirstate_flag_wc_tracked;
91 91 }
92 92 if (p1_tracked) {
93 93 t->flags |= dirstate_flag_p1_tracked;
94 94 }
95 95 if (p2_info) {
96 96 t->flags |= dirstate_flag_p2_info;
97 97 }
98 98
99 99 if (parentfiledata != Py_None) {
100 100 if (!PyTuple_CheckExact(parentfiledata)) {
101 101 PyErr_SetString(
102 102 PyExc_TypeError,
103 103 "parentfiledata should be a Tuple or None");
104 104 return NULL;
105 105 }
106 106 mode = (int)PyLong_AsLong(PyTuple_GetItem(parentfiledata, 0));
107 107 size = (int)PyLong_AsLong(PyTuple_GetItem(parentfiledata, 1));
108 108 mtime = (int)PyLong_AsLong(PyTuple_GetItem(parentfiledata, 2));
109 109 } else {
110 110 has_meaningful_data = 0;
111 111 has_meaningful_mtime = 0;
112 112 }
113 113 if (has_meaningful_data) {
114 114 t->flags |= dirstate_flag_has_meaningful_data;
115 115 t->mode = mode;
116 116 t->size = size;
117 117 } else {
118 118 t->mode = 0;
119 119 t->size = 0;
120 120 }
121 121 if (has_meaningful_mtime) {
122 t->flags |= dirstate_flag_has_meaningful_mtime;
122 t->flags |= dirstate_flag_has_file_mtime;
123 123 t->mtime = mtime;
124 124 } else {
125 125 t->mtime = 0;
126 126 }
127 127 return (PyObject *)t;
128 128 }
129 129
130 130 static void dirstate_item_dealloc(PyObject *o)
131 131 {
132 132 PyObject_Del(o);
133 133 }
134 134
135 135 static inline bool dirstate_item_c_tracked(dirstateItemObject *self)
136 136 {
137 137 return (self->flags & dirstate_flag_wc_tracked);
138 138 }
139 139
140 140 static inline bool dirstate_item_c_any_tracked(dirstateItemObject *self)
141 141 {
142 142 const unsigned char mask = dirstate_flag_wc_tracked |
143 143 dirstate_flag_p1_tracked |
144 144 dirstate_flag_p2_info;
145 145 return (self->flags & mask);
146 146 }
147 147
148 148 static inline bool dirstate_item_c_added(dirstateItemObject *self)
149 149 {
150 150 const unsigned char mask =
151 151 (dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
152 152 dirstate_flag_p2_info);
153 153 const unsigned char target = dirstate_flag_wc_tracked;
154 154 return (self->flags & mask) == target;
155 155 }
156 156
157 157 static inline bool dirstate_item_c_removed(dirstateItemObject *self)
158 158 {
159 159 if (self->flags & dirstate_flag_wc_tracked) {
160 160 return false;
161 161 }
162 162 return (self->flags &
163 163 (dirstate_flag_p1_tracked | dirstate_flag_p2_info));
164 164 }
165 165
166 166 static inline bool dirstate_item_c_merged(dirstateItemObject *self)
167 167 {
168 168 return ((self->flags & dirstate_flag_wc_tracked) &&
169 169 (self->flags & dirstate_flag_p1_tracked) &&
170 170 (self->flags & dirstate_flag_p2_info));
171 171 }
172 172
173 173 static inline bool dirstate_item_c_from_p2(dirstateItemObject *self)
174 174 {
175 175 return ((self->flags & dirstate_flag_wc_tracked) &&
176 176 !(self->flags & dirstate_flag_p1_tracked) &&
177 177 (self->flags & dirstate_flag_p2_info));
178 178 }
179 179
180 180 static inline char dirstate_item_c_v1_state(dirstateItemObject *self)
181 181 {
182 182 if (dirstate_item_c_removed(self)) {
183 183 return 'r';
184 184 } else if (dirstate_item_c_merged(self)) {
185 185 return 'm';
186 186 } else if (dirstate_item_c_added(self)) {
187 187 return 'a';
188 188 } else {
189 189 return 'n';
190 190 }
191 191 }
192 192
193 193 static inline int dirstate_item_c_v1_mode(dirstateItemObject *self)
194 194 {
195 195 if (self->flags & dirstate_flag_has_meaningful_data) {
196 196 return self->mode;
197 197 } else {
198 198 return 0;
199 199 }
200 200 }
201 201
202 202 static inline int dirstate_item_c_v1_size(dirstateItemObject *self)
203 203 {
204 204 if (!(self->flags & dirstate_flag_wc_tracked) &&
205 205 (self->flags & dirstate_flag_p2_info)) {
206 206 if (self->flags & dirstate_flag_p1_tracked) {
207 207 return dirstate_v1_nonnormal;
208 208 } else {
209 209 return dirstate_v1_from_p2;
210 210 }
211 211 } else if (dirstate_item_c_removed(self)) {
212 212 return 0;
213 213 } else if (self->flags & dirstate_flag_p2_info) {
214 214 return dirstate_v1_from_p2;
215 215 } else if (dirstate_item_c_added(self)) {
216 216 return dirstate_v1_nonnormal;
217 217 } else if (self->flags & dirstate_flag_has_meaningful_data) {
218 218 return self->size;
219 219 } else {
220 220 return dirstate_v1_nonnormal;
221 221 }
222 222 }
223 223
224 224 static inline int dirstate_item_c_v1_mtime(dirstateItemObject *self)
225 225 {
226 226 if (dirstate_item_c_removed(self)) {
227 227 return 0;
228 } else if (!(self->flags & dirstate_flag_has_meaningful_mtime) ||
228 } else if (!(self->flags & dirstate_flag_has_file_mtime) ||
229 229 !(self->flags & dirstate_flag_p1_tracked) ||
230 230 !(self->flags & dirstate_flag_wc_tracked) ||
231 231 (self->flags & dirstate_flag_p2_info)) {
232 232 return ambiguous_time;
233 233 } else {
234 234 return self->mtime;
235 235 }
236 236 }
237 237
238 238 static PyObject *dirstate_item_v2_data(dirstateItemObject *self)
239 239 {
240 240 unsigned char flags = self->flags;
241 241 int mode = dirstate_item_c_v1_mode(self);
242 242 if ((mode & S_IXUSR) != 0) {
243 243 flags |= dirstate_flag_mode_exec_perm;
244 244 } else {
245 245 flags &= ~dirstate_flag_mode_exec_perm;
246 246 }
247 247 if (S_ISLNK(mode)) {
248 248 flags |= dirstate_flag_mode_is_symlink;
249 249 } else {
250 250 flags &= ~dirstate_flag_mode_is_symlink;
251 251 }
252 252 return Py_BuildValue("Bii", flags, self->size, self->mtime);
253 253 };
254 254
255 255 static PyObject *dirstate_item_v1_state(dirstateItemObject *self)
256 256 {
257 257 char state = dirstate_item_c_v1_state(self);
258 258 return PyBytes_FromStringAndSize(&state, 1);
259 259 };
260 260
261 261 static PyObject *dirstate_item_v1_mode(dirstateItemObject *self)
262 262 {
263 263 return PyInt_FromLong(dirstate_item_c_v1_mode(self));
264 264 };
265 265
266 266 static PyObject *dirstate_item_v1_size(dirstateItemObject *self)
267 267 {
268 268 return PyInt_FromLong(dirstate_item_c_v1_size(self));
269 269 };
270 270
271 271 static PyObject *dirstate_item_v1_mtime(dirstateItemObject *self)
272 272 {
273 273 return PyInt_FromLong(dirstate_item_c_v1_mtime(self));
274 274 };
275 275
276 276 static PyObject *dirstate_item_need_delay(dirstateItemObject *self,
277 277 PyObject *value)
278 278 {
279 279 long now;
280 280 if (!pylong_to_long(value, &now)) {
281 281 return NULL;
282 282 }
283 283 if (dirstate_item_c_v1_state(self) == 'n' &&
284 284 dirstate_item_c_v1_mtime(self) == now) {
285 285 Py_RETURN_TRUE;
286 286 } else {
287 287 Py_RETURN_FALSE;
288 288 }
289 289 };
290 290
291 291 /* This will never change since it's bound to V1
292 292 */
293 293 static inline dirstateItemObject *
294 294 dirstate_item_from_v1_data(char state, int mode, int size, int mtime)
295 295 {
296 296 dirstateItemObject *t =
297 297 PyObject_New(dirstateItemObject, &dirstateItemType);
298 298 if (!t) {
299 299 return NULL;
300 300 }
301 301 t->flags = 0;
302 302 t->mode = 0;
303 303 t->size = 0;
304 304 t->mtime = 0;
305 305
306 306 if (state == 'm') {
307 307 t->flags = (dirstate_flag_wc_tracked |
308 308 dirstate_flag_p1_tracked | dirstate_flag_p2_info);
309 309 } else if (state == 'a') {
310 310 t->flags = dirstate_flag_wc_tracked;
311 311 } else if (state == 'r') {
312 312 if (size == dirstate_v1_nonnormal) {
313 313 t->flags =
314 314 dirstate_flag_p1_tracked | dirstate_flag_p2_info;
315 315 } else if (size == dirstate_v1_from_p2) {
316 316 t->flags = dirstate_flag_p2_info;
317 317 } else {
318 318 t->flags = dirstate_flag_p1_tracked;
319 319 }
320 320 } else if (state == 'n') {
321 321 if (size == dirstate_v1_from_p2) {
322 322 t->flags =
323 323 dirstate_flag_wc_tracked | dirstate_flag_p2_info;
324 324 } else if (size == dirstate_v1_nonnormal) {
325 325 t->flags =
326 326 dirstate_flag_wc_tracked | dirstate_flag_p1_tracked;
327 327 } else if (mtime == ambiguous_time) {
328 328 t->flags = (dirstate_flag_wc_tracked |
329 329 dirstate_flag_p1_tracked |
330 330 dirstate_flag_has_meaningful_data);
331 331 t->mode = mode;
332 332 t->size = size;
333 333 } else {
334 334 t->flags = (dirstate_flag_wc_tracked |
335 335 dirstate_flag_p1_tracked |
336 336 dirstate_flag_has_meaningful_data |
337 dirstate_flag_has_meaningful_mtime);
337 dirstate_flag_has_file_mtime);
338 338 t->mode = mode;
339 339 t->size = size;
340 340 t->mtime = mtime;
341 341 }
342 342 } else {
343 343 PyErr_Format(PyExc_RuntimeError,
344 344 "unknown state: `%c` (%d, %d, %d)", state, mode,
345 345 size, mtime, NULL);
346 346 Py_DECREF(t);
347 347 return NULL;
348 348 }
349 349
350 350 return t;
351 351 }
352 352
353 353 /* This will never change since it's bound to V1, unlike `dirstate_item_new` */
354 354 static PyObject *dirstate_item_from_v1_meth(PyTypeObject *subtype,
355 355 PyObject *args)
356 356 {
357 357 /* We do all the initialization here and not a tp_init function because
358 358 * dirstate_item is immutable. */
359 359 char state;
360 360 int size, mode, mtime;
361 361 if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime)) {
362 362 return NULL;
363 363 }
364 364 return (PyObject *)dirstate_item_from_v1_data(state, mode, size, mtime);
365 365 };
366 366
367 367 static PyObject *dirstate_item_from_v2_meth(PyTypeObject *subtype,
368 368 PyObject *args)
369 369 {
370 370 dirstateItemObject *t =
371 371 PyObject_New(dirstateItemObject, &dirstateItemType);
372 372 if (!t) {
373 373 return NULL;
374 374 }
375 375 if (!PyArg_ParseTuple(args, "bii", &t->flags, &t->size, &t->mtime)) {
376 376 return NULL;
377 377 }
378 378 t->mode = 0;
379 379 if (t->flags & dirstate_flag_has_meaningful_data) {
380 380 if (t->flags & dirstate_flag_mode_exec_perm) {
381 381 t->mode = 0755;
382 382 } else {
383 383 t->mode = 0644;
384 384 }
385 385 if (t->flags & dirstate_flag_mode_is_symlink) {
386 386 t->mode |= S_IFLNK;
387 387 } else {
388 388 t->mode |= S_IFREG;
389 389 }
390 390 }
391 391 return (PyObject *)t;
392 392 };
393 393
394 394 /* This means the next status call will have to actually check its content
395 395 to make sure it is correct. */
396 396 static PyObject *dirstate_item_set_possibly_dirty(dirstateItemObject *self)
397 397 {
398 self->flags &= ~dirstate_flag_has_meaningful_mtime;
398 self->flags &= ~dirstate_flag_has_file_mtime;
399 399 Py_RETURN_NONE;
400 400 }
401 401
402 402 /* See docstring of the python implementation for details */
403 403 static PyObject *dirstate_item_set_clean(dirstateItemObject *self,
404 404 PyObject *args)
405 405 {
406 406 int size, mode, mtime;
407 407 if (!PyArg_ParseTuple(args, "iii", &mode, &size, &mtime)) {
408 408 return NULL;
409 409 }
410 410 self->flags = dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
411 411 dirstate_flag_has_meaningful_data |
412 dirstate_flag_has_meaningful_mtime;
412 dirstate_flag_has_file_mtime;
413 413 self->mode = mode;
414 414 self->size = size;
415 415 self->mtime = mtime;
416 416 Py_RETURN_NONE;
417 417 }
418 418
419 419 static PyObject *dirstate_item_set_tracked(dirstateItemObject *self)
420 420 {
421 421 self->flags |= dirstate_flag_wc_tracked;
422 self->flags &= ~dirstate_flag_has_meaningful_mtime;
422 self->flags &= ~dirstate_flag_has_file_mtime;
423 423 Py_RETURN_NONE;
424 424 }
425 425
426 426 static PyObject *dirstate_item_set_untracked(dirstateItemObject *self)
427 427 {
428 428 self->flags &= ~dirstate_flag_wc_tracked;
429 429 self->mode = 0;
430 430 self->mtime = 0;
431 431 self->size = 0;
432 432 Py_RETURN_NONE;
433 433 }
434 434
435 435 static PyObject *dirstate_item_drop_merge_data(dirstateItemObject *self)
436 436 {
437 437 if (self->flags & dirstate_flag_p2_info) {
438 438 self->flags &= ~(dirstate_flag_p2_info |
439 439 dirstate_flag_has_meaningful_data |
440 dirstate_flag_has_meaningful_mtime);
440 dirstate_flag_has_file_mtime);
441 441 self->mode = 0;
442 442 self->mtime = 0;
443 443 self->size = 0;
444 444 }
445 445 Py_RETURN_NONE;
446 446 }
447 447 static PyMethodDef dirstate_item_methods[] = {
448 448 {"v2_data", (PyCFunction)dirstate_item_v2_data, METH_NOARGS,
449 449 "return data suitable for v2 serialization"},
450 450 {"v1_state", (PyCFunction)dirstate_item_v1_state, METH_NOARGS,
451 451 "return a \"state\" suitable for v1 serialization"},
452 452 {"v1_mode", (PyCFunction)dirstate_item_v1_mode, METH_NOARGS,
453 453 "return a \"mode\" suitable for v1 serialization"},
454 454 {"v1_size", (PyCFunction)dirstate_item_v1_size, METH_NOARGS,
455 455 "return a \"size\" suitable for v1 serialization"},
456 456 {"v1_mtime", (PyCFunction)dirstate_item_v1_mtime, METH_NOARGS,
457 457 "return a \"mtime\" suitable for v1 serialization"},
458 458 {"need_delay", (PyCFunction)dirstate_item_need_delay, METH_O,
459 459 "True if the stored mtime would be ambiguous with the current time"},
460 460 {"from_v1_data", (PyCFunction)dirstate_item_from_v1_meth,
461 461 METH_VARARGS | METH_CLASS, "build a new DirstateItem object from V1 data"},
462 462 {"from_v2_data", (PyCFunction)dirstate_item_from_v2_meth,
463 463 METH_VARARGS | METH_CLASS, "build a new DirstateItem object from V2 data"},
464 464 {"set_possibly_dirty", (PyCFunction)dirstate_item_set_possibly_dirty,
465 465 METH_NOARGS, "mark a file as \"possibly dirty\""},
466 466 {"set_clean", (PyCFunction)dirstate_item_set_clean, METH_VARARGS,
467 467 "mark a file as \"clean\""},
468 468 {"set_tracked", (PyCFunction)dirstate_item_set_tracked, METH_NOARGS,
469 469 "mark a file as \"tracked\""},
470 470 {"set_untracked", (PyCFunction)dirstate_item_set_untracked, METH_NOARGS,
471 471 "mark a file as \"untracked\""},
472 472 {"drop_merge_data", (PyCFunction)dirstate_item_drop_merge_data, METH_NOARGS,
473 473 "remove all \"merge-only\" from a DirstateItem"},
474 474 {NULL} /* Sentinel */
475 475 };
476 476
477 477 static PyObject *dirstate_item_get_mode(dirstateItemObject *self)
478 478 {
479 479 return PyInt_FromLong(dirstate_item_c_v1_mode(self));
480 480 };
481 481
482 482 static PyObject *dirstate_item_get_size(dirstateItemObject *self)
483 483 {
484 484 return PyInt_FromLong(dirstate_item_c_v1_size(self));
485 485 };
486 486
487 487 static PyObject *dirstate_item_get_mtime(dirstateItemObject *self)
488 488 {
489 489 return PyInt_FromLong(dirstate_item_c_v1_mtime(self));
490 490 };
491 491
492 492 static PyObject *dirstate_item_get_state(dirstateItemObject *self)
493 493 {
494 494 char state = dirstate_item_c_v1_state(self);
495 495 return PyBytes_FromStringAndSize(&state, 1);
496 496 };
497 497
498 498 static PyObject *dirstate_item_get_tracked(dirstateItemObject *self)
499 499 {
500 500 if (dirstate_item_c_tracked(self)) {
501 501 Py_RETURN_TRUE;
502 502 } else {
503 503 Py_RETURN_FALSE;
504 504 }
505 505 };
506 506 static PyObject *dirstate_item_get_p1_tracked(dirstateItemObject *self)
507 507 {
508 508 if (self->flags & dirstate_flag_p1_tracked) {
509 509 Py_RETURN_TRUE;
510 510 } else {
511 511 Py_RETURN_FALSE;
512 512 }
513 513 };
514 514
515 515 static PyObject *dirstate_item_get_added(dirstateItemObject *self)
516 516 {
517 517 if (dirstate_item_c_added(self)) {
518 518 Py_RETURN_TRUE;
519 519 } else {
520 520 Py_RETURN_FALSE;
521 521 }
522 522 };
523 523
524 524 static PyObject *dirstate_item_get_p2_info(dirstateItemObject *self)
525 525 {
526 526 if (self->flags & dirstate_flag_wc_tracked &&
527 527 self->flags & dirstate_flag_p2_info) {
528 528 Py_RETURN_TRUE;
529 529 } else {
530 530 Py_RETURN_FALSE;
531 531 }
532 532 };
533 533
534 534 static PyObject *dirstate_item_get_merged(dirstateItemObject *self)
535 535 {
536 536 if (dirstate_item_c_merged(self)) {
537 537 Py_RETURN_TRUE;
538 538 } else {
539 539 Py_RETURN_FALSE;
540 540 }
541 541 };
542 542
543 543 static PyObject *dirstate_item_get_from_p2(dirstateItemObject *self)
544 544 {
545 545 if (dirstate_item_c_from_p2(self)) {
546 546 Py_RETURN_TRUE;
547 547 } else {
548 548 Py_RETURN_FALSE;
549 549 }
550 550 };
551 551
552 552 static PyObject *dirstate_item_get_maybe_clean(dirstateItemObject *self)
553 553 {
554 554 if (!(self->flags & dirstate_flag_wc_tracked)) {
555 555 Py_RETURN_FALSE;
556 556 } else if (!(self->flags & dirstate_flag_p1_tracked)) {
557 557 Py_RETURN_FALSE;
558 558 } else if (self->flags & dirstate_flag_p2_info) {
559 559 Py_RETURN_FALSE;
560 560 } else {
561 561 Py_RETURN_TRUE;
562 562 }
563 563 };
564 564
565 565 static PyObject *dirstate_item_get_any_tracked(dirstateItemObject *self)
566 566 {
567 567 if (dirstate_item_c_any_tracked(self)) {
568 568 Py_RETURN_TRUE;
569 569 } else {
570 570 Py_RETURN_FALSE;
571 571 }
572 572 };
573 573
574 574 static PyObject *dirstate_item_get_removed(dirstateItemObject *self)
575 575 {
576 576 if (dirstate_item_c_removed(self)) {
577 577 Py_RETURN_TRUE;
578 578 } else {
579 579 Py_RETURN_FALSE;
580 580 }
581 581 };
582 582
583 583 static PyGetSetDef dirstate_item_getset[] = {
584 584 {"mode", (getter)dirstate_item_get_mode, NULL, "mode", NULL},
585 585 {"size", (getter)dirstate_item_get_size, NULL, "size", NULL},
586 586 {"mtime", (getter)dirstate_item_get_mtime, NULL, "mtime", NULL},
587 587 {"state", (getter)dirstate_item_get_state, NULL, "state", NULL},
588 588 {"tracked", (getter)dirstate_item_get_tracked, NULL, "tracked", NULL},
589 589 {"p1_tracked", (getter)dirstate_item_get_p1_tracked, NULL, "p1_tracked",
590 590 NULL},
591 591 {"added", (getter)dirstate_item_get_added, NULL, "added", NULL},
592 592 {"p2_info", (getter)dirstate_item_get_p2_info, NULL, "p2_info", NULL},
593 593 {"merged", (getter)dirstate_item_get_merged, NULL, "merged", NULL},
594 594 {"from_p2", (getter)dirstate_item_get_from_p2, NULL, "from_p2", NULL},
595 595 {"maybe_clean", (getter)dirstate_item_get_maybe_clean, NULL, "maybe_clean",
596 596 NULL},
597 597 {"any_tracked", (getter)dirstate_item_get_any_tracked, NULL, "any_tracked",
598 598 NULL},
599 599 {"removed", (getter)dirstate_item_get_removed, NULL, "removed", NULL},
600 600 {NULL} /* Sentinel */
601 601 };
602 602
603 603 PyTypeObject dirstateItemType = {
604 604 PyVarObject_HEAD_INIT(NULL, 0) /* header */
605 605 "dirstate_tuple", /* tp_name */
606 606 sizeof(dirstateItemObject), /* tp_basicsize */
607 607 0, /* tp_itemsize */
608 608 (destructor)dirstate_item_dealloc, /* tp_dealloc */
609 609 0, /* tp_print */
610 610 0, /* tp_getattr */
611 611 0, /* tp_setattr */
612 612 0, /* tp_compare */
613 613 0, /* tp_repr */
614 614 0, /* tp_as_number */
615 615 0, /* tp_as_sequence */
616 616 0, /* tp_as_mapping */
617 617 0, /* tp_hash */
618 618 0, /* tp_call */
619 619 0, /* tp_str */
620 620 0, /* tp_getattro */
621 621 0, /* tp_setattro */
622 622 0, /* tp_as_buffer */
623 623 Py_TPFLAGS_DEFAULT, /* tp_flags */
624 624 "dirstate tuple", /* tp_doc */
625 625 0, /* tp_traverse */
626 626 0, /* tp_clear */
627 627 0, /* tp_richcompare */
628 628 0, /* tp_weaklistoffset */
629 629 0, /* tp_iter */
630 630 0, /* tp_iternext */
631 631 dirstate_item_methods, /* tp_methods */
632 632 0, /* tp_members */
633 633 dirstate_item_getset, /* tp_getset */
634 634 0, /* tp_base */
635 635 0, /* tp_dict */
636 636 0, /* tp_descr_get */
637 637 0, /* tp_descr_set */
638 638 0, /* tp_dictoffset */
639 639 0, /* tp_init */
640 640 0, /* tp_alloc */
641 641 dirstate_item_new, /* tp_new */
642 642 };
643 643
644 644 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
645 645 {
646 646 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
647 647 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
648 648 char state, *cur, *str, *cpos;
649 649 int mode, size, mtime;
650 650 unsigned int flen, pos = 40;
651 651 Py_ssize_t len = 40;
652 652 Py_ssize_t readlen;
653 653
654 654 if (!PyArg_ParseTuple(
655 655 args, PY23("O!O!s#:parse_dirstate", "O!O!y#:parse_dirstate"),
656 656 &PyDict_Type, &dmap, &PyDict_Type, &cmap, &str, &readlen)) {
657 657 goto quit;
658 658 }
659 659
660 660 len = readlen;
661 661
662 662 /* read parents */
663 663 if (len < 40) {
664 664 PyErr_SetString(PyExc_ValueError,
665 665 "too little data for parents");
666 666 goto quit;
667 667 }
668 668
669 669 parents = Py_BuildValue(PY23("s#s#", "y#y#"), str, (Py_ssize_t)20,
670 670 str + 20, (Py_ssize_t)20);
671 671 if (!parents) {
672 672 goto quit;
673 673 }
674 674
675 675 /* read filenames */
676 676 while (pos >= 40 && pos < len) {
677 677 if (pos + 17 > len) {
678 678 PyErr_SetString(PyExc_ValueError,
679 679 "overflow in dirstate");
680 680 goto quit;
681 681 }
682 682 cur = str + pos;
683 683 /* unpack header */
684 684 state = *cur;
685 685 mode = getbe32(cur + 1);
686 686 size = getbe32(cur + 5);
687 687 mtime = getbe32(cur + 9);
688 688 flen = getbe32(cur + 13);
689 689 pos += 17;
690 690 cur += 17;
691 691 if (flen > len - pos) {
692 692 PyErr_SetString(PyExc_ValueError,
693 693 "overflow in dirstate");
694 694 goto quit;
695 695 }
696 696
697 697 entry = (PyObject *)dirstate_item_from_v1_data(state, mode,
698 698 size, mtime);
699 699 if (!entry)
700 700 goto quit;
701 701 cpos = memchr(cur, 0, flen);
702 702 if (cpos) {
703 703 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
704 704 cname = PyBytes_FromStringAndSize(
705 705 cpos + 1, flen - (cpos - cur) - 1);
706 706 if (!fname || !cname ||
707 707 PyDict_SetItem(cmap, fname, cname) == -1 ||
708 708 PyDict_SetItem(dmap, fname, entry) == -1) {
709 709 goto quit;
710 710 }
711 711 Py_DECREF(cname);
712 712 } else {
713 713 fname = PyBytes_FromStringAndSize(cur, flen);
714 714 if (!fname ||
715 715 PyDict_SetItem(dmap, fname, entry) == -1) {
716 716 goto quit;
717 717 }
718 718 }
719 719 Py_DECREF(fname);
720 720 Py_DECREF(entry);
721 721 fname = cname = entry = NULL;
722 722 pos += flen;
723 723 }
724 724
725 725 ret = parents;
726 726 Py_INCREF(ret);
727 727 quit:
728 728 Py_XDECREF(fname);
729 729 Py_XDECREF(cname);
730 730 Py_XDECREF(entry);
731 731 Py_XDECREF(parents);
732 732 return ret;
733 733 }
734 734
735 735 /*
736 736 * Efficiently pack a dirstate object into its on-disk format.
737 737 */
738 738 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
739 739 {
740 740 PyObject *packobj = NULL;
741 741 PyObject *map, *copymap, *pl, *mtime_unset = NULL;
742 742 Py_ssize_t nbytes, pos, l;
743 743 PyObject *k, *v = NULL, *pn;
744 744 char *p, *s;
745 745 int now;
746 746
747 747 if (!PyArg_ParseTuple(args, "O!O!O!i:pack_dirstate", &PyDict_Type, &map,
748 748 &PyDict_Type, &copymap, &PyTuple_Type, &pl,
749 749 &now)) {
750 750 return NULL;
751 751 }
752 752
753 753 if (PyTuple_Size(pl) != 2) {
754 754 PyErr_SetString(PyExc_TypeError, "expected 2-element tuple");
755 755 return NULL;
756 756 }
757 757
758 758 /* Figure out how much we need to allocate. */
759 759 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
760 760 PyObject *c;
761 761 if (!PyBytes_Check(k)) {
762 762 PyErr_SetString(PyExc_TypeError, "expected string key");
763 763 goto bail;
764 764 }
765 765 nbytes += PyBytes_GET_SIZE(k) + 17;
766 766 c = PyDict_GetItem(copymap, k);
767 767 if (c) {
768 768 if (!PyBytes_Check(c)) {
769 769 PyErr_SetString(PyExc_TypeError,
770 770 "expected string key");
771 771 goto bail;
772 772 }
773 773 nbytes += PyBytes_GET_SIZE(c) + 1;
774 774 }
775 775 }
776 776
777 777 packobj = PyBytes_FromStringAndSize(NULL, nbytes);
778 778 if (packobj == NULL) {
779 779 goto bail;
780 780 }
781 781
782 782 p = PyBytes_AS_STRING(packobj);
783 783
784 784 pn = PyTuple_GET_ITEM(pl, 0);
785 785 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
786 786 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
787 787 goto bail;
788 788 }
789 789 memcpy(p, s, l);
790 790 p += 20;
791 791 pn = PyTuple_GET_ITEM(pl, 1);
792 792 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
793 793 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
794 794 goto bail;
795 795 }
796 796 memcpy(p, s, l);
797 797 p += 20;
798 798
799 799 for (pos = 0; PyDict_Next(map, &pos, &k, &v);) {
800 800 dirstateItemObject *tuple;
801 801 char state;
802 802 int mode, size, mtime;
803 803 Py_ssize_t len, l;
804 804 PyObject *o;
805 805 char *t;
806 806
807 807 if (!dirstate_tuple_check(v)) {
808 808 PyErr_SetString(PyExc_TypeError,
809 809 "expected a dirstate tuple");
810 810 goto bail;
811 811 }
812 812 tuple = (dirstateItemObject *)v;
813 813
814 814 state = dirstate_item_c_v1_state(tuple);
815 815 mode = dirstate_item_c_v1_mode(tuple);
816 816 size = dirstate_item_c_v1_size(tuple);
817 817 mtime = dirstate_item_c_v1_mtime(tuple);
818 818 if (state == 'n' && mtime == now) {
819 819 /* See pure/parsers.py:pack_dirstate for why we do
820 820 * this. */
821 821 mtime = -1;
822 822 mtime_unset = (PyObject *)dirstate_item_from_v1_data(
823 823 state, mode, size, mtime);
824 824 if (!mtime_unset) {
825 825 goto bail;
826 826 }
827 827 if (PyDict_SetItem(map, k, mtime_unset) == -1) {
828 828 goto bail;
829 829 }
830 830 Py_DECREF(mtime_unset);
831 831 mtime_unset = NULL;
832 832 }
833 833 *p++ = state;
834 834 putbe32((uint32_t)mode, p);
835 835 putbe32((uint32_t)size, p + 4);
836 836 putbe32((uint32_t)mtime, p + 8);
837 837 t = p + 12;
838 838 p += 16;
839 839 len = PyBytes_GET_SIZE(k);
840 840 memcpy(p, PyBytes_AS_STRING(k), len);
841 841 p += len;
842 842 o = PyDict_GetItem(copymap, k);
843 843 if (o) {
844 844 *p++ = '\0';
845 845 l = PyBytes_GET_SIZE(o);
846 846 memcpy(p, PyBytes_AS_STRING(o), l);
847 847 p += l;
848 848 len += l + 1;
849 849 }
850 850 putbe32((uint32_t)len, t);
851 851 }
852 852
853 853 pos = p - PyBytes_AS_STRING(packobj);
854 854 if (pos != nbytes) {
855 855 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
856 856 (long)pos, (long)nbytes);
857 857 goto bail;
858 858 }
859 859
860 860 return packobj;
861 861 bail:
862 862 Py_XDECREF(mtime_unset);
863 863 Py_XDECREF(packobj);
864 864 Py_XDECREF(v);
865 865 return NULL;
866 866 }
867 867
/* Bits of the 16-bit flags field of an fm1 obsolescence marker. Only
 * USING_SHA_256 is consulted in this part of the file, where it selects
 * 32-byte instead of 20-byte hashes. */
#define BUMPED_FIX 1
#define USING_SHA_256 2
/* Fixed-size marker header read by fm1readmarker(): 4-byte size, 8-byte
 * float, two 2-byte integers, then three 1-byte counts. */
#define FM1_HEADER_SIZE (4 + 8 + 2 + 2 + 1 + 1 + 1)
871 871
872 872 static PyObject *readshas(const char *source, unsigned char num,
873 873 Py_ssize_t hashwidth)
874 874 {
875 875 int i;
876 876 PyObject *list = PyTuple_New(num);
877 877 if (list == NULL) {
878 878 return NULL;
879 879 }
880 880 for (i = 0; i < num; i++) {
881 881 PyObject *hash = PyBytes_FromStringAndSize(source, hashwidth);
882 882 if (hash == NULL) {
883 883 Py_DECREF(list);
884 884 return NULL;
885 885 }
886 886 PyTuple_SET_ITEM(list, i, hash);
887 887 source += hashwidth;
888 888 }
889 889 return list;
890 890 }
891 891
892 892 static PyObject *fm1readmarker(const char *databegin, const char *dataend,
893 893 uint32_t *msize)
894 894 {
895 895 const char *data = databegin;
896 896 const char *meta;
897 897
898 898 double mtime;
899 899 int16_t tz;
900 900 uint16_t flags;
901 901 unsigned char nsuccs, nparents, nmetadata;
902 902 Py_ssize_t hashwidth = 20;
903 903
904 904 PyObject *prec = NULL, *parents = NULL, *succs = NULL;
905 905 PyObject *metadata = NULL, *ret = NULL;
906 906 int i;
907 907
908 908 if (data + FM1_HEADER_SIZE > dataend) {
909 909 goto overflow;
910 910 }
911 911
912 912 *msize = getbe32(data);
913 913 data += 4;
914 914 mtime = getbefloat64(data);
915 915 data += 8;
916 916 tz = getbeint16(data);
917 917 data += 2;
918 918 flags = getbeuint16(data);
919 919 data += 2;
920 920
921 921 if (flags & USING_SHA_256) {
922 922 hashwidth = 32;
923 923 }
924 924
925 925 nsuccs = (unsigned char)(*data++);
926 926 nparents = (unsigned char)(*data++);
927 927 nmetadata = (unsigned char)(*data++);
928 928
929 929 if (databegin + *msize > dataend) {
930 930 goto overflow;
931 931 }
932 932 dataend = databegin + *msize; /* narrow down to marker size */
933 933
934 934 if (data + hashwidth > dataend) {
935 935 goto overflow;
936 936 }
937 937 prec = PyBytes_FromStringAndSize(data, hashwidth);
938 938 data += hashwidth;
939 939 if (prec == NULL) {
940 940 goto bail;
941 941 }
942 942
943 943 if (data + nsuccs * hashwidth > dataend) {
944 944 goto overflow;
945 945 }
946 946 succs = readshas(data, nsuccs, hashwidth);
947 947 if (succs == NULL) {
948 948 goto bail;
949 949 }
950 950 data += nsuccs * hashwidth;
951 951
952 952 if (nparents == 1 || nparents == 2) {
953 953 if (data + nparents * hashwidth > dataend) {
954 954 goto overflow;
955 955 }
956 956 parents = readshas(data, nparents, hashwidth);
957 957 if (parents == NULL) {
958 958 goto bail;
959 959 }
960 960 data += nparents * hashwidth;
961 961 } else {
962 962 parents = Py_None;
963 963 Py_INCREF(parents);
964 964 }
965 965
966 966 if (data + 2 * nmetadata > dataend) {
967 967 goto overflow;
968 968 }
969 969 meta = data + (2 * nmetadata);
970 970 metadata = PyTuple_New(nmetadata);
971 971 if (metadata == NULL) {
972 972 goto bail;
973 973 }
974 974 for (i = 0; i < nmetadata; i++) {
975 975 PyObject *tmp, *left = NULL, *right = NULL;
976 976 Py_ssize_t leftsize = (unsigned char)(*data++);
977 977 Py_ssize_t rightsize = (unsigned char)(*data++);
978 978 if (meta + leftsize + rightsize > dataend) {
979 979 goto overflow;
980 980 }
981 981 left = PyBytes_FromStringAndSize(meta, leftsize);
982 982 meta += leftsize;
983 983 right = PyBytes_FromStringAndSize(meta, rightsize);
984 984 meta += rightsize;
985 985 tmp = PyTuple_New(2);
986 986 if (!left || !right || !tmp) {
987 987 Py_XDECREF(left);
988 988 Py_XDECREF(right);
989 989 Py_XDECREF(tmp);
990 990 goto bail;
991 991 }
992 992 PyTuple_SET_ITEM(tmp, 0, left);
993 993 PyTuple_SET_ITEM(tmp, 1, right);
994 994 PyTuple_SET_ITEM(metadata, i, tmp);
995 995 }
996 996 ret = Py_BuildValue("(OOHO(di)O)", prec, succs, flags, metadata, mtime,
997 997 (int)tz * 60, parents);
998 998 goto bail; /* return successfully */
999 999
1000 1000 overflow:
1001 1001 PyErr_SetString(PyExc_ValueError, "overflow in obsstore");
1002 1002 bail:
1003 1003 Py_XDECREF(prec);
1004 1004 Py_XDECREF(succs);
1005 1005 Py_XDECREF(metadata);
1006 1006 Py_XDECREF(parents);
1007 1007 return ret;
1008 1008 }
1009 1009
/*
 * fm1readmarkers(data, offset, stop) -> list
 *
 * Python entry point ("parse v1 obsolete markers"). Walks a bytes buffer
 * starting `offset` bytes in, decoding one marker at a time with
 * fm1readmarker() for as long as the running offset stays below `stop`,
 * and returns the decoded records as a list.
 *
 * Raises ValueError for a negative offset or a `stop` beyond the buffer.
 * Returns NULL with an exception set on any failure.
 */
1010 1010 static PyObject *fm1readmarkers(PyObject *self, PyObject *args)
1011 1011 {
1012 1012 const char *data, *dataend;
1013 1013 Py_ssize_t datalen, offset, stop;
1014 1014 PyObject *markers = NULL;
1015 1015
1016 1016 if (!PyArg_ParseTuple(args, PY23("s#nn", "y#nn"), &data, &datalen,
1017 1017 &offset, &stop)) {
1018 1018 return NULL;
1019 1019 }
1020 1020 if (offset < 0) {
1021 1021 PyErr_SetString(PyExc_ValueError,
1022 1022 "invalid negative offset in fm1readmarkers");
1023 1023 return NULL;
1024 1024 }
1025 1025 if (stop > datalen) {
1026 1026 PyErr_SetString(
1027 1027 PyExc_ValueError,
1028 1028 "stop longer than data length in fm1readmarkers");
1029 1029 return NULL;
1030 1030 }
/* dataend marks the end of the whole buffer (not `stop`); it is the hard
 * bound handed to fm1readmarker() for its overflow checks. */
1031 1031 dataend = data + datalen;
1032 1032 data += offset;
1033 1033 markers = PyList_New(0);
1034 1034 if (!markers) {
1035 1035 return NULL;
1036 1036 }
1037 1037 while (offset < stop) {
1038 1038 uint32_t msize;
1039 1039 int error;
1040 1040 PyObject *record = fm1readmarker(data, dataend, &msize);
1041 1041 if (!record) {
1042 1042 goto bail;
1043 1043 }
1044 1044 error = PyList_Append(markers, record);
/* PyList_Append does not steal the reference, so drop ours whether or
 * not the append succeeded. */
1045 1045 Py_DECREF(record);
1046 1046 if (error) {
1047 1047 goto bail;
1048 1048 }
/* msize was filled in by fm1readmarker(); skip to the next marker. */
1049 1049 data += msize;
1050 1050 offset += msize;
1051 1051 }
1052 1052 return markers;
1053 1053 bail:
/* Discard the partially built list; the exception is already set. */
1054 1054 Py_DECREF(markers);
1055 1055 return NULL;
1056 1056 }
1057 1057
1058 1058 static char parsers_doc[] = "Efficient content parsing.";
1059 1059
1060 1060 PyObject *encodedir(PyObject *self, PyObject *args);
1061 1061 PyObject *pathencode(PyObject *self, PyObject *args);
1062 1062 PyObject *lowerencode(PyObject *self, PyObject *args);
1063 1063 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs);
1064 1064
/*
 * Table of functions exported by the `parsers` extension module. It is
 * handed to the module definition (Python 3) or Py_InitModule3 (Python 2)
 * and is terminated by the {NULL, NULL} sentinel entry.
 */
1065 1065 static PyMethodDef methods[] = {
1066 1066 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
1067 1067 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
/* parse_index2 also accepts keyword arguments, hence the cast and the
 * extra METH_KEYWORDS flag. */
1068 1068 {"parse_index2", (PyCFunction)parse_index2, METH_VARARGS | METH_KEYWORDS,
1069 1069 "parse a revlog index\n"},
1070 1070 {"isasciistr", isasciistr, METH_VARARGS, "check if an ASCII string\n"},
1071 1071 {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
1072 1072 {"asciiupper", asciiupper, METH_VARARGS, "uppercase an ASCII string\n"},
1073 1073 {"dict_new_presized", dict_new_presized, METH_VARARGS,
1074 1074 "construct a dict with an expected size\n"},
1075 1075 {"make_file_foldmap", make_file_foldmap, METH_VARARGS,
1076 1076 "make file foldmap\n"},
1077 1077 {"jsonescapeu8fast", jsonescapeu8fast, METH_VARARGS,
1078 1078 "escape a UTF-8 byte string to JSON (fast path)\n"},
1079 1079 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
1080 1080 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
1081 1081 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
1082 1082 {"fm1readmarkers", fm1readmarkers, METH_VARARGS,
1083 1083 "parse v1 obsolete markers\n"},
1084 1084 {NULL, NULL}};
1085 1085
1086 1086 void dirs_module_init(PyObject *mod);
1087 1087 void manifest_module_init(PyObject *mod);
1088 1088 void revlog_module_init(PyObject *mod);
1089 1089
1090 1090 static const int version = 20;
1091 1091
/*
 * Populate a freshly created `parsers` module object: add the `version`
 * and `versionerrortext` constants, run the initialisers of the dirs,
 * manifest and revlog sub-components, then register the DirstateItem type.
 *
 * NOTE(review): the return values of the PyModule_Add* calls are ignored
 * and the function returns void, so callers cannot see a failure here --
 * confirm this is intentional.
 */
1092 1092 static void module_init(PyObject *mod)
1093 1093 {
1094 1094 PyModule_AddIntConstant(mod, "version", version);
1095 1095
1096 1096 /* This module constant has two purposes. First, it lets us unit test
1097 1097 * the ImportError raised without hard-coding any error text. This
1098 1098 * means we can change the text in the future without breaking tests,
1099 1099 * even across changesets without a recompile. Second, its presence
1100 1100 * can be used to determine whether the version-checking logic is
1101 1101 * present, which also helps in testing across changesets without a
1102 1102 * recompile. Note that this means the pure-Python version of parsers
1103 1103 * should not have this module constant. */
1104 1104 PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
1105 1105
1106 1106 dirs_module_init(mod);
1107 1107 manifest_module_init(mod);
1108 1108 revlog_module_init(mod);
1109 1109
/* Finalise the static type object before exposing it. */
1110 1110 if (PyType_Ready(&dirstateItemType) < 0) {
1111 1111 return;
1112 1112 }
/* PyModule_AddObject steals a reference on success, so take one first. */
1113 1113 Py_INCREF(&dirstateItemType);
1114 1114 PyModule_AddObject(mod, "DirstateItem", (PyObject *)&dirstateItemType);
1115 1115 }
1116 1116
/*
 * Verify that the running interpreter matches the Python version this
 * extension was compiled against.
 *
 * Reads sys.hexversion and compares everything above its low 16 bits with
 * the same part of the compile-time PY_VERSION_HEX. Returns 0 when they
 * match. Returns -1 with an exception set when `sys` or `hexversion` cannot
 * be obtained, or (as ImportError) when the versions differ.
 */
1117 1117 static int check_python_version(void)
1118 1118 {
1119 1119 PyObject *sys = PyImport_ImportModule("sys"), *ver;
1120 1120 long hexversion;
1121 1121 if (!sys) {
1122 1122 return -1;
1123 1123 }
1124 1124 ver = PyObject_GetAttrString(sys, "hexversion");
/* `sys` is no longer needed once the attribute has been fetched. */
1125 1125 Py_DECREF(sys);
1126 1126 if (!ver) {
1127 1127 return -1;
1128 1128 }
1129 1129 hexversion = PyInt_AsLong(ver);
1130 1130 Py_DECREF(ver);
1131 1131 /* sys.hexversion is a 32-bit number by default, so the -1 case
1132 1132 * should only occur in unusual circumstances (e.g. if sys.hexversion
1133 1133 * is manually set to an invalid value). */
1134 1134 if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
/* The message starts with versionerrortext so tests can match on it
 * without hard-coding the rest of the text. */
1135 1135 PyErr_Format(PyExc_ImportError,
1136 1136 "%s: The Mercurial extension "
1137 1137 "modules were compiled with Python " PY_VERSION
1138 1138 ", but "
1139 1139 "Mercurial is currently using Python with "
1140 1140 "sys.hexversion=%ld: "
1141 1141 "Python %s\n at: %s",
1142 1142 versionerrortext, hexversion, Py_GetVersion(),
1143 1143 Py_GetProgramFullPath());
1144 1144 return -1;
1145 1145 }
1146 1146 return 0;
1147 1147 }
1148 1148
/*
 * Module entry points. Exactly one of the two functions below is compiled,
 * depending on IS_PY3K. Both refuse to initialise the module when
 * check_python_version() reports a mismatch, then create the module and
 * hand it to module_init().
 */
1149 1149 #ifdef IS_PY3K
/* Python 3 module definition: name, docstring, no per-module state (-1),
 * and the method table. */
1150 1150 static struct PyModuleDef parsers_module = {PyModuleDef_HEAD_INIT, "parsers",
1151 1151 parsers_doc, -1, methods};
1152 1152
/* Python 3 entry point; returns the new module, or NULL on version
 * mismatch. */
1153 1153 PyMODINIT_FUNC PyInit_parsers(void)
1154 1154 {
1155 1155 PyObject *mod;
1156 1156
1157 1157 if (check_python_version() == -1)
1158 1158 return NULL;
/* NOTE(review): mod is passed to module_init() without a NULL check --
 * confirm a PyModule_Create failure is handled safely. */
1159 1159 mod = PyModule_Create(&parsers_module);
1160 1160 module_init(mod);
1161 1161 return mod;
1162 1162 }
1163 1163 #else
/* Python 2 entry point; returns nothing, errors are reported through the
 * pending exception. */
1164 1164 PyMODINIT_FUNC initparsers(void)
1165 1165 {
1166 1166 PyObject *mod;
1167 1167
1168 1168 if (check_python_version() == -1) {
1169 1169 return;
1170 1170 }
1171 1171 mod = Py_InitModule3("parsers", methods, parsers_doc);
1172 1172 module_init(mod);
1173 1173 }
1174 1174 #endif
@@ -1,82 +1,83 b''
1 1 /*
2 2 util.h - utility functions for interfacing with the various python APIs.
3 3
4 4 This software may be used and distributed according to the terms of
5 5 the GNU General Public License, incorporated herein by reference.
6 6 */
7 7
8 8 #ifndef _HG_UTIL_H_
9 9 #define _HG_UTIL_H_
10 10
11 11 #include "compat.h"
12 12
13 13 #if PY_MAJOR_VERSION >= 3
14 14 #define IS_PY3K
15 15 #endif
16 16
17 17 /* helper to switch things like string literal depending on Python version */
18 18 #ifdef IS_PY3K
19 19 #define PY23(py2, py3) py3
20 20 #else
21 21 #define PY23(py2, py3) py2
22 22 #endif
23 23
24 24 /* clang-format off */
/* Instance layout of the DirstateItem Python type (see dirstateItemType). */
25 25 typedef struct {
26 26 PyObject_HEAD
/* presumably a bitwise OR of the dirstate_flag_* masks declared below;
 * all of them fit in 8 bits -- TODO confirm against the users of `flags` */
27 27 unsigned char flags;
28 28 int mode;
29 29 int size;
30 30 int mtime;
31 31 } dirstateItemObject;
32 32 /* clang-format on */
33 33
34 34 static const unsigned char dirstate_flag_wc_tracked = 1;
35 35 static const unsigned char dirstate_flag_p1_tracked = 1 << 1;
36 36 static const unsigned char dirstate_flag_p2_info = 1 << 2;
37 37 static const unsigned char dirstate_flag_has_meaningful_data = 1 << 3;
38 static const unsigned char dirstate_flag_has_meaningful_mtime = 1 << 4;
39 static const unsigned char dirstate_flag_mode_exec_perm = 1 << 5;
40 static const unsigned char dirstate_flag_mode_is_symlink = 1 << 6;
38 static const unsigned char dirstate_flag_has_file_mtime = 1 << 4;
39 static const unsigned char dirstate_flag_has_directory_mtime = 1 << 5;
40 static const unsigned char dirstate_flag_mode_exec_perm = 1 << 6;
41 static const unsigned char dirstate_flag_mode_is_symlink = 1 << 7;
41 42
42 43 extern PyTypeObject dirstateItemType;
43 44 #define dirstate_tuple_check(op) (Py_TYPE(op) == &dirstateItemType)
44 45
45 46 #ifndef MIN
46 47 #define MIN(a, b) (((a) < (b)) ? (a) : (b))
47 48 #endif
48 49 /* VC9 doesn't include bool and lacks stdbool.h based on my searching */
49 50 #if defined(_MSC_VER) || __STDC_VERSION__ < 199901L
50 51 #define true 1
51 52 #define false 0
52 53 typedef unsigned char bool;
53 54 #else
54 55 #include <stdbool.h>
55 56 #endif
56 57
/*
 * Create an empty dict pre-sized for roughly `expected_size` entries.
 * The size requested from _PyDict_NewPresized is padded to about 3/2 of
 * expected_size, for the reason given in the comment inside the body.
 * Returns whatever _PyDict_NewPresized returns.
 */
57 58 static inline PyObject *_dict_new_presized(Py_ssize_t expected_size)
58 59 {
59 60 /* _PyDict_NewPresized expects a minused parameter, but it actually
60 61 creates a dictionary that's the nearest power of two bigger than the
61 62 parameter. For example, with the initial minused = 1000, the
62 63 dictionary created has size 1024. Of course in a lot of cases that
63 64 can be greater than the maximum load factor Python's dict object
64 65 expects (= 2/3), so as soon as we cross the threshold we'll resize
65 66 anyway. So create a dictionary that's at least 3/2 the size. */
66 67 return _PyDict_NewPresized(((1 + expected_size) / 2) * 3);
67 68 }
68 69
70 71 /* Convert a PyInt or PyLong to a long. Returns false if there is an
71 72 error, in which case an exception will already have been set. */
/* The converted value is always written to *out, even on failure. */
72 73 static inline bool pylong_to_long(PyObject *pylong, long *out)
73 74 {
74 75 *out = PyLong_AsLong(pylong);
75 76 /* Fast path to avoid hitting PyErr_Occurred if the value was obviously
76 77 * not an error. */
77 78 if (*out != -1) {
78 79 return true;
79 80 }
/* -1 is ambiguous: it is both a legitimate value and PyLong_AsLong's
 * error return, so only a pending exception tells them apart. */
80 81 return PyErr_Occurred() == NULL;
81 82 }
81 82
82 83 #endif /* _HG_UTIL_H_ */
@@ -1,503 +1,516 b''
1 1 The *dirstate* is what Mercurial uses internally to track
2 2 the state of files in the working directory,
3 3 such as set by commands like `hg add` and `hg rm`.
4 4 It also contains some cached data that help make `hg status` faster.
5 5 The name refers both to `.hg/dirstate` on the filesystem
6 6 and the corresponding data structure in memory while a Mercurial process
7 7 is running.
8 8
9 9 The original file format, retroactively dubbed `dirstate-v1`,
10 10 is described at https://www.mercurial-scm.org/wiki/DirState.
11 11 It is made of a flat sequence of unordered variable-size entries,
12 12 so accessing any information in it requires parsing all of it.
13 13 Similarly, saving changes requires rewriting the entire file.
14 14
15 15 The newer `dirstate-v2` file format is designed to fix these limitations
16 16 and make `hg status` faster.
17 17
18 18 User guide
19 19 ==========
20 20
21 21 Compatibility
22 22 -------------
23 23
24 24 The file format is experimental and may still change.
25 25 Different versions of Mercurial may not be compatible with each other
26 26 when working on a local repository that uses this format.
27 27 When using an incompatible version with the experimental format,
28 28 anything can happen including data corruption.
29 29
30 30 Since the dirstate is entirely local and not relevant to the wire protocol,
31 31 `dirstate-v2` does not affect compatibility with remote Mercurial versions.
32 32
33 33 When `share-safe` is enabled, different repositories sharing the same store
34 34 can use different dirstate formats.
35 35
36 36 Enabling `dirstate-v2` for new local repositories
37 37 -------------------------------------------------
38 38
39 39 When creating a new local repository such as with `hg init` or `hg clone`,
40 40 the `exp-dirstate-v2` boolean in the `format` configuration section
41 41 controls whether to use this file format.
42 42 This is disabled by default as of this writing.
43 43 To enable it for a single repository, run for example::
44 44
45 45 $ hg init my-project --config format.exp-dirstate-v2=1
46 46
47 47 Checking the format of an existing local repository
48 48 ---------------------------------------------------
49 49
50 50 The `debugformat` command prints information about
51 51 which of multiple optional formats are used in the current repository,
52 52 including `dirstate-v2`::
53 53
54 54 $ hg debugformat
55 55 format-variant repo
56 56 fncache: yes
57 57 dirstate-v2: yes
58 58 […]
59 59
60 60 Upgrading or downgrading an existing local repository
61 61 -----------------------------------------------------
62 62
63 63 The `debugupgrade` command does various upgrades or downgrades
64 64 on a local repository
65 65 based on the current Mercurial version and on configuration.
66 66 The same `format.exp-dirstate-v2` configuration is used again.
67 67
68 68 Example to upgrade::
69 69
70 70 $ hg debugupgrade --config format.exp-dirstate-v2=1
71 71
72 72 Example to downgrade to `dirstate-v1`::
73 73
74 74 $ hg debugupgrade --config format.exp-dirstate-v2=0
75 75
76 76 Both of these commands do nothing but print a list of proposed changes,
77 77 which may include changes unrelated to the dirstate.
78 78 Those other changes are controlled by their own configuration keys.
79 79 Add `--run` to a command to actually apply the proposed changes.
80 80
81 81 Backups of `.hg/requires` and `.hg/dirstate` are created
82 82 in a `.hg/upgradebackup.*` directory.
83 83 If something goes wrong, restoring those files should undo the change.
84 84
85 85 Note that upgrading affects compatibility with older versions of Mercurial
86 86 as noted above.
87 87 This can be relevant when a repository’s files are on a USB drive
88 88 or some other removable media, or shared over the network, etc.
89 89
90 90 Internal filesystem representation
91 91 ==================================
92 92
93 93 Requirements file
94 94 -----------------
95 95
96 96 The `.hg/requires` file indicates which of various optional file formats
97 97 are used by a given repository.
98 98 Mercurial aborts when seeing a requirement it does not know about,
99 99 which avoids older versions accidentally messing up a repository
100 100 that uses a format that was introduced later.
101 101 For versions that do support a format, the presence or absence of
102 102 the corresponding requirement indicates whether to use that format.
103 103
104 104 When the file contains a `exp-dirstate-v2` line,
105 105 the `dirstate-v2` format is used.
106 106 With no such line `dirstate-v1` is used.
107 107
108 108 High level description
109 109 ----------------------
110 110
111 111 Whereas `dirstate-v1` uses a single `.hg/dirstate` file,
112 112 in `dirstate-v2` that file is a "docket" file
113 113 that only contains some metadata
114 114 and points to a separate data file named `.hg/dirstate.{ID}`,
115 115 where `{ID}` is a random identifier.
116 116
117 117 This separation allows making data files append-only
118 118 and therefore safer to memory-map.
119 119 Creating a new data file (occasionally to clean up unused data)
120 120 can be done with a different ID
121 121 without disrupting another Mercurial process
122 122 that could still be using the previous data file.
123 123
124 124 Both files have a format designed to reduce the need for parsing,
125 125 by using fixed-size binary components as much as possible.
126 126 For data that is not fixed-size,
127 127 references to other parts of a file can be made by storing "pseudo-pointers":
128 128 integers counted in bytes from the start of a file.
129 129 For read-only access no data structure is needed,
130 130 only a bytes buffer (possibly memory-mapped directly from the filesystem)
131 131 with specific parts read on demand.
132 132
133 133 The data file contains "nodes" organized in a tree.
134 134 Each node represents a file or directory inside the working directory
135 135 or its parent changeset.
136 136 This tree has the same structure as the filesystem,
137 137 so a node representing a directory has child nodes representing
138 138 the files and subdirectories contained directly in that directory.
139 139
140 140 The docket file format
141 141 ----------------------
142 142
143 143 This is implemented in `rust/hg-core/src/dirstate_tree/on_disk.rs`
144 144 and `mercurial/dirstateutils/docket.py`.
145 145
146 146 Components of the docket file are found at fixed offsets,
147 147 counted in bytes from the start of the file:
148 148
149 149 * Offset 0:
150 150 The 12-bytes marker string "dirstate-v2\n" ending with a newline character.
151 151 This makes it easier to tell a dirstate-v2 file from a dirstate-v1 file,
152 152 although it is not strictly necessary
153 153 since `.hg/requires` determines which format to use.
154 154
155 155 * Offset 12:
156 156 The changeset node ID on the first parent of the working directory,
157 157 as up to 32 binary bytes.
158 158 If a node ID is shorter (20 bytes for SHA-1),
159 159 it is start-aligned and the rest of the bytes are set to zero.
160 160
161 161 * Offset 44:
162 162 The changeset node ID on the second parent of the working directory,
163 163 or all zeros if there isn’t one.
164 164 Also 32 binary bytes.
165 165
166 166 * Offset 76:
167 167 Tree metadata on 44 bytes, described below.
168 168 Its separation in this documentation from the rest of the docket
169 169 reflects a detail of the current implementation.
170 170 Since tree metadata is also made of fields at fixed offsets, those could
171 171 be inlined here by adding 76 bytes to each offset.
172 172
173 173 * Offset 120:
174 174 The used size of the data file, as a 32-bit big-endian integer.
175 175 The actual size of the data file may be larger
176 176 (if another Mercurial process is appending to it
177 177 but has not updated the docket yet).
178 178 That extra data must be ignored.
179 179
180 180 * Offset 124:
181 181 The length of the data file identifier, as an 8-bit integer.
182 182
183 183 * Offset 125:
184 184 The data file identifier.
185 185
186 186 * Any additional data is currently ignored, and dropped when updating the file.
187 187
188 188 Tree metadata in the docket file
189 189 --------------------------------
190 190
191 191 Tree metadata is similarly made of components at fixed offsets.
192 192 These offsets are counted in bytes from the start of tree metadata,
193 193 which is 76 bytes after the start of the docket file.
194 194
195 195 This metadata can be thought of as the singular root of the tree
196 196 formed by nodes in the data file.
197 197
198 198 * Offset 0:
199 199 Pseudo-pointer to the start of root nodes,
200 200 counted in bytes from the start of the data file,
201 201 as a 32-bit big-endian integer.
202 202 These nodes describe files and directories found directly
203 203 at the root of the working directory.
204 204
205 205 * Offset 4:
206 206 Number of root nodes, as a 32-bit big-endian integer.
207 207
208 208 * Offset 8:
209 209 Total number of nodes in the entire tree that "have a dirstate entry",
210 210 as a 32-bit big-endian integer.
211 211 Those nodes represent files that would be present at all in `dirstate-v1`.
212 212 This is typically less than the total number of nodes.
213 213 This counter is used to implement `len(dirstatemap)`.
214 214
215 215 * Offset 12:
216 216 Number of nodes in the entire tree that have a copy source,
217 217 as a 32-bit big-endian integer.
218 218 At the next commit, these files are recorded
219 219 as having been copied or moved/renamed from that source.
220 220 (A move is recorded as a copy and separate removal of the source.)
221 221 This counter is used to implement `len(dirstatemap.copymap)`.
222 222
223 223 * Offset 16:
224 224 An estimation of how many bytes of the data file
225 225 (within its used size) are unused, as a 32-bit big-endian integer.
226 226 When appending to an existing data file,
227 227 some existing nodes or paths can be unreachable from the new root
228 228 but they still take up space.
229 229 This counter is used to decide when to write a new data file from scratch
230 230 instead of appending to an existing one,
231 231 in order to get rid of that unreachable data
232 232 and avoid unbounded file size growth.
233 233
234 234 * Offset 20:
235 235 These four bytes are currently ignored
236 236 and reset to zero when updating a docket file.
237 237 This is an attempt at forward compatibility:
238 238 future Mercurial versions could use this as a bit field
239 239 to indicate that a dirstate has additional data or constraints.
240 240 Finding a dirstate file with the relevant bit unset indicates that
241 241 it was written by a then-older version
242 242 which is not aware of that future change.
243 243
244 244 * Offset 24:
245 245 Either 20 zero bytes, or a SHA-1 hash as 20 binary bytes.
246 246 When present, the hash is of ignore patterns
247 247 that were used for some previous run of the `status` algorithm.
248 248
249 249 * (Offset 44: end of tree metadata)
250 250
251 251 Optional hash of ignore patterns
252 252 --------------------------------
253 253
254 254 The implementation of `status` at `rust/hg-core/src/dirstate_tree/status.rs`
255 255 has been optimized such that its run time is dominated by calls
256 256 to `stat` for reading the filesystem metadata of a file or directory,
257 257 and to `readdir` for listing the contents of a directory.
258 258 In some cases the algorithm can skip calls to `readdir`
259 259 (saving significant time)
260 260 because the dirstate already contains enough of the relevant information
261 261 to build the correct `status` results.
262 262
263 263 The default configuration of `hg status` is to list unknown files
264 264 but not ignored files.
265 265 In this case, it matters for the `readdir`-skipping optimization
266 266 if a given file used to be ignored but became unknown
267 267 because `.hgignore` changed.
268 268 To detect the possibility of such a change,
269 269 the tree metadata contains an optional hash of all ignore patterns.
270 270
271 271 We define:
272 272
273 273 * "Root" ignore files as:
274 274
275 275 - `.hgignore` at the root of the repository if it exists
276 276 - And all files from `ui.ignore.*` config.
277 277
278 278 This set of files is sorted by the string representation of their path.
279 279
280 280 * The "expanded contents" of an ignore file is the byte string made
281 281 by the concatenation of its contents followed by the "expanded contents"
282 282 of other files included with `include:` or `subinclude:` directives,
283 283 in inclusion order. This definition is recursive, as included files can
284 284 themselves include more files.
285 285
286 286 This hash is defined as the SHA-1 of the concatenation (in sorted
287 287 order) of the "expanded contents" of each "root" ignore file.
288 288 (Note that computing this does not require actually concatenating
289 289 into a single contiguous byte sequence.
290 290 Instead a SHA-1 hasher object can be created
291 291 and fed separate chunks one by one.)
292 292
293 293 The data file format
294 294 --------------------
295 295
296 296 This is implemented in `rust/hg-core/src/dirstate_tree/on_disk.rs`
297 297 and `mercurial/dirstateutils/v2.py`.
298 298
299 299 The data file contains two types of data: paths and nodes.
300 300
301 301 Paths and nodes can be organized in any order in the file, except that sibling
302 302 nodes must be next to each other and sorted by their path.
303 303 Contiguity lets the parent refer to them all
304 304 by their count and a single pseudo-pointer,
305 305 instead of storing one pseudo-pointer per child node.
306 306 Sorting allows using binary search to find a child node with a given name
307 307 in `O(log(n))` byte sequence comparisons.
308 308
309 309 The current implementation writes paths and child nodes before a given node
310 310 for ease of figuring out the value of pseudo-pointers by the time they are to be
311 311 written, but this is not an obligation and readers must not rely on it.
312 312
313 313 A path is stored as a byte string anywhere in the file, without delimiter.
314 314 It is referred to by one or more nodes by a pseudo-pointer to its start, and its
315 315 length in bytes. Since there is no delimiter,
316 316 when a path is a substring of another the same bytes could be reused,
317 317 although the implementation does not exploit this as of this writing.
318 318
319 319 A node is stored on 44 bytes with components at fixed offsets. Paths and
320 320 child nodes relevant to a node are stored externally and referenced through
321 321 pseudo-pointers.
322 322
323 323 All integers are stored in big-endian. All pseudo-pointers are 32-bit integers
324 324 counting bytes from the start of the data file. Path lengths and positions
325 325 are 16-bit integers, also counted in bytes.
326 326
327 327 Node components are:
328 328
329 329 * Offset 0:
330 330 Pseudo-pointer to the full path of this node,
331 331 from the working directory root.
332 332
333 333 * Offset 4:
334 334 Length of the full path.
335 335
336 336 * Offset 6:
337 337 Position of the last `/` path separator within the full path,
338 338 in bytes from the start of the full path,
339 339 or zero if there isn’t one.
340 340 The part of the full path after this position is the "base name".
341 341 Since sibling nodes have the same parent, only their base names vary
342 342 and need to be considered when doing binary search to find a given path.
343 343
344 344 * Offset 8:
345 345 Pseudo-pointer to the "copy source" path for this node,
346 346 or zero if there is no copy source.
347 347
348 348 * Offset 12:
349 349 Length of the copy source path, or zero if there isn’t one.
350 350
351 351 * Offset 14:
352 352 Pseudo-pointer to the start of child nodes.
353 353
354 354 * Offset 18:
355 355 Number of child nodes, as a 32-bit integer.
356 356 They occupy 44 times this number of bytes
357 357 (not counting space for paths, and further descendants).
358 358
359 359 * Offset 22:
360 360 Number as a 32-bit integer of descendant nodes in this subtree,
361 361 not including this node itself,
362 362 that "have a dirstate entry".
363 363 Those nodes represent files that would be present at all in `dirstate-v1`.
364 364 This is typically less than the total number of descendants.
365 365 This counter is used to implement `has_dir`.
366 366
367 367 * Offset 26:
368 368 Number as a 32-bit integer of descendant nodes in this subtree,
369 369 not including this node itself,
370 370 that represent files tracked in the working directory.
371 371 (For example, `hg rm` makes a file untracked.)
372 372 This counter is used to implement `has_tracked_dir`.
373 373
374 374 * Offset 30:
375 375 A `flags` field that packs some boolean values as bits of a 16-bit integer.
376 376 Starting from least-significant, bit masks are::
377 377
378 378 WDIR_TRACKED = 1 << 0
379 379 P1_TRACKED = 1 << 1
380 380 P2_INFO = 1 << 2
381 381 HAS_MODE_AND_SIZE = 1 << 3
382 HAS_MTIME = 1 << 4
383 MODE_EXEC_PERM = 1 << 5
384 MODE_IS_SYMLINK = 1 << 6
382 HAS_FILE_MTIME = 1 << 4
383 HAS_DIRECTORY_MTIME = 1 << 5
384 MODE_EXEC_PERM = 1 << 6
385 MODE_IS_SYMLINK = 1 << 7
385 386
386 387 The meaning of each bit is described below.
387 388
388 389 Other bits are unset.
389 390 They may be assigned meaning in the future,
390 391 with the limitation that Mercurial versions that pre-date such meaning
391 392 will always reset those bits to unset when writing nodes.
392 393 (A new node is written for any mutation in its subtree,
393 394 leaving the bytes of the old node unreachable
394 395 until the data file is rewritten entirely.)
395 396
396 397 * Offset 32:
397 398 A `size` field described below, as a 32-bit integer.
398 399 Unlike in dirstate-v1, negative values are not used.
399 400
400 401 * Offset 36:
401 402 The seconds component of an `mtime` field described below,
402 403 as a 32-bit integer.
403 404 Unlike in dirstate-v1, negative values are not used.
405 When `mtime` is used, this is the number of seconds since the Unix epoch
406 truncated to its lower 31 bits.
404 407
405 408 * Offset 40:
406 409 The nanoseconds component of an `mtime` field described below,
407 410 as a 32-bit integer.
411 When `mtime` is used,
412 this is the number of nanoseconds since `mtime.seconds`,
413 always strictly less than one billion.
414
415 This may be zero if more precision is not available.
416 (This can happen because of limitations in any of Mercurial, Python,
417 libc, the operating system, …)
418
419 When comparing two mtimes and either has this component set to zero,
420 the sub-second precision of both should be ignored.
421 False positives when checking mtime equality due to clock resolution
422 are always possible and the status algorithm needs to deal with them,
423 but having too many false negatives could be harmful too.
408 424
409 425 * (Offset 44: end of this node)
410 426
411 427 The meaning of the boolean values packed in `flags` is:
412 428
413 429 `WDIR_TRACKED`
414 430 Set if the working directory contains a tracked file at this node’s path.
415 431 This is typically set and unset by `hg add` and `hg rm`.
416 432
417 433 `P1_TRACKED`
418 434 Set if the working directory’s first parent changeset
419 435 (whose node identifier is found in tree metadata)
420 436 contains a tracked file at this node’s path.
421 437 This is a cache to reduce manifest lookups.
422 438
423 439 `P2_INFO`
424 440 Set if the file has been involved in some merge operation.
425 441 Either because it was actually merged,
426 442 or because the version in the second parent p2 was ahead,
427 443 or because some rename moved it there.
428 444 In either case `hg status` will want it displayed as modified.
429 445
430 446 Files that would be mentioned at all in the `dirstate-v1` file format
431 447 have a node with at least one of the above three bits set in `dirstate-v2`.
432 448 Let’s call these files "tracked anywhere",
433 449 and "untracked" the nodes with all three of these bits unset.
434 450 Untracked nodes are typically for directories:
435 451 they hold child nodes and form the tree structure.
436 452 Additional untracked nodes may also exist.
437 453 Although implementations should strive to clean up nodes
438 454 that are entirely unused, other untracked nodes may also exist.
439 455 For example, a future version of Mercurial might in some cases
440 456 add nodes for untracked files or/and ignored files in the working directory
441 457 in order to optimize `hg status`
442 458 by enabling it to skip `readdir` in more cases.
443 459
444 460 `HAS_MODE_AND_SIZE`
445 461 Must be unset for untracked nodes.
446 462 For files tracked anywhere, if this is set:
447 463 - The `size` field is the expected file size,
448 464 in bytes truncated to its lower 31 bits,
449 465 for the file to be clean.
450 466 - The expected execute permission for the file’s owner
451 467 is given by `MODE_EXEC_PERM`
452 468 - The expected file type is given by `MODE_IS_SYMLINK`:
453 469 a symbolic link if set, or a normal file if unset.
454 470 If this is unset the expected size, permission, and file type are unknown.
455 471 The `size` field is unused (set to zero).
456 472
457 `HAS_MTIME`
458 If unset, the `mtime` field is unused (set to zero).
459 If set, it contains a timestamp represented as
460 - the number of seconds since the Unix epoch,
461 truncated to its lower 31 bits.
462 - and the number of nanoseconds since `mtime.seconds`,
463 always stritctly less than one billion.
464 This may be zero if more precision is not available.
465 (This can happen because of limitations in any of Mercurial, Python,
466 libc, the operating system, …)
473 `HAS_FILE_MTIME`
474 Must be unset for untracked nodes.
475 If this and `HAS_DIRECTORY_MTIME` are both unset,
476 the `mtime` field is unused (set to zero).
477 If this is set, `mtime` is the modification time
478 expected for the file to be considered clean.
467 479
468 If set for a file tracked anywhere,
469 `mtime` is the expected modification time for the file to be clean.
470
471 If set for an untracked node, at some point,
480 `HAS_DIRECTORY_MTIME`
481 Must be unset for files tracked anywhere.
482 If this and `HAS_FILE_MTIME` are both unset,
483 the `mtime` field is unused (set to zero).
484 If this is set, at some point,
472 485 this path in the working directory was observed:
473 486
474 487 - To be a directory
475 488 - With the modification time given in `mtime`
476 489 - That time was already strictly in the past when observed,
477 490 meaning that later changes cannot happen in the same clock tick
478 491 and must cause a different modification time
479 492 (unless the system clock jumps back and we get unlucky,
480 493 which is not impossible but deemed unlikely enough).
481 494 - All direct children of this directory
482 495 (as returned by `std::fs::read_dir`)
483 496 either have a corresponding dirstate node,
484 497 or are ignored by ignore patterns whose hash is in tree metadata.
485 498
486 499 This means that if `std::fs::symlink_metadata` later reports
487 500 the same modification time
488 501 and ignored patterns haven’t changed,
489 502 a run of status that is not listing ignored files
490 503 can skip calling `std::fs::read_dir` again for this directory,
491 504 and iterate child dirstate nodes instead.
492 505
493 506 `MODE_EXEC_PERM`
494 507 Must be unset if `HAS_MODE_AND_SIZE` is unset.
495 508 If `HAS_MODE_AND_SIZE` is set,
496 509 this indicates whether the file’s owner is expected
497 510 to have execute permission.
498 511
499 512 `MODE_IS_SYMLINK`
500 513 Must be unset if `HAS_MODE_AND_SIZE` is unset.
501 514 If `HAS_MODE_AND_SIZE` is set,
502 515 this indicates whether the file is expected to be a symlink
503 516 as opposed to a normal file.
@@ -1,789 +1,790 b''
1 1 # parsers.py - Python implementation of parsers.c
2 2 #
3 3 # Copyright 2009 Olivia Mackall <olivia@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import stat
11 11 import struct
12 12 import zlib
13 13
14 14 from ..node import (
15 15 nullrev,
16 16 sha1nodeconstants,
17 17 )
18 18 from ..thirdparty import attr
19 19 from .. import (
20 20 error,
21 21 pycompat,
22 22 revlogutils,
23 23 util,
24 24 )
25 25
26 26 from ..revlogutils import nodemap as nodemaputil
27 27 from ..revlogutils import constants as revlog_constants
28 28
29 29 stringio = pycompat.bytesio
30 30
31 31
32 32 _pack = struct.pack
33 33 _unpack = struct.unpack
34 34 _compress = zlib.compress
35 35 _decompress = zlib.decompress
36 36
37 37
38 38 # a special value used internally for `size` if the file comes from the other parent
39 39 FROM_P2 = -2
40 40
41 41 # a special value used internally for `size` if the file is modified/merged/added
42 42 NONNORMAL = -1
43 43
44 44 # a special value used internally for `time` if the time is ambiguous
45 45 AMBIGUOUS_TIME = -1
46 46
47 47 # Bits of the `flags` byte inside a node in the file format
48 48 DIRSTATE_V2_WDIR_TRACKED = 1 << 0
49 49 DIRSTATE_V2_P1_TRACKED = 1 << 1
50 50 DIRSTATE_V2_P2_INFO = 1 << 2
51 51 DIRSTATE_V2_HAS_MODE_AND_SIZE = 1 << 3
52 DIRSTATE_V2_HAS_MTIME = 1 << 4
53 DIRSTATE_V2_MODE_EXEC_PERM = 1 << 5
54 DIRSTATE_V2_MODE_IS_SYMLINK = 1 << 6
52 DIRSTATE_V2_HAS_FILE_MTIME = 1 << 4
53 _DIRSTATE_V2_HAS_DIRCTORY_MTIME = 1 << 5 # Unused when Rust is not available
54 DIRSTATE_V2_MODE_EXEC_PERM = 1 << 6
55 DIRSTATE_V2_MODE_IS_SYMLINK = 1 << 7
55 56
56 57
57 58 @attr.s(slots=True, init=False)
58 59 class DirstateItem(object):
59 60 """represent a dirstate entry
60 61
61 62 It holds multiple attributes
62 63
63 64 # about file tracking
64 65 - wc_tracked: is the file tracked by the working copy
65 66 - p1_tracked: is the file tracked in working copy first parent
66 67 - p2_info: the file has been involved in some merge operation. Either
67 68 because it was actually merged, or because the p2 version was
68 69 ahead, or because some rename moved it there. In either case
69 70 `hg status` will want it displayed as modified.
70 71
71 72 # about the file state expected from p1 manifest:
72 73 - mode: the file mode in p1
73 74 - size: the file size in p1
74 75
75 76 These values can be set to None, which means we don't have a meaningful value
76 77 to compare with. Either because we don't really care about them as their
77 78 `status` is known without having to look at the disk or because we don't
78 79 know these right now and a full comparison will be needed to find out if
79 80 the file is clean.
80 81
81 82 # about the file state on disk last time we saw it:
82 83 - mtime: the last known clean mtime for the file.
83 84
84 85 This value can be set to None if no cacheable state exists. Either because we
85 86 do not care (see previous section) or because we could not cache something
86 87 yet.
87 88 """
88 89
89 90 _wc_tracked = attr.ib()
90 91 _p1_tracked = attr.ib()
91 92 _p2_info = attr.ib()
92 93 _mode = attr.ib()
93 94 _size = attr.ib()
94 95 _mtime = attr.ib()
95 96
96 97 def __init__(
97 98 self,
98 99 wc_tracked=False,
99 100 p1_tracked=False,
100 101 p2_info=False,
101 102 has_meaningful_data=True,
102 103 has_meaningful_mtime=True,
103 104 parentfiledata=None,
104 105 ):
105 106 self._wc_tracked = wc_tracked
106 107 self._p1_tracked = p1_tracked
107 108 self._p2_info = p2_info
108 109
109 110 self._mode = None
110 111 self._size = None
111 112 self._mtime = None
112 113 if parentfiledata is None:
113 114 has_meaningful_mtime = False
114 115 has_meaningful_data = False
115 116 if has_meaningful_data:
116 117 self._mode = parentfiledata[0]
117 118 self._size = parentfiledata[1]
118 119 if has_meaningful_mtime:
119 120 self._mtime = parentfiledata[2]
120 121
121 122 @classmethod
122 123 def from_v2_data(cls, flags, size, mtime):
123 124 """Build a new DirstateItem object from V2 data"""
124 125 has_mode_size = bool(flags & DIRSTATE_V2_HAS_MODE_AND_SIZE)
125 126 mode = None
126 127 if has_mode_size:
127 128 assert stat.S_IXUSR == 0o100
128 129 if flags & DIRSTATE_V2_MODE_EXEC_PERM:
129 130 mode = 0o755
130 131 else:
131 132 mode = 0o644
132 133 if flags & DIRSTATE_V2_MODE_IS_SYMLINK:
133 134 mode |= stat.S_IFLNK
134 135 else:
135 136 mode |= stat.S_IFREG
136 137 return cls(
137 138 wc_tracked=bool(flags & DIRSTATE_V2_WDIR_TRACKED),
138 139 p1_tracked=bool(flags & DIRSTATE_V2_P1_TRACKED),
139 140 p2_info=bool(flags & DIRSTATE_V2_P2_INFO),
140 141 has_meaningful_data=has_mode_size,
141 has_meaningful_mtime=bool(flags & DIRSTATE_V2_HAS_MTIME),
142 has_meaningful_mtime=bool(flags & DIRSTATE_V2_HAS_FILE_MTIME),
142 143 parentfiledata=(mode, size, mtime),
143 144 )
144 145
145 146 @classmethod
146 147 def from_v1_data(cls, state, mode, size, mtime):
147 148 """Build a new DirstateItem object from V1 data
148 149
149 150 Since the dirstate-v1 format is frozen, the signature of this function
150 151 is not expected to change, unlike the __init__ one.
151 152 """
152 153 if state == b'm':
153 154 return cls(wc_tracked=True, p1_tracked=True, p2_info=True)
154 155 elif state == b'a':
155 156 return cls(wc_tracked=True)
156 157 elif state == b'r':
157 158 if size == NONNORMAL:
158 159 p1_tracked = True
159 160 p2_info = True
160 161 elif size == FROM_P2:
161 162 p1_tracked = False
162 163 p2_info = True
163 164 else:
164 165 p1_tracked = True
165 166 p2_info = False
166 167 return cls(p1_tracked=p1_tracked, p2_info=p2_info)
167 168 elif state == b'n':
168 169 if size == FROM_P2:
169 170 return cls(wc_tracked=True, p2_info=True)
170 171 elif size == NONNORMAL:
171 172 return cls(wc_tracked=True, p1_tracked=True)
172 173 elif mtime == AMBIGUOUS_TIME:
173 174 return cls(
174 175 wc_tracked=True,
175 176 p1_tracked=True,
176 177 has_meaningful_mtime=False,
177 178 parentfiledata=(mode, size, 42),
178 179 )
179 180 else:
180 181 return cls(
181 182 wc_tracked=True,
182 183 p1_tracked=True,
183 184 parentfiledata=(mode, size, mtime),
184 185 )
185 186 else:
186 187 raise RuntimeError(b'unknown state: %s' % state)
187 188
188 189 def set_possibly_dirty(self):
189 190 """Mark a file as "possibly dirty"
190 191
191 192 This means the next status call will have to actually check its content
192 193 to make sure it is correct.
193 194 """
194 195 self._mtime = None
195 196
196 197 def set_clean(self, mode, size, mtime):
197 198 """mark a file as "clean" cancelling potential "possibly dirty call"
198 199
199 200 Note: this function is a descendant of `dirstate.normal` and is
200 201 currently expected to be call on "normal" entry only. There are not
201 202 reason for this to not change in the future as long as the ccode is
202 203 updated to preserve the proper state of the non-normal files.
203 204 """
204 205 self._wc_tracked = True
205 206 self._p1_tracked = True
206 207 self._mode = mode
207 208 self._size = size
208 209 self._mtime = mtime
209 210
210 211 def set_tracked(self):
211 212 """mark a file as tracked in the working copy
212 213
213 214 This will ultimately be called by command like `hg add`.
214 215 """
215 216 self._wc_tracked = True
216 217 # `set_tracked` is replacing various `normallookup` call. So we mark
217 218 # the files as needing lookup
218 219 #
219 220 # Consider dropping this in the future in favor of something less broad.
220 221 self._mtime = None
221 222
222 223 def set_untracked(self):
223 224 """mark a file as untracked in the working copy
224 225
225 226 This will ultimately be called by command like `hg remove`.
226 227 """
227 228 self._wc_tracked = False
228 229 self._mode = None
229 230 self._size = None
230 231 self._mtime = None
231 232
232 233 def drop_merge_data(self):
233 234 """remove all "merge-only" from a DirstateItem
234 235
235 236 This is to be called by the dirstatemap code when the second parent is dropped
236 237 """
237 238 if self._p2_info:
238 239 self._p2_info = False
239 240 self._mode = None
240 241 self._size = None
241 242 self._mtime = None
242 243
243 244 @property
244 245 def mode(self):
245 246 return self.v1_mode()
246 247
247 248 @property
248 249 def size(self):
249 250 return self.v1_size()
250 251
251 252 @property
252 253 def mtime(self):
253 254 return self.v1_mtime()
254 255
255 256 @property
256 257 def state(self):
257 258 """
258 259 States are:
259 260 n normal
260 261 m needs merging
261 262 r marked for removal
262 263 a marked for addition
263 264
264 265 XXX This "state" is a bit obscure and mostly a direct expression of the
265 266 dirstatev1 format. It would make sense to ultimately deprecate it in
266 267 favor of the more "semantic" attributes.
267 268 """
268 269 if not self.any_tracked:
269 270 return b'?'
270 271 return self.v1_state()
271 272
272 273 @property
273 274 def tracked(self):
274 275 """True is the file is tracked in the working copy"""
275 276 return self._wc_tracked
276 277
277 278 @property
278 279 def any_tracked(self):
279 280 """True if the file is tracked anywhere (wc or parents)"""
280 281 return self._wc_tracked or self._p1_tracked or self._p2_info
281 282
282 283 @property
283 284 def added(self):
284 285 """True if the file has been added"""
285 286 return self._wc_tracked and not (self._p1_tracked or self._p2_info)
286 287
287 288 @property
288 289 def maybe_clean(self):
289 290 """True if the file has a chance to be in the "clean" state"""
290 291 if not self._wc_tracked:
291 292 return False
292 293 elif not self._p1_tracked:
293 294 return False
294 295 elif self._p2_info:
295 296 return False
296 297 return True
297 298
298 299 @property
299 300 def p1_tracked(self):
300 301 """True if the file is tracked in the first parent manifest"""
301 302 return self._p1_tracked
302 303
303 304 @property
304 305 def p2_info(self):
305 306 """True if the file needed to merge or apply any input from p2
306 307
307 308 See the class documentation for details.
308 309 """
309 310 return self._wc_tracked and self._p2_info
310 311
311 312 @property
312 313 def removed(self):
313 314 """True if the file has been removed"""
314 315 return not self._wc_tracked and (self._p1_tracked or self._p2_info)
315 316
316 317 def v2_data(self):
317 318 """Returns (flags, mode, size, mtime) for v2 serialization"""
318 319 flags = 0
319 320 if self._wc_tracked:
320 321 flags |= DIRSTATE_V2_WDIR_TRACKED
321 322 if self._p1_tracked:
322 323 flags |= DIRSTATE_V2_P1_TRACKED
323 324 if self._p2_info:
324 325 flags |= DIRSTATE_V2_P2_INFO
325 326 if self._mode is not None and self._size is not None:
326 327 flags |= DIRSTATE_V2_HAS_MODE_AND_SIZE
327 328 if self.mode & stat.S_IXUSR:
328 329 flags |= DIRSTATE_V2_MODE_EXEC_PERM
329 330 if stat.S_ISLNK(self.mode):
330 331 flags |= DIRSTATE_V2_MODE_IS_SYMLINK
331 332 if self._mtime is not None:
332 flags |= DIRSTATE_V2_HAS_MTIME
333 flags |= DIRSTATE_V2_HAS_FILE_MTIME
333 334 return (flags, self._size or 0, self._mtime or 0)
334 335
335 336 def v1_state(self):
336 337 """return a "state" suitable for v1 serialization"""
337 338 if not self.any_tracked:
338 339 # the object has no state to record, this is -currently-
339 340 # unsupported
340 341 raise RuntimeError('untracked item')
341 342 elif self.removed:
342 343 return b'r'
343 344 elif self._p1_tracked and self._p2_info:
344 345 return b'm'
345 346 elif self.added:
346 347 return b'a'
347 348 else:
348 349 return b'n'
349 350
350 351 def v1_mode(self):
351 352 """return a "mode" suitable for v1 serialization"""
352 353 return self._mode if self._mode is not None else 0
353 354
354 355 def v1_size(self):
355 356 """return a "size" suitable for v1 serialization"""
356 357 if not self.any_tracked:
357 358 # the object has no state to record, this is -currently-
358 359 # unsupported
359 360 raise RuntimeError('untracked item')
360 361 elif self.removed and self._p1_tracked and self._p2_info:
361 362 return NONNORMAL
362 363 elif self._p2_info:
363 364 return FROM_P2
364 365 elif self.removed:
365 366 return 0
366 367 elif self.added:
367 368 return NONNORMAL
368 369 elif self._size is None:
369 370 return NONNORMAL
370 371 else:
371 372 return self._size
372 373
373 374 def v1_mtime(self):
374 375 """return a "mtime" suitable for v1 serialization"""
375 376 if not self.any_tracked:
376 377 # the object has no state to record, this is -currently-
377 378 # unsupported
378 379 raise RuntimeError('untracked item')
379 380 elif self.removed:
380 381 return 0
381 382 elif self._mtime is None:
382 383 return AMBIGUOUS_TIME
383 384 elif self._p2_info:
384 385 return AMBIGUOUS_TIME
385 386 elif not self._p1_tracked:
386 387 return AMBIGUOUS_TIME
387 388 else:
388 389 return self._mtime
389 390
390 391 def need_delay(self, now):
391 392 """True if the stored mtime would be ambiguous with the current time"""
392 393 return self.v1_state() == b'n' and self.v1_mtime() == now
393 394
394 395
395 396 def gettype(q):
396 397 return int(q & 0xFFFF)
397 398
398 399
399 400 class BaseIndexObject(object):
400 401 # Can I be passed to an algorithme implemented in Rust ?
401 402 rust_ext_compat = 0
402 403 # Format of an index entry according to Python's `struct` language
403 404 index_format = revlog_constants.INDEX_ENTRY_V1
404 405 # Size of a C unsigned long long int, platform independent
405 406 big_int_size = struct.calcsize(b'>Q')
406 407 # Size of a C long int, platform independent
407 408 int_size = struct.calcsize(b'>i')
408 409 # An empty index entry, used as a default value to be overridden, or nullrev
409 410 null_item = (
410 411 0,
411 412 0,
412 413 0,
413 414 -1,
414 415 -1,
415 416 -1,
416 417 -1,
417 418 sha1nodeconstants.nullid,
418 419 0,
419 420 0,
420 421 revlog_constants.COMP_MODE_INLINE,
421 422 revlog_constants.COMP_MODE_INLINE,
422 423 )
423 424
424 425 @util.propertycache
425 426 def entry_size(self):
426 427 return self.index_format.size
427 428
428 429 @property
429 430 def nodemap(self):
430 431 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
431 432 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
432 433 return self._nodemap
433 434
434 435 @util.propertycache
435 436 def _nodemap(self):
436 437 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
437 438 for r in range(0, len(self)):
438 439 n = self[r][7]
439 440 nodemap[n] = r
440 441 return nodemap
441 442
442 443 def has_node(self, node):
443 444 """return True if the node exist in the index"""
444 445 return node in self._nodemap
445 446
446 447 def rev(self, node):
447 448 """return a revision for a node
448 449
449 450 If the node is unknown, raise a RevlogError"""
450 451 return self._nodemap[node]
451 452
452 453 def get_rev(self, node):
453 454 """return a revision for a node
454 455
455 456 If the node is unknown, return None"""
456 457 return self._nodemap.get(node)
457 458
458 459 def _stripnodes(self, start):
459 460 if '_nodemap' in vars(self):
460 461 for r in range(start, len(self)):
461 462 n = self[r][7]
462 463 del self._nodemap[n]
463 464
464 465 def clearcaches(self):
465 466 self.__dict__.pop('_nodemap', None)
466 467
467 468 def __len__(self):
468 469 return self._lgt + len(self._extra)
469 470
470 471 def append(self, tup):
471 472 if '_nodemap' in vars(self):
472 473 self._nodemap[tup[7]] = len(self)
473 474 data = self._pack_entry(len(self), tup)
474 475 self._extra.append(data)
475 476
476 477 def _pack_entry(self, rev, entry):
477 478 assert entry[8] == 0
478 479 assert entry[9] == 0
479 480 return self.index_format.pack(*entry[:8])
480 481
481 482 def _check_index(self, i):
482 483 if not isinstance(i, int):
483 484 raise TypeError(b"expecting int indexes")
484 485 if i < 0 or i >= len(self):
485 486 raise IndexError
486 487
487 488 def __getitem__(self, i):
488 489 if i == -1:
489 490 return self.null_item
490 491 self._check_index(i)
491 492 if i >= self._lgt:
492 493 data = self._extra[i - self._lgt]
493 494 else:
494 495 index = self._calculate_index(i)
495 496 data = self._data[index : index + self.entry_size]
496 497 r = self._unpack_entry(i, data)
497 498 if self._lgt and i == 0:
498 499 offset = revlogutils.offset_type(0, gettype(r[0]))
499 500 r = (offset,) + r[1:]
500 501 return r
501 502
502 503 def _unpack_entry(self, rev, data):
503 504 r = self.index_format.unpack(data)
504 505 r = r + (
505 506 0,
506 507 0,
507 508 revlog_constants.COMP_MODE_INLINE,
508 509 revlog_constants.COMP_MODE_INLINE,
509 510 )
510 511 return r
511 512
512 513 def pack_header(self, header):
513 514 """pack header information as binary"""
514 515 v_fmt = revlog_constants.INDEX_HEADER
515 516 return v_fmt.pack(header)
516 517
517 518 def entry_binary(self, rev):
518 519 """return the raw binary string representing a revision"""
519 520 entry = self[rev]
520 521 p = revlog_constants.INDEX_ENTRY_V1.pack(*entry[:8])
521 522 if rev == 0:
522 523 p = p[revlog_constants.INDEX_HEADER.size :]
523 524 return p
524 525
525 526
526 527 class IndexObject(BaseIndexObject):
527 528 def __init__(self, data):
528 529 assert len(data) % self.entry_size == 0, (
529 530 len(data),
530 531 self.entry_size,
531 532 len(data) % self.entry_size,
532 533 )
533 534 self._data = data
534 535 self._lgt = len(data) // self.entry_size
535 536 self._extra = []
536 537
537 538 def _calculate_index(self, i):
538 539 return i * self.entry_size
539 540
540 541 def __delitem__(self, i):
541 542 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
542 543 raise ValueError(b"deleting slices only supports a:-1 with step 1")
543 544 i = i.start
544 545 self._check_index(i)
545 546 self._stripnodes(i)
546 547 if i < self._lgt:
547 548 self._data = self._data[: i * self.entry_size]
548 549 self._lgt = i
549 550 self._extra = []
550 551 else:
551 552 self._extra = self._extra[: i - self._lgt]
552 553
553 554
554 555 class PersistentNodeMapIndexObject(IndexObject):
555 556 """a Debug oriented class to test persistent nodemap
556 557
557 558 We need a simple python object to test API and higher level behavior. See
558 559 the Rust implementation for more serious usage. This should be used only
559 560 through the dedicated `devel.persistent-nodemap` config.
560 561 """
561 562
562 563 def nodemap_data_all(self):
563 564 """Return bytes containing a full serialization of a nodemap
564 565
565 566 The nodemap should be valid for the full set of revisions in the
566 567 index."""
567 568 return nodemaputil.persistent_data(self)
568 569
569 570 def nodemap_data_incremental(self):
570 571 """Return bytes containing a incremental update to persistent nodemap
571 572
572 573 This contains the data for an append-only update of the data provided
573 574 in the last call to `update_nodemap_data`.
574 575 """
575 576 if self._nm_root is None:
576 577 return None
577 578 docket = self._nm_docket
578 579 changed, data = nodemaputil.update_persistent_data(
579 580 self, self._nm_root, self._nm_max_idx, self._nm_docket.tip_rev
580 581 )
581 582
582 583 self._nm_root = self._nm_max_idx = self._nm_docket = None
583 584 return docket, changed, data
584 585
585 586 def update_nodemap_data(self, docket, nm_data):
586 587 """provide full block of persisted binary data for a nodemap
587 588
588 589 The data are expected to come from disk. See `nodemap_data_all` for a
589 590 producer of such data."""
590 591 if nm_data is not None:
591 592 self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
592 593 if self._nm_root:
593 594 self._nm_docket = docket
594 595 else:
595 596 self._nm_root = self._nm_max_idx = self._nm_docket = None
596 597
597 598
598 599 class InlinedIndexObject(BaseIndexObject):
599 600 def __init__(self, data, inline=0):
600 601 self._data = data
601 602 self._lgt = self._inline_scan(None)
602 603 self._inline_scan(self._lgt)
603 604 self._extra = []
604 605
605 606 def _inline_scan(self, lgt):
606 607 off = 0
607 608 if lgt is not None:
608 609 self._offsets = [0] * lgt
609 610 count = 0
610 611 while off <= len(self._data) - self.entry_size:
611 612 start = off + self.big_int_size
612 613 (s,) = struct.unpack(
613 614 b'>i',
614 615 self._data[start : start + self.int_size],
615 616 )
616 617 if lgt is not None:
617 618 self._offsets[count] = off
618 619 count += 1
619 620 off += self.entry_size + s
620 621 if off != len(self._data):
621 622 raise ValueError(b"corrupted data")
622 623 return count
623 624
624 625 def __delitem__(self, i):
625 626 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
626 627 raise ValueError(b"deleting slices only supports a:-1 with step 1")
627 628 i = i.start
628 629 self._check_index(i)
629 630 self._stripnodes(i)
630 631 if i < self._lgt:
631 632 self._offsets = self._offsets[:i]
632 633 self._lgt = i
633 634 self._extra = []
634 635 else:
635 636 self._extra = self._extra[: i - self._lgt]
636 637
637 638 def _calculate_index(self, i):
638 639 return self._offsets[i]
639 640
640 641
641 642 def parse_index2(data, inline, revlogv2=False):
642 643 if not inline:
643 644 cls = IndexObject2 if revlogv2 else IndexObject
644 645 return cls(data), None
645 646 cls = InlinedIndexObject
646 647 return cls(data, inline), (0, data)
647 648
648 649
649 650 def parse_index_cl_v2(data):
650 651 return IndexChangelogV2(data), None
651 652
652 653
653 654 class IndexObject2(IndexObject):
654 655 index_format = revlog_constants.INDEX_ENTRY_V2
655 656
656 657 def replace_sidedata_info(
657 658 self,
658 659 rev,
659 660 sidedata_offset,
660 661 sidedata_length,
661 662 offset_flags,
662 663 compression_mode,
663 664 ):
664 665 """
665 666 Replace an existing index entry's sidedata offset and length with new
666 667 ones.
667 668 This cannot be used outside of the context of sidedata rewriting,
668 669 inside the transaction that creates the revision `rev`.
669 670 """
670 671 if rev < 0:
671 672 raise KeyError
672 673 self._check_index(rev)
673 674 if rev < self._lgt:
674 675 msg = b"cannot rewrite entries outside of this transaction"
675 676 raise KeyError(msg)
676 677 else:
677 678 entry = list(self[rev])
678 679 entry[0] = offset_flags
679 680 entry[8] = sidedata_offset
680 681 entry[9] = sidedata_length
681 682 entry[11] = compression_mode
682 683 entry = tuple(entry)
683 684 new = self._pack_entry(rev, entry)
684 685 self._extra[rev - self._lgt] = new
685 686
686 687 def _unpack_entry(self, rev, data):
687 688 data = self.index_format.unpack(data)
688 689 entry = data[:10]
689 690 data_comp = data[10] & 3
690 691 sidedata_comp = (data[10] & (3 << 2)) >> 2
691 692 return entry + (data_comp, sidedata_comp)
692 693
693 694 def _pack_entry(self, rev, entry):
694 695 data = entry[:10]
695 696 data_comp = entry[10] & 3
696 697 sidedata_comp = (entry[11] & 3) << 2
697 698 data += (data_comp | sidedata_comp,)
698 699
699 700 return self.index_format.pack(*data)
700 701
701 702 def entry_binary(self, rev):
702 703 """return the raw binary string representing a revision"""
703 704 entry = self[rev]
704 705 return self._pack_entry(rev, entry)
705 706
706 707 def pack_header(self, header):
707 708 """pack header information as binary"""
708 709 msg = 'version header should go in the docket, not the index: %d'
709 710 msg %= header
710 711 raise error.ProgrammingError(msg)
711 712
712 713
713 714 class IndexChangelogV2(IndexObject2):
714 715 index_format = revlog_constants.INDEX_ENTRY_CL_V2
715 716
716 717 def _unpack_entry(self, rev, data, r=True):
717 718 items = self.index_format.unpack(data)
718 719 entry = items[:3] + (rev, rev) + items[3:8]
719 720 data_comp = items[8] & 3
720 721 sidedata_comp = (items[8] >> 2) & 3
721 722 return entry + (data_comp, sidedata_comp)
722 723
723 724 def _pack_entry(self, rev, entry):
724 725 assert entry[3] == rev, entry[3]
725 726 assert entry[4] == rev, entry[4]
726 727 data = entry[:3] + entry[5:10]
727 728 data_comp = entry[10] & 3
728 729 sidedata_comp = (entry[11] & 3) << 2
729 730 data += (data_comp | sidedata_comp,)
730 731 return self.index_format.pack(*data)
731 732
732 733
733 734 def parse_index_devel_nodemap(data, inline):
734 735 """like parse_index2, but always return a PersistentNodeMapIndexObject"""
735 736 return PersistentNodeMapIndexObject(data), None
736 737
737 738
738 739 def parse_dirstate(dmap, copymap, st):
739 740 parents = [st[:20], st[20:40]]
740 741 # dereference fields so they will be local in loop
741 742 format = b">cllll"
742 743 e_size = struct.calcsize(format)
743 744 pos1 = 40
744 745 l = len(st)
745 746
746 747 # the inner loop
747 748 while pos1 < l:
748 749 pos2 = pos1 + e_size
749 750 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
750 751 pos1 = pos2 + e[4]
751 752 f = st[pos2:pos1]
752 753 if b'\0' in f:
753 754 f, c = f.split(b'\0')
754 755 copymap[f] = c
755 756 dmap[f] = DirstateItem.from_v1_data(*e[:4])
756 757 return parents
757 758
758 759
759 760 def pack_dirstate(dmap, copymap, pl, now):
760 761 now = int(now)
761 762 cs = stringio()
762 763 write = cs.write
763 764 write(b"".join(pl))
764 765 for f, e in pycompat.iteritems(dmap):
765 766 if e.need_delay(now):
766 767 # The file was last modified "simultaneously" with the current
767 768 # write to dirstate (i.e. within the same second for file-
768 769 # systems with a granularity of 1 sec). This commonly happens
769 770 # for at least a couple of files on 'update'.
770 771 # The user could change the file without changing its size
771 772 # within the same second. Invalidate the file's mtime in
772 773 # dirstate, forcing future 'status' calls to compare the
773 774 # contents of the file if the size is the same. This prevents
774 775 # mistakenly treating such files as clean.
775 776 e.set_possibly_dirty()
776 777
777 778 if f in copymap:
778 779 f = b"%s\0%s" % (f, copymap[f])
779 780 e = _pack(
780 781 b">cllll",
781 782 e.v1_state(),
782 783 e.v1_mode(),
783 784 e.v1_size(),
784 785 e.v1_mtime(),
785 786 len(f),
786 787 )
787 788 write(e)
788 789 write(f)
789 790 return cs.getvalue()
@@ -1,721 +1,726 b''
1 1 //! The "version 2" disk representation of the dirstate
2 2 //!
3 3 //! See `mercurial/helptext/internals/dirstate-v2.txt`
4 4
5 5 use crate::dirstate::TruncatedTimestamp;
6 6 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
7 7 use crate::dirstate_tree::path_with_basename::WithBasename;
8 8 use crate::errors::HgError;
9 9 use crate::utils::hg_path::HgPath;
10 10 use crate::DirstateEntry;
11 11 use crate::DirstateError;
12 12 use crate::DirstateParents;
13 13 use bitflags::bitflags;
14 14 use bytes_cast::unaligned::{U16Be, U32Be};
15 15 use bytes_cast::BytesCast;
16 16 use format_bytes::format_bytes;
17 17 use std::borrow::Cow;
18 18 use std::convert::{TryFrom, TryInto};
19 19
20 20 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
21 21 /// This is a redundant sanity check more than an actual "magic number" since
22 22 /// `.hg/requires` already governs which format should be used.
23 23 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
24 24
25 25 /// Keep space for 256-bit hashes
26 26 const STORED_NODE_ID_BYTES: usize = 32;
27 27
28 28 /// … even though only 160 bits are used for now, with SHA-1
29 29 const USED_NODE_ID_BYTES: usize = 20;
30 30
31 31 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
32 32 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
33 33
34 34 /// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
35 35 const TREE_METADATA_SIZE: usize = 44;
36 36 const NODE_SIZE: usize = 44;
37 37
38 38 /// Make sure that size-affecting changes are made knowingly
39 39 #[allow(unused)]
40 40 fn static_assert_size_of() {
41 41 let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
42 42 let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
43 43 let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
44 44 }
45 45
46 46 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
47 47 #[derive(BytesCast)]
48 48 #[repr(C)]
49 49 struct DocketHeader {
50 50 marker: [u8; V2_FORMAT_MARKER.len()],
51 51 parent_1: [u8; STORED_NODE_ID_BYTES],
52 52 parent_2: [u8; STORED_NODE_ID_BYTES],
53 53
54 54 metadata: TreeMetadata,
55 55
56 56 /// Counted in bytes
57 57 data_size: Size,
58 58
59 59 uuid_size: u8,
60 60 }
61 61
62 62 pub struct Docket<'on_disk> {
63 63 header: &'on_disk DocketHeader,
64 64 uuid: &'on_disk [u8],
65 65 }
66 66
67 67 /// Fields are documented in the *Tree metadata in the docket file*
68 68 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
69 69 #[derive(BytesCast)]
70 70 #[repr(C)]
71 71 struct TreeMetadata {
72 72 root_nodes: ChildNodes,
73 73 nodes_with_entry_count: Size,
74 74 nodes_with_copy_source_count: Size,
75 75 unreachable_bytes: Size,
76 76 unused: [u8; 4],
77 77
78 78 /// See *Optional hash of ignore patterns* section of
79 79 /// `mercurial/helptext/internals/dirstate-v2.txt`
80 80 ignore_patterns_hash: IgnorePatternsHash,
81 81 }
82 82
83 83 /// Fields are documented in the *The data file format*
84 84 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
85 85 #[derive(BytesCast)]
86 86 #[repr(C)]
87 87 pub(super) struct Node {
88 88 full_path: PathSlice,
89 89
90 90 /// In bytes from `self.full_path.start`
91 91 base_name_start: PathSize,
92 92
93 93 copy_source: OptPathSlice,
94 94 children: ChildNodes,
95 95 pub(super) descendants_with_entry_count: Size,
96 96 pub(super) tracked_descendants_count: Size,
97 97 flags: U16Be,
98 98 size: U32Be,
99 99 mtime: PackedTruncatedTimestamp,
100 100 }
101 101
102 102 bitflags! {
103 103 #[repr(C)]
104 104 struct Flags: u16 {
105 105 const WDIR_TRACKED = 1 << 0;
106 106 const P1_TRACKED = 1 << 1;
107 107 const P2_INFO = 1 << 2;
108 108 const HAS_MODE_AND_SIZE = 1 << 3;
109 const HAS_MTIME = 1 << 4;
110 const MODE_EXEC_PERM = 1 << 5;
111 const MODE_IS_SYMLINK = 1 << 6;
109 const HAS_FILE_MTIME = 1 << 4;
110 const HAS_DIRECTORY_MTIME = 1 << 5;
111 const MODE_EXEC_PERM = 1 << 6;
112 const MODE_IS_SYMLINK = 1 << 7;
112 113 }
113 114 }
114 115
115 116 /// Duration since the Unix epoch
116 117 #[derive(BytesCast, Copy, Clone)]
117 118 #[repr(C)]
118 119 struct PackedTruncatedTimestamp {
119 120 truncated_seconds: U32Be,
120 121 nanoseconds: U32Be,
121 122 }
122 123
123 124 /// Counted in bytes from the start of the file
124 125 ///
125 126 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
126 127 type Offset = U32Be;
127 128
128 129 /// Counted in number of items
129 130 ///
130 131 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
131 132 type Size = U32Be;
132 133
133 134 /// Counted in bytes
134 135 ///
135 136 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
136 137 type PathSize = U16Be;
137 138
138 139 /// A contiguous sequence of `len` times `Node`, representing the child nodes
139 140 /// of either some other node or of the repository root.
140 141 ///
141 142 /// Always sorted by ascending `full_path`, to allow binary search.
142 143 /// Since nodes with the same parent nodes also have the same parent path,
143 144 /// only the `base_name`s need to be compared during binary search.
144 145 #[derive(BytesCast, Copy, Clone)]
145 146 #[repr(C)]
146 147 struct ChildNodes {
147 148 start: Offset,
148 149 len: Size,
149 150 }
150 151
151 152 /// A `HgPath` of `len` bytes
152 153 #[derive(BytesCast, Copy, Clone)]
153 154 #[repr(C)]
154 155 struct PathSlice {
155 156 start: Offset,
156 157 len: PathSize,
157 158 }
158 159
159 160 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
160 161 type OptPathSlice = PathSlice;
161 162
162 163 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
163 164 ///
164 165 /// This should only happen if Mercurial is buggy or a repository is corrupted.
165 166 #[derive(Debug)]
166 167 pub struct DirstateV2ParseError;
167 168
168 169 impl From<DirstateV2ParseError> for HgError {
169 170 fn from(_: DirstateV2ParseError) -> Self {
170 171 HgError::corrupted("dirstate-v2 parse error")
171 172 }
172 173 }
173 174
174 175 impl From<DirstateV2ParseError> for crate::DirstateError {
175 176 fn from(error: DirstateV2ParseError) -> Self {
176 177 HgError::from(error).into()
177 178 }
178 179 }
179 180
180 181 impl<'on_disk> Docket<'on_disk> {
181 182 pub fn parents(&self) -> DirstateParents {
182 183 use crate::Node;
183 184 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
184 185 .unwrap()
185 186 .clone();
186 187 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
187 188 .unwrap()
188 189 .clone();
189 190 DirstateParents { p1, p2 }
190 191 }
191 192
192 193 pub fn tree_metadata(&self) -> &[u8] {
193 194 self.header.metadata.as_bytes()
194 195 }
195 196
196 197 pub fn data_size(&self) -> usize {
197 198 // This `unwrap` could only panic on a 16-bit CPU
198 199 self.header.data_size.get().try_into().unwrap()
199 200 }
200 201
201 202 pub fn data_filename(&self) -> String {
202 203 String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
203 204 }
204 205 }
205 206
206 207 pub fn read_docket(
207 208 on_disk: &[u8],
208 209 ) -> Result<Docket<'_>, DirstateV2ParseError> {
209 210 let (header, uuid) =
210 211 DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
211 212 let uuid_size = header.uuid_size as usize;
212 213 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
213 214 Ok(Docket { header, uuid })
214 215 } else {
215 216 Err(DirstateV2ParseError)
216 217 }
217 218 }
218 219
219 220 pub(super) fn read<'on_disk>(
220 221 on_disk: &'on_disk [u8],
221 222 metadata: &[u8],
222 223 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
223 224 if on_disk.is_empty() {
224 225 return Ok(DirstateMap::empty(on_disk));
225 226 }
226 227 let (meta, _) = TreeMetadata::from_bytes(metadata)
227 228 .map_err(|_| DirstateV2ParseError)?;
228 229 let dirstate_map = DirstateMap {
229 230 on_disk,
230 231 root: dirstate_map::ChildNodes::OnDisk(read_nodes(
231 232 on_disk,
232 233 meta.root_nodes,
233 234 )?),
234 235 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
235 236 nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
236 237 ignore_patterns_hash: meta.ignore_patterns_hash,
237 238 unreachable_bytes: meta.unreachable_bytes.get(),
238 239 };
239 240 Ok(dirstate_map)
240 241 }
241 242
242 243 impl Node {
243 244 pub(super) fn full_path<'on_disk>(
244 245 &self,
245 246 on_disk: &'on_disk [u8],
246 247 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
247 248 read_hg_path(on_disk, self.full_path)
248 249 }
249 250
250 251 pub(super) fn base_name_start<'on_disk>(
251 252 &self,
252 253 ) -> Result<usize, DirstateV2ParseError> {
253 254 let start = self.base_name_start.get();
254 255 if start < self.full_path.len.get() {
255 256 let start = usize::try_from(start)
256 257 // u16 -> usize, this conversion can never actually fail
257 258 .expect("dirstate-v2 base_name_start out of bounds");
258 259 Ok(start)
259 260 } else {
260 261 Err(DirstateV2ParseError)
261 262 }
262 263 }
263 264
264 265 pub(super) fn base_name<'on_disk>(
265 266 &self,
266 267 on_disk: &'on_disk [u8],
267 268 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
268 269 let full_path = self.full_path(on_disk)?;
269 270 let base_name_start = self.base_name_start()?;
270 271 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
271 272 }
272 273
273 274 pub(super) fn path<'on_disk>(
274 275 &self,
275 276 on_disk: &'on_disk [u8],
276 277 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
277 278 Ok(WithBasename::from_raw_parts(
278 279 Cow::Borrowed(self.full_path(on_disk)?),
279 280 self.base_name_start()?,
280 281 ))
281 282 }
282 283
283 284 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
284 285 self.copy_source.start.get() != 0
285 286 }
286 287
287 288 pub(super) fn copy_source<'on_disk>(
288 289 &self,
289 290 on_disk: &'on_disk [u8],
290 291 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
291 292 Ok(if self.has_copy_source() {
292 293 Some(read_hg_path(on_disk, self.copy_source)?)
293 294 } else {
294 295 None
295 296 })
296 297 }
297 298
298 299 fn flags(&self) -> Flags {
299 300 Flags::from_bits_truncate(self.flags.get())
300 301 }
301 302
302 303 fn has_entry(&self) -> bool {
303 304 self.flags().intersects(
304 305 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
305 306 )
306 307 }
307 308
308 309 pub(super) fn node_data(
309 310 &self,
310 311 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
311 312 if self.has_entry() {
312 313 Ok(dirstate_map::NodeData::Entry(self.assume_entry()))
313 314 } else if let Some(mtime) = self.cached_directory_mtime()? {
314 315 Ok(dirstate_map::NodeData::CachedDirectory { mtime })
315 316 } else {
316 317 Ok(dirstate_map::NodeData::None)
317 318 }
318 319 }
319 320
320 321 pub(super) fn cached_directory_mtime(
321 322 &self,
322 323 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
323 Ok(
324 if self.flags().contains(Flags::HAS_MTIME) && !self.has_entry() {
325 Some(self.mtime.try_into()?)
324 if self.flags().contains(Flags::HAS_DIRECTORY_MTIME) {
325 if self.flags().contains(Flags::HAS_FILE_MTIME) {
326 Err(DirstateV2ParseError)
326 327 } else {
327 None
328 },
329 )
328 Ok(Some(self.mtime.try_into()?))
329 }
330 } else {
331 Ok(None)
332 }
330 333 }
331 334
332 335 fn synthesize_unix_mode(&self) -> u32 {
333 336 let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
334 337 libc::S_IFLNK
335 338 } else {
336 339 libc::S_IFREG
337 340 };
338 341 let permisions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
339 342 0o755
340 343 } else {
341 344 0o644
342 345 };
343 346 file_type | permisions
344 347 }
345 348
346 349 fn assume_entry(&self) -> DirstateEntry {
347 350 // TODO: convert through raw bits instead?
348 351 let wdir_tracked = self.flags().contains(Flags::WDIR_TRACKED);
349 352 let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
350 353 let p2_info = self.flags().contains(Flags::P2_INFO);
351 354 let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE) {
352 355 Some((self.synthesize_unix_mode(), self.size.into()))
353 356 } else {
354 357 None
355 358 };
356 let mtime = if self.flags().contains(Flags::HAS_MTIME) {
359 let mtime = if self.flags().contains(Flags::HAS_FILE_MTIME) {
357 360 Some(self.mtime.truncated_seconds.into())
358 361 } else {
359 362 None
360 363 };
361 364 DirstateEntry::from_v2_data(
362 365 wdir_tracked,
363 366 p1_tracked,
364 367 p2_info,
365 368 mode_size,
366 369 mtime,
367 370 )
368 371 }
369 372
370 373 pub(super) fn entry(
371 374 &self,
372 375 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
373 376 if self.has_entry() {
374 377 Ok(Some(self.assume_entry()))
375 378 } else {
376 379 Ok(None)
377 380 }
378 381 }
379 382
380 383 pub(super) fn children<'on_disk>(
381 384 &self,
382 385 on_disk: &'on_disk [u8],
383 386 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
384 387 read_nodes(on_disk, self.children)
385 388 }
386 389
387 390 pub(super) fn to_in_memory_node<'on_disk>(
388 391 &self,
389 392 on_disk: &'on_disk [u8],
390 393 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
391 394 Ok(dirstate_map::Node {
392 395 children: dirstate_map::ChildNodes::OnDisk(
393 396 self.children(on_disk)?,
394 397 ),
395 398 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
396 399 data: self.node_data()?,
397 400 descendants_with_entry_count: self
398 401 .descendants_with_entry_count
399 402 .get(),
400 403 tracked_descendants_count: self.tracked_descendants_count.get(),
401 404 })
402 405 }
403 406
404 407 fn from_dirstate_entry(
405 408 entry: &DirstateEntry,
406 409 ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
407 410 let (wdir_tracked, p1_tracked, p2_info, mode_size_opt, mtime_opt) =
408 411 entry.v2_data();
409 412 // TODO: convert through raw flag bits instead?
410 413 let mut flags = Flags::empty();
411 414 flags.set(Flags::WDIR_TRACKED, wdir_tracked);
412 415 flags.set(Flags::P1_TRACKED, p1_tracked);
413 416 flags.set(Flags::P2_INFO, p2_info);
414 417 let size = if let Some((m, s)) = mode_size_opt {
415 418 let exec_perm = m & libc::S_IXUSR != 0;
416 419 let is_symlink = m & libc::S_IFMT == libc::S_IFLNK;
417 420 flags.set(Flags::MODE_EXEC_PERM, exec_perm);
418 421 flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
419 422 flags.insert(Flags::HAS_MODE_AND_SIZE);
420 423 s.into()
421 424 } else {
422 425 0.into()
423 426 };
424 427 let mtime = if let Some(m) = mtime_opt {
425 flags.insert(Flags::HAS_MTIME);
428 flags.insert(Flags::HAS_FILE_MTIME);
426 429 PackedTruncatedTimestamp {
427 430 truncated_seconds: m.into(),
428 431 nanoseconds: 0.into(),
429 432 }
430 433 } else {
431 434 PackedTruncatedTimestamp::null()
432 435 };
433 436 (flags, size, mtime)
434 437 }
435 438 }
436 439
437 440 fn read_hg_path(
438 441 on_disk: &[u8],
439 442 slice: PathSlice,
440 443 ) -> Result<&HgPath, DirstateV2ParseError> {
441 444 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
442 445 }
443 446
444 447 fn read_nodes(
445 448 on_disk: &[u8],
446 449 slice: ChildNodes,
447 450 ) -> Result<&[Node], DirstateV2ParseError> {
448 451 read_slice(on_disk, slice.start, slice.len.get())
449 452 }
450 453
451 454 fn read_slice<T, Len>(
452 455 on_disk: &[u8],
453 456 start: Offset,
454 457 len: Len,
455 458 ) -> Result<&[T], DirstateV2ParseError>
456 459 where
457 460 T: BytesCast,
458 461 Len: TryInto<usize>,
459 462 {
460 463 // Either `usize::MAX` would result in "out of bounds" error since a single
461 464 // `&[u8]` cannot occupy the entire address space.
462 465 let start = start.get().try_into().unwrap_or(std::usize::MAX);
463 466 let len = len.try_into().unwrap_or(std::usize::MAX);
464 467 on_disk
465 468 .get(start..)
466 469 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
467 470 .map(|(slice, _rest)| slice)
468 471 .ok_or_else(|| DirstateV2ParseError)
469 472 }
470 473
471 474 pub(crate) fn for_each_tracked_path<'on_disk>(
472 475 on_disk: &'on_disk [u8],
473 476 metadata: &[u8],
474 477 mut f: impl FnMut(&'on_disk HgPath),
475 478 ) -> Result<(), DirstateV2ParseError> {
476 479 let (meta, _) = TreeMetadata::from_bytes(metadata)
477 480 .map_err(|_| DirstateV2ParseError)?;
478 481 fn recur<'on_disk>(
479 482 on_disk: &'on_disk [u8],
480 483 nodes: ChildNodes,
481 484 f: &mut impl FnMut(&'on_disk HgPath),
482 485 ) -> Result<(), DirstateV2ParseError> {
483 486 for node in read_nodes(on_disk, nodes)? {
484 487 if let Some(entry) = node.entry()? {
485 488 if entry.state().is_tracked() {
486 489 f(node.full_path(on_disk)?)
487 490 }
488 491 }
489 492 recur(on_disk, node.children, f)?
490 493 }
491 494 Ok(())
492 495 }
493 496 recur(on_disk, meta.root_nodes, &mut f)
494 497 }
495 498
496 499 /// Returns new data and metadata, together with whether that data should be
497 500 /// appended to the existing data file whose content is at
498 501 /// `dirstate_map.on_disk` (true), instead of written to a new data file
499 502 /// (false).
500 503 pub(super) fn write(
501 504 dirstate_map: &mut DirstateMap,
502 505 can_append: bool,
503 506 ) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
504 507 let append = can_append && dirstate_map.write_should_append();
505 508
506 509 // This ignores the space for paths, and for nodes without an entry.
507 510 // TODO: better estimate? Skip the `Vec` and write to a file directly?
508 511 let size_guess = std::mem::size_of::<Node>()
509 512 * dirstate_map.nodes_with_entry_count as usize;
510 513
511 514 let mut writer = Writer {
512 515 dirstate_map,
513 516 append,
514 517 out: Vec::with_capacity(size_guess),
515 518 };
516 519
517 520 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
518 521
519 522 let meta = TreeMetadata {
520 523 root_nodes,
521 524 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
522 525 nodes_with_copy_source_count: dirstate_map
523 526 .nodes_with_copy_source_count
524 527 .into(),
525 528 unreachable_bytes: dirstate_map.unreachable_bytes.into(),
526 529 unused: [0; 4],
527 530 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
528 531 };
529 532 Ok((writer.out, meta.as_bytes().to_vec(), append))
530 533 }
531 534
532 535 struct Writer<'dmap, 'on_disk> {
533 536 dirstate_map: &'dmap DirstateMap<'on_disk>,
534 537 append: bool,
535 538 out: Vec<u8>,
536 539 }
537 540
538 541 impl Writer<'_, '_> {
539 542 fn write_nodes(
540 543 &mut self,
541 544 nodes: dirstate_map::ChildNodesRef,
542 545 ) -> Result<ChildNodes, DirstateError> {
543 546 // Reuse already-written nodes if possible
544 547 if self.append {
545 548 if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
546 549 let start = self.on_disk_offset_of(nodes_slice).expect(
547 550 "dirstate-v2 OnDisk nodes not found within on_disk",
548 551 );
549 552 let len = child_nodes_len_from_usize(nodes_slice.len());
550 553 return Ok(ChildNodes { start, len });
551 554 }
552 555 }
553 556
554 557 // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
555 558 // undefined iteration order. Sort to enable binary search in the
556 559 // written file.
557 560 let nodes = nodes.sorted();
558 561 let nodes_len = nodes.len();
559 562
560 563 // First accumulate serialized nodes in a `Vec`
561 564 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
562 565 for node in nodes {
563 566 let children =
564 567 self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
565 568 let full_path = node.full_path(self.dirstate_map.on_disk)?;
566 569 let full_path = self.write_path(full_path.as_bytes());
567 570 let copy_source = if let Some(source) =
568 571 node.copy_source(self.dirstate_map.on_disk)?
569 572 {
570 573 self.write_path(source.as_bytes())
571 574 } else {
572 575 PathSlice {
573 576 start: 0.into(),
574 577 len: 0.into(),
575 578 }
576 579 };
577 580 on_disk_nodes.push(match node {
578 581 NodeRef::InMemory(path, node) => {
579 582 let (flags, size, mtime) = match &node.data {
580 583 dirstate_map::NodeData::Entry(entry) => {
581 584 Node::from_dirstate_entry(entry)
582 585 }
583 dirstate_map::NodeData::CachedDirectory { mtime } => {
584 (Flags::HAS_MTIME, 0.into(), (*mtime).into())
585 }
586 dirstate_map::NodeData::CachedDirectory { mtime } => (
587 Flags::HAS_DIRECTORY_MTIME,
588 0.into(),
589 (*mtime).into(),
590 ),
586 591 dirstate_map::NodeData::None => (
587 592 Flags::empty(),
588 593 0.into(),
589 594 PackedTruncatedTimestamp::null(),
590 595 ),
591 596 };
592 597 Node {
593 598 children,
594 599 copy_source,
595 600 full_path,
596 601 base_name_start: u16::try_from(path.base_name_start())
597 602 // Could only panic for paths over 64 KiB
598 603 .expect("dirstate-v2 path length overflow")
599 604 .into(),
600 605 descendants_with_entry_count: node
601 606 .descendants_with_entry_count
602 607 .into(),
603 608 tracked_descendants_count: node
604 609 .tracked_descendants_count
605 610 .into(),
606 611 flags: flags.bits().into(),
607 612 size,
608 613 mtime,
609 614 }
610 615 }
611 616 NodeRef::OnDisk(node) => Node {
612 617 children,
613 618 copy_source,
614 619 full_path,
615 620 ..*node
616 621 },
617 622 })
618 623 }
619 624 // … so we can write them contiguously, after writing everything else
620 625 // they refer to.
621 626 let start = self.current_offset();
622 627 let len = child_nodes_len_from_usize(nodes_len);
623 628 self.out.extend(on_disk_nodes.as_bytes());
624 629 Ok(ChildNodes { start, len })
625 630 }
626 631
627 632 /// If the given slice of items is within `on_disk`, returns its offset
628 633 /// from the start of `on_disk`.
629 634 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
630 635 where
631 636 T: BytesCast,
632 637 {
633 638 fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
634 639 let start = slice.as_ptr() as usize;
635 640 let end = start + slice.len();
636 641 start..=end
637 642 }
638 643 let slice_addresses = address_range(slice.as_bytes());
639 644 let on_disk_addresses = address_range(self.dirstate_map.on_disk);
640 645 if on_disk_addresses.contains(slice_addresses.start())
641 646 && on_disk_addresses.contains(slice_addresses.end())
642 647 {
643 648 let offset = slice_addresses.start() - on_disk_addresses.start();
644 649 Some(offset_from_usize(offset))
645 650 } else {
646 651 None
647 652 }
648 653 }
649 654
650 655 fn current_offset(&mut self) -> Offset {
651 656 let mut offset = self.out.len();
652 657 if self.append {
653 658 offset += self.dirstate_map.on_disk.len()
654 659 }
655 660 offset_from_usize(offset)
656 661 }
657 662
658 663 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
659 664 let len = path_len_from_usize(slice.len());
660 665 // Reuse an already-written path if possible
661 666 if self.append {
662 667 if let Some(start) = self.on_disk_offset_of(slice) {
663 668 return PathSlice { start, len };
664 669 }
665 670 }
666 671 let start = self.current_offset();
667 672 self.out.extend(slice.as_bytes());
668 673 PathSlice { start, len }
669 674 }
670 675 }
671 676
672 677 fn offset_from_usize(x: usize) -> Offset {
673 678 u32::try_from(x)
674 679 // Could only panic for a dirstate file larger than 4 GiB
675 680 .expect("dirstate-v2 offset overflow")
676 681 .into()
677 682 }
678 683
679 684 fn child_nodes_len_from_usize(x: usize) -> Size {
680 685 u32::try_from(x)
681 686 // Could only panic with over 4 billion nodes
682 687 .expect("dirstate-v2 slice length overflow")
683 688 .into()
684 689 }
685 690
686 691 fn path_len_from_usize(x: usize) -> PathSize {
687 692 u16::try_from(x)
688 693 // Could only panic for paths over 64 KiB
689 694 .expect("dirstate-v2 path length overflow")
690 695 .into()
691 696 }
692 697
693 698 impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
694 699 fn from(timestamp: TruncatedTimestamp) -> Self {
695 700 Self {
696 701 truncated_seconds: timestamp.truncated_seconds().into(),
697 702 nanoseconds: timestamp.nanoseconds().into(),
698 703 }
699 704 }
700 705 }
701 706
702 707 impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
703 708 type Error = DirstateV2ParseError;
704 709
705 710 fn try_from(
706 711 timestamp: PackedTruncatedTimestamp,
707 712 ) -> Result<Self, Self::Error> {
708 713 Self::from_already_truncated(
709 714 timestamp.truncated_seconds.get(),
710 715 timestamp.nanoseconds.get(),
711 716 )
712 717 }
713 718 }
714 719 impl PackedTruncatedTimestamp {
715 720 fn null() -> Self {
716 721 Self {
717 722 truncated_seconds: 0.into(),
718 723 nanoseconds: 0.into(),
719 724 }
720 725 }
721 726 }
General Comments 0
You need to be logged in to leave comments. Login now