##// END OF EJS Templates
dirstate-v2: adds a flag to mark a file as modified...
Simon Sapin -
r49066:1730b2fc default
parent child Browse files
Show More
@@ -1,1174 +1,1177 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12 #include <ctype.h>
13 13 #include <stddef.h>
14 14 #include <string.h>
15 15
16 16 #include "bitmanipulation.h"
17 17 #include "charencode.h"
18 18 #include "util.h"
19 19
20 20 #ifdef IS_PY3K
21 21 /* The mapping of Python types is meant to be temporary to get Python
22 22 * 3 to compile. We should remove this once Python 3 support is fully
23 23 * supported and proper types are used in the extensions themselves. */
24 24 #define PyInt_Check PyLong_Check
25 25 #define PyInt_FromLong PyLong_FromLong
26 26 #define PyInt_FromSsize_t PyLong_FromSsize_t
27 27 #define PyInt_AsLong PyLong_AsLong
28 28 #endif
29 29
/* Error text for a Python minor-version mismatch (not referenced in the part
 * of the file visible here). */
30 30 static const char *const versionerrortext = "Python minor version mismatch";
31 31
/* Sentinels of the dirstate-v1 encoding: a "size" of -2 or -1 carries state
 * information instead of a byte count, and an "mtime" of -1 marks the
 * recorded time as ambiguous. */
32 32 static const int dirstate_v1_from_p2 = -2;
33 33 static const int dirstate_v1_nonnormal = -1;
34 34 static const int ambiguous_time = -1;
35 35
36 36 static PyObject *dict_new_presized(PyObject *self, PyObject *args)
37 37 {
38 38 Py_ssize_t expected_size;
39 39
40 40 if (!PyArg_ParseTuple(args, "n:make_presized_dict", &expected_size)) {
41 41 return NULL;
42 42 }
43 43
44 44 return _dict_new_presized(expected_size);
45 45 }
46 46
47 47 static PyObject *dirstate_item_new(PyTypeObject *subtype, PyObject *args,
48 48 PyObject *kwds)
49 49 {
50 50 /* We do all the initialization here and not a tp_init function because
51 51 * dirstate_item is immutable. */
52 52 dirstateItemObject *t;
53 53 int wc_tracked;
54 54 int p1_tracked;
55 55 int p2_info;
56 56 int has_meaningful_data;
57 57 int has_meaningful_mtime;
58 58 int mode;
59 59 int size;
60 60 int mtime;
61 61 PyObject *parentfiledata;
62 62 static char *keywords_name[] = {
63 63 "wc_tracked",
64 64 "p1_tracked",
65 65 "p2_info",
66 66 "has_meaningful_data",
67 67 "has_meaningful_mtime",
68 68 "parentfiledata",
69 69 NULL,
70 70 };
71 71 wc_tracked = 0;
72 72 p1_tracked = 0;
73 73 p2_info = 0;
74 74 has_meaningful_mtime = 1;
75 75 has_meaningful_data = 1;
76 76 parentfiledata = Py_None;
77 77 if (!PyArg_ParseTupleAndKeywords(
78 78 args, kwds, "|iiiiiO", keywords_name, &wc_tracked, &p1_tracked,
79 79 &p2_info, &has_meaningful_data, &has_meaningful_mtime,
80 80 &parentfiledata)) {
81 81 return NULL;
82 82 }
83 83 t = (dirstateItemObject *)subtype->tp_alloc(subtype, 1);
84 84 if (!t) {
85 85 return NULL;
86 86 }
87 87
88 88 t->flags = 0;
89 89 if (wc_tracked) {
90 90 t->flags |= dirstate_flag_wc_tracked;
91 91 }
92 92 if (p1_tracked) {
93 93 t->flags |= dirstate_flag_p1_tracked;
94 94 }
95 95 if (p2_info) {
96 96 t->flags |= dirstate_flag_p2_info;
97 97 }
98 98
99 99 if (parentfiledata != Py_None) {
100 100 if (!PyTuple_CheckExact(parentfiledata)) {
101 101 PyErr_SetString(
102 102 PyExc_TypeError,
103 103 "parentfiledata should be a Tuple or None");
104 104 return NULL;
105 105 }
106 106 mode = (int)PyLong_AsLong(PyTuple_GetItem(parentfiledata, 0));
107 107 size = (int)PyLong_AsLong(PyTuple_GetItem(parentfiledata, 1));
108 108 mtime = (int)PyLong_AsLong(PyTuple_GetItem(parentfiledata, 2));
109 109 } else {
110 110 has_meaningful_data = 0;
111 111 has_meaningful_mtime = 0;
112 112 }
113 113 if (has_meaningful_data) {
114 114 t->flags |= dirstate_flag_has_meaningful_data;
115 115 t->mode = mode;
116 116 t->size = size;
117 117 } else {
118 118 t->mode = 0;
119 119 t->size = 0;
120 120 }
121 121 if (has_meaningful_mtime) {
122 122 t->flags |= dirstate_flag_has_file_mtime;
123 123 t->mtime = mtime;
124 124 } else {
125 125 t->mtime = 0;
126 126 }
127 127 return (PyObject *)t;
128 128 }
129 129
130 130 static void dirstate_item_dealloc(PyObject *o)
131 131 {
132 132 PyObject_Del(o);
133 133 }
134 134
135 135 static inline bool dirstate_item_c_tracked(dirstateItemObject *self)
136 136 {
137 137 return (self->flags & dirstate_flag_wc_tracked);
138 138 }
139 139
140 140 static inline bool dirstate_item_c_any_tracked(dirstateItemObject *self)
141 141 {
142 const unsigned char mask = dirstate_flag_wc_tracked |
143 dirstate_flag_p1_tracked |
142 const int mask = dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
144 143 dirstate_flag_p2_info;
145 144 return (self->flags & mask);
146 145 }
147 146
148 147 static inline bool dirstate_item_c_added(dirstateItemObject *self)
149 148 {
150 const unsigned char mask =
151 (dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
149 const int mask = (dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
152 150 dirstate_flag_p2_info);
153 const unsigned char target = dirstate_flag_wc_tracked;
151 const int target = dirstate_flag_wc_tracked;
154 152 return (self->flags & mask) == target;
155 153 }
156 154
157 155 static inline bool dirstate_item_c_removed(dirstateItemObject *self)
158 156 {
159 157 if (self->flags & dirstate_flag_wc_tracked) {
160 158 return false;
161 159 }
162 160 return (self->flags &
163 161 (dirstate_flag_p1_tracked | dirstate_flag_p2_info));
164 162 }
165 163
166 164 static inline bool dirstate_item_c_merged(dirstateItemObject *self)
167 165 {
168 166 return ((self->flags & dirstate_flag_wc_tracked) &&
169 167 (self->flags & dirstate_flag_p1_tracked) &&
170 168 (self->flags & dirstate_flag_p2_info));
171 169 }
172 170
173 171 static inline bool dirstate_item_c_from_p2(dirstateItemObject *self)
174 172 {
175 173 return ((self->flags & dirstate_flag_wc_tracked) &&
176 174 !(self->flags & dirstate_flag_p1_tracked) &&
177 175 (self->flags & dirstate_flag_p2_info));
178 176 }
179 177
180 178 static inline char dirstate_item_c_v1_state(dirstateItemObject *self)
181 179 {
182 180 if (dirstate_item_c_removed(self)) {
183 181 return 'r';
184 182 } else if (dirstate_item_c_merged(self)) {
185 183 return 'm';
186 184 } else if (dirstate_item_c_added(self)) {
187 185 return 'a';
188 186 } else {
189 187 return 'n';
190 188 }
191 189 }
192 190
193 191 static inline int dirstate_item_c_v1_mode(dirstateItemObject *self)
194 192 {
195 193 if (self->flags & dirstate_flag_has_meaningful_data) {
196 194 return self->mode;
197 195 } else {
198 196 return 0;
199 197 }
200 198 }
201 199
202 200 static inline int dirstate_item_c_v1_size(dirstateItemObject *self)
203 201 {
204 202 if (!(self->flags & dirstate_flag_wc_tracked) &&
205 203 (self->flags & dirstate_flag_p2_info)) {
206 204 if (self->flags & dirstate_flag_p1_tracked) {
207 205 return dirstate_v1_nonnormal;
208 206 } else {
209 207 return dirstate_v1_from_p2;
210 208 }
211 209 } else if (dirstate_item_c_removed(self)) {
212 210 return 0;
213 211 } else if (self->flags & dirstate_flag_p2_info) {
214 212 return dirstate_v1_from_p2;
215 213 } else if (dirstate_item_c_added(self)) {
216 214 return dirstate_v1_nonnormal;
217 215 } else if (self->flags & dirstate_flag_has_meaningful_data) {
218 216 return self->size;
219 217 } else {
220 218 return dirstate_v1_nonnormal;
221 219 }
222 220 }
223 221
224 222 static inline int dirstate_item_c_v1_mtime(dirstateItemObject *self)
225 223 {
226 224 if (dirstate_item_c_removed(self)) {
227 225 return 0;
228 226 } else if (!(self->flags & dirstate_flag_has_file_mtime) ||
229 227 !(self->flags & dirstate_flag_p1_tracked) ||
230 228 !(self->flags & dirstate_flag_wc_tracked) ||
231 229 (self->flags & dirstate_flag_p2_info)) {
232 230 return ambiguous_time;
233 231 } else {
234 232 return self->mtime;
235 233 }
236 234 }
237 235
238 236 static PyObject *dirstate_item_v2_data(dirstateItemObject *self)
239 237 {
240 unsigned char flags = self->flags;
238 int flags = self->flags;
241 239 int mode = dirstate_item_c_v1_mode(self);
242 240 if ((mode & S_IXUSR) != 0) {
243 241 flags |= dirstate_flag_mode_exec_perm;
244 242 } else {
245 243 flags &= ~dirstate_flag_mode_exec_perm;
246 244 }
247 245 if (S_ISLNK(mode)) {
248 246 flags |= dirstate_flag_mode_is_symlink;
249 247 } else {
250 248 flags &= ~dirstate_flag_mode_is_symlink;
251 249 }
252 return Py_BuildValue("Bii", flags, self->size, self->mtime);
250 return Py_BuildValue("iii", flags, self->size, self->mtime);
253 251 };
254 252
255 253 static PyObject *dirstate_item_v1_state(dirstateItemObject *self)
256 254 {
257 255 char state = dirstate_item_c_v1_state(self);
258 256 return PyBytes_FromStringAndSize(&state, 1);
259 257 };
260 258
261 259 static PyObject *dirstate_item_v1_mode(dirstateItemObject *self)
262 260 {
263 261 return PyInt_FromLong(dirstate_item_c_v1_mode(self));
264 262 };
265 263
266 264 static PyObject *dirstate_item_v1_size(dirstateItemObject *self)
267 265 {
268 266 return PyInt_FromLong(dirstate_item_c_v1_size(self));
269 267 };
270 268
271 269 static PyObject *dirstate_item_v1_mtime(dirstateItemObject *self)
272 270 {
273 271 return PyInt_FromLong(dirstate_item_c_v1_mtime(self));
274 272 };
275 273
276 274 static PyObject *dirstate_item_need_delay(dirstateItemObject *self,
277 275 PyObject *value)
278 276 {
279 277 long now;
280 278 if (!pylong_to_long(value, &now)) {
281 279 return NULL;
282 280 }
283 281 if (dirstate_item_c_v1_state(self) == 'n' &&
284 282 dirstate_item_c_v1_mtime(self) == now) {
285 283 Py_RETURN_TRUE;
286 284 } else {
287 285 Py_RETURN_FALSE;
288 286 }
289 287 };
290 288
291 289 /* This will never change since it's bound to V1
292 290 */
293 291 static inline dirstateItemObject *
294 292 dirstate_item_from_v1_data(char state, int mode, int size, int mtime)
295 293 {
296 294 dirstateItemObject *t =
297 295 PyObject_New(dirstateItemObject, &dirstateItemType);
298 296 if (!t) {
299 297 return NULL;
300 298 }
301 299 t->flags = 0;
302 300 t->mode = 0;
303 301 t->size = 0;
304 302 t->mtime = 0;
305 303
306 304 if (state == 'm') {
307 305 t->flags = (dirstate_flag_wc_tracked |
308 306 dirstate_flag_p1_tracked | dirstate_flag_p2_info);
309 307 } else if (state == 'a') {
310 308 t->flags = dirstate_flag_wc_tracked;
311 309 } else if (state == 'r') {
312 310 if (size == dirstate_v1_nonnormal) {
313 311 t->flags =
314 312 dirstate_flag_p1_tracked | dirstate_flag_p2_info;
315 313 } else if (size == dirstate_v1_from_p2) {
316 314 t->flags = dirstate_flag_p2_info;
317 315 } else {
318 316 t->flags = dirstate_flag_p1_tracked;
319 317 }
320 318 } else if (state == 'n') {
321 319 if (size == dirstate_v1_from_p2) {
322 320 t->flags =
323 321 dirstate_flag_wc_tracked | dirstate_flag_p2_info;
324 322 } else if (size == dirstate_v1_nonnormal) {
325 323 t->flags =
326 324 dirstate_flag_wc_tracked | dirstate_flag_p1_tracked;
327 325 } else if (mtime == ambiguous_time) {
328 326 t->flags = (dirstate_flag_wc_tracked |
329 327 dirstate_flag_p1_tracked |
330 328 dirstate_flag_has_meaningful_data);
331 329 t->mode = mode;
332 330 t->size = size;
333 331 } else {
334 332 t->flags = (dirstate_flag_wc_tracked |
335 333 dirstate_flag_p1_tracked |
336 334 dirstate_flag_has_meaningful_data |
337 335 dirstate_flag_has_file_mtime);
338 336 t->mode = mode;
339 337 t->size = size;
340 338 t->mtime = mtime;
341 339 }
342 340 } else {
343 341 PyErr_Format(PyExc_RuntimeError,
344 342 "unknown state: `%c` (%d, %d, %d)", state, mode,
345 343 size, mtime, NULL);
346 344 Py_DECREF(t);
347 345 return NULL;
348 346 }
349 347
350 348 return t;
351 349 }
352 350
353 351 /* This will never change since it's bound to V1, unlike `dirstate_item_new` */
354 352 static PyObject *dirstate_item_from_v1_meth(PyTypeObject *subtype,
355 353 PyObject *args)
356 354 {
357 355 /* We do all the initialization here and not a tp_init function because
358 356 * dirstate_item is immutable. */
359 357 char state;
360 358 int size, mode, mtime;
361 359 if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime)) {
362 360 return NULL;
363 361 }
364 362 return (PyObject *)dirstate_item_from_v1_data(state, mode, size, mtime);
365 363 };
366 364
367 365 static PyObject *dirstate_item_from_v2_meth(PyTypeObject *subtype,
368 366 PyObject *args)
369 367 {
370 368 dirstateItemObject *t =
371 369 PyObject_New(dirstateItemObject, &dirstateItemType);
372 370 if (!t) {
373 371 return NULL;
374 372 }
375 if (!PyArg_ParseTuple(args, "bii", &t->flags, &t->size, &t->mtime)) {
373 if (!PyArg_ParseTuple(args, "iii", &t->flags, &t->size, &t->mtime)) {
376 374 return NULL;
377 375 }
376 if (t->flags & dirstate_flag_expected_state_is_modified) {
377 t->flags &= ~(dirstate_flag_expected_state_is_modified |
378 dirstate_flag_has_meaningful_data |
379 dirstate_flag_has_file_mtime);
380 }
378 381 t->mode = 0;
379 382 if (t->flags & dirstate_flag_has_meaningful_data) {
380 383 if (t->flags & dirstate_flag_mode_exec_perm) {
381 384 t->mode = 0755;
382 385 } else {
383 386 t->mode = 0644;
384 387 }
385 388 if (t->flags & dirstate_flag_mode_is_symlink) {
386 389 t->mode |= S_IFLNK;
387 390 } else {
388 391 t->mode |= S_IFREG;
389 392 }
390 393 }
391 394 return (PyObject *)t;
392 395 };
393 396
394 397 /* This means the next status call will have to actually check its content
395 398 to make sure it is correct. */
396 399 static PyObject *dirstate_item_set_possibly_dirty(dirstateItemObject *self)
397 400 {
398 401 self->flags &= ~dirstate_flag_has_file_mtime;
399 402 Py_RETURN_NONE;
400 403 }
401 404
402 405 /* See docstring of the python implementation for details */
403 406 static PyObject *dirstate_item_set_clean(dirstateItemObject *self,
404 407 PyObject *args)
405 408 {
406 409 int size, mode, mtime;
407 410 if (!PyArg_ParseTuple(args, "iii", &mode, &size, &mtime)) {
408 411 return NULL;
409 412 }
410 413 self->flags = dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
411 414 dirstate_flag_has_meaningful_data |
412 415 dirstate_flag_has_file_mtime;
413 416 self->mode = mode;
414 417 self->size = size;
415 418 self->mtime = mtime;
416 419 Py_RETURN_NONE;
417 420 }
418 421
419 422 static PyObject *dirstate_item_set_tracked(dirstateItemObject *self)
420 423 {
421 424 self->flags |= dirstate_flag_wc_tracked;
422 425 self->flags &= ~dirstate_flag_has_file_mtime;
423 426 Py_RETURN_NONE;
424 427 }
425 428
426 429 static PyObject *dirstate_item_set_untracked(dirstateItemObject *self)
427 430 {
428 431 self->flags &= ~dirstate_flag_wc_tracked;
429 432 self->mode = 0;
430 433 self->mtime = 0;
431 434 self->size = 0;
432 435 Py_RETURN_NONE;
433 436 }
434 437
435 438 static PyObject *dirstate_item_drop_merge_data(dirstateItemObject *self)
436 439 {
437 440 if (self->flags & dirstate_flag_p2_info) {
438 441 self->flags &= ~(dirstate_flag_p2_info |
439 442 dirstate_flag_has_meaningful_data |
440 443 dirstate_flag_has_file_mtime);
441 444 self->mode = 0;
442 445 self->mtime = 0;
443 446 self->size = 0;
444 447 }
445 448 Py_RETURN_NONE;
446 449 }
/* Method table of the dirstate item type: v1/v2 serialization accessors,
 * the from_v1_data / from_v2_data class-method constructors, and the
 * in-place state setters. The table is terminated by a NULL sentinel. */
447 450 static PyMethodDef dirstate_item_methods[] = {
448 451 {"v2_data", (PyCFunction)dirstate_item_v2_data, METH_NOARGS,
449 452 "return data suitable for v2 serialization"},
450 453 {"v1_state", (PyCFunction)dirstate_item_v1_state, METH_NOARGS,
451 454 "return a \"state\" suitable for v1 serialization"},
452 455 {"v1_mode", (PyCFunction)dirstate_item_v1_mode, METH_NOARGS,
453 456 "return a \"mode\" suitable for v1 serialization"},
454 457 {"v1_size", (PyCFunction)dirstate_item_v1_size, METH_NOARGS,
455 458 "return a \"size\" suitable for v1 serialization"},
456 459 {"v1_mtime", (PyCFunction)dirstate_item_v1_mtime, METH_NOARGS,
457 460 "return a \"mtime\" suitable for v1 serialization"},
458 461 {"need_delay", (PyCFunction)dirstate_item_need_delay, METH_O,
459 462 "True if the stored mtime would be ambiguous with the current time"},
460 463 {"from_v1_data", (PyCFunction)dirstate_item_from_v1_meth,
461 464 METH_VARARGS | METH_CLASS, "build a new DirstateItem object from V1 data"},
462 465 {"from_v2_data", (PyCFunction)dirstate_item_from_v2_meth,
463 466 METH_VARARGS | METH_CLASS, "build a new DirstateItem object from V2 data"},
464 467 {"set_possibly_dirty", (PyCFunction)dirstate_item_set_possibly_dirty,
465 468 METH_NOARGS, "mark a file as \"possibly dirty\""},
466 469 {"set_clean", (PyCFunction)dirstate_item_set_clean, METH_VARARGS,
467 470 "mark a file as \"clean\""},
468 471 {"set_tracked", (PyCFunction)dirstate_item_set_tracked, METH_NOARGS,
469 472 "mark a file as \"tracked\""},
470 473 {"set_untracked", (PyCFunction)dirstate_item_set_untracked, METH_NOARGS,
471 474 "mark a file as \"untracked\""},
472 475 {"drop_merge_data", (PyCFunction)dirstate_item_drop_merge_data, METH_NOARGS,
473 476 "remove all \"merge-only\" from a DirstateItem"},
474 477 {NULL} /* Sentinel */
475 478 };
476 479
477 480 static PyObject *dirstate_item_get_mode(dirstateItemObject *self)
478 481 {
479 482 return PyInt_FromLong(dirstate_item_c_v1_mode(self));
480 483 };
481 484
482 485 static PyObject *dirstate_item_get_size(dirstateItemObject *self)
483 486 {
484 487 return PyInt_FromLong(dirstate_item_c_v1_size(self));
485 488 };
486 489
487 490 static PyObject *dirstate_item_get_mtime(dirstateItemObject *self)
488 491 {
489 492 return PyInt_FromLong(dirstate_item_c_v1_mtime(self));
490 493 };
491 494
492 495 static PyObject *dirstate_item_get_state(dirstateItemObject *self)
493 496 {
494 497 char state = dirstate_item_c_v1_state(self);
495 498 return PyBytes_FromStringAndSize(&state, 1);
496 499 };
497 500
498 501 static PyObject *dirstate_item_get_tracked(dirstateItemObject *self)
499 502 {
500 503 if (dirstate_item_c_tracked(self)) {
501 504 Py_RETURN_TRUE;
502 505 } else {
503 506 Py_RETURN_FALSE;
504 507 }
505 508 };
506 509 static PyObject *dirstate_item_get_p1_tracked(dirstateItemObject *self)
507 510 {
508 511 if (self->flags & dirstate_flag_p1_tracked) {
509 512 Py_RETURN_TRUE;
510 513 } else {
511 514 Py_RETURN_FALSE;
512 515 }
513 516 };
514 517
515 518 static PyObject *dirstate_item_get_added(dirstateItemObject *self)
516 519 {
517 520 if (dirstate_item_c_added(self)) {
518 521 Py_RETURN_TRUE;
519 522 } else {
520 523 Py_RETURN_FALSE;
521 524 }
522 525 };
523 526
524 527 static PyObject *dirstate_item_get_p2_info(dirstateItemObject *self)
525 528 {
526 529 if (self->flags & dirstate_flag_wc_tracked &&
527 530 self->flags & dirstate_flag_p2_info) {
528 531 Py_RETURN_TRUE;
529 532 } else {
530 533 Py_RETURN_FALSE;
531 534 }
532 535 };
533 536
534 537 static PyObject *dirstate_item_get_merged(dirstateItemObject *self)
535 538 {
536 539 if (dirstate_item_c_merged(self)) {
537 540 Py_RETURN_TRUE;
538 541 } else {
539 542 Py_RETURN_FALSE;
540 543 }
541 544 };
542 545
543 546 static PyObject *dirstate_item_get_from_p2(dirstateItemObject *self)
544 547 {
545 548 if (dirstate_item_c_from_p2(self)) {
546 549 Py_RETURN_TRUE;
547 550 } else {
548 551 Py_RETURN_FALSE;
549 552 }
550 553 };
551 554
552 555 static PyObject *dirstate_item_get_maybe_clean(dirstateItemObject *self)
553 556 {
554 557 if (!(self->flags & dirstate_flag_wc_tracked)) {
555 558 Py_RETURN_FALSE;
556 559 } else if (!(self->flags & dirstate_flag_p1_tracked)) {
557 560 Py_RETURN_FALSE;
558 561 } else if (self->flags & dirstate_flag_p2_info) {
559 562 Py_RETURN_FALSE;
560 563 } else {
561 564 Py_RETURN_TRUE;
562 565 }
563 566 };
564 567
565 568 static PyObject *dirstate_item_get_any_tracked(dirstateItemObject *self)
566 569 {
567 570 if (dirstate_item_c_any_tracked(self)) {
568 571 Py_RETURN_TRUE;
569 572 } else {
570 573 Py_RETURN_FALSE;
571 574 }
572 575 };
573 576
574 577 static PyObject *dirstate_item_get_removed(dirstateItemObject *self)
575 578 {
576 579 if (dirstate_item_c_removed(self)) {
577 580 Py_RETURN_TRUE;
578 581 } else {
579 582 Py_RETURN_FALSE;
580 583 }
581 584 };
582 585
/* Attribute table of the dirstate item type. Every entry has a getter and a
 * NULL setter, so all attributes are read-only from Python. The table is
 * terminated by a NULL sentinel. */
583 586 static PyGetSetDef dirstate_item_getset[] = {
584 587 {"mode", (getter)dirstate_item_get_mode, NULL, "mode", NULL},
585 588 {"size", (getter)dirstate_item_get_size, NULL, "size", NULL},
586 589 {"mtime", (getter)dirstate_item_get_mtime, NULL, "mtime", NULL},
587 590 {"state", (getter)dirstate_item_get_state, NULL, "state", NULL},
588 591 {"tracked", (getter)dirstate_item_get_tracked, NULL, "tracked", NULL},
589 592 {"p1_tracked", (getter)dirstate_item_get_p1_tracked, NULL, "p1_tracked",
590 593 NULL},
591 594 {"added", (getter)dirstate_item_get_added, NULL, "added", NULL},
592 595 {"p2_info", (getter)dirstate_item_get_p2_info, NULL, "p2_info", NULL},
593 596 {"merged", (getter)dirstate_item_get_merged, NULL, "merged", NULL},
594 597 {"from_p2", (getter)dirstate_item_get_from_p2, NULL, "from_p2", NULL},
595 598 {"maybe_clean", (getter)dirstate_item_get_maybe_clean, NULL, "maybe_clean",
596 599 NULL},
597 600 {"any_tracked", (getter)dirstate_item_get_any_tracked, NULL, "any_tracked",
598 601 NULL},
599 602 {"removed", (getter)dirstate_item_get_removed, NULL, "removed", NULL},
600 603 {NULL} /* Sentinel */
601 604 };
602 605
/* Type object for dirstate items (tp_name "dirstate_tuple"). Only
 * tp_dealloc, tp_methods, tp_getset and tp_new are provided; every other
 * slot is left at 0. The initializer is positional, so the order of the
 * entries below must match the PyTypeObject layout. */
603 606 PyTypeObject dirstateItemType = {
604 607 PyVarObject_HEAD_INIT(NULL, 0) /* header */
605 608 "dirstate_tuple", /* tp_name */
606 609 sizeof(dirstateItemObject), /* tp_basicsize */
607 610 0, /* tp_itemsize */
608 611 (destructor)dirstate_item_dealloc, /* tp_dealloc */
609 612 0, /* tp_print */
610 613 0, /* tp_getattr */
611 614 0, /* tp_setattr */
612 615 0, /* tp_compare */
613 616 0, /* tp_repr */
614 617 0, /* tp_as_number */
615 618 0, /* tp_as_sequence */
616 619 0, /* tp_as_mapping */
617 620 0, /* tp_hash */
618 621 0, /* tp_call */
619 622 0, /* tp_str */
620 623 0, /* tp_getattro */
621 624 0, /* tp_setattro */
622 625 0, /* tp_as_buffer */
623 626 Py_TPFLAGS_DEFAULT, /* tp_flags */
624 627 "dirstate tuple", /* tp_doc */
625 628 0, /* tp_traverse */
626 629 0, /* tp_clear */
627 630 0, /* tp_richcompare */
628 631 0, /* tp_weaklistoffset */
629 632 0, /* tp_iter */
630 633 0, /* tp_iternext */
631 634 dirstate_item_methods, /* tp_methods */
632 635 0, /* tp_members */
633 636 dirstate_item_getset, /* tp_getset */
634 637 0, /* tp_base */
635 638 0, /* tp_dict */
636 639 0, /* tp_descr_get */
637 640 0, /* tp_descr_set */
638 641 0, /* tp_dictoffset */
639 642 0, /* tp_init */
640 643 0, /* tp_alloc */
641 644 dirstate_item_new, /* tp_new */
642 645 };
643 646
644 647 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
645 648 {
646 649 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
647 650 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
648 651 char state, *cur, *str, *cpos;
649 652 int mode, size, mtime;
650 653 unsigned int flen, pos = 40;
651 654 Py_ssize_t len = 40;
652 655 Py_ssize_t readlen;
653 656
654 657 if (!PyArg_ParseTuple(
655 658 args, PY23("O!O!s#:parse_dirstate", "O!O!y#:parse_dirstate"),
656 659 &PyDict_Type, &dmap, &PyDict_Type, &cmap, &str, &readlen)) {
657 660 goto quit;
658 661 }
659 662
660 663 len = readlen;
661 664
662 665 /* read parents */
663 666 if (len < 40) {
664 667 PyErr_SetString(PyExc_ValueError,
665 668 "too little data for parents");
666 669 goto quit;
667 670 }
668 671
669 672 parents = Py_BuildValue(PY23("s#s#", "y#y#"), str, (Py_ssize_t)20,
670 673 str + 20, (Py_ssize_t)20);
671 674 if (!parents) {
672 675 goto quit;
673 676 }
674 677
675 678 /* read filenames */
676 679 while (pos >= 40 && pos < len) {
677 680 if (pos + 17 > len) {
678 681 PyErr_SetString(PyExc_ValueError,
679 682 "overflow in dirstate");
680 683 goto quit;
681 684 }
682 685 cur = str + pos;
683 686 /* unpack header */
684 687 state = *cur;
685 688 mode = getbe32(cur + 1);
686 689 size = getbe32(cur + 5);
687 690 mtime = getbe32(cur + 9);
688 691 flen = getbe32(cur + 13);
689 692 pos += 17;
690 693 cur += 17;
691 694 if (flen > len - pos) {
692 695 PyErr_SetString(PyExc_ValueError,
693 696 "overflow in dirstate");
694 697 goto quit;
695 698 }
696 699
697 700 entry = (PyObject *)dirstate_item_from_v1_data(state, mode,
698 701 size, mtime);
699 702 if (!entry)
700 703 goto quit;
701 704 cpos = memchr(cur, 0, flen);
702 705 if (cpos) {
703 706 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
704 707 cname = PyBytes_FromStringAndSize(
705 708 cpos + 1, flen - (cpos - cur) - 1);
706 709 if (!fname || !cname ||
707 710 PyDict_SetItem(cmap, fname, cname) == -1 ||
708 711 PyDict_SetItem(dmap, fname, entry) == -1) {
709 712 goto quit;
710 713 }
711 714 Py_DECREF(cname);
712 715 } else {
713 716 fname = PyBytes_FromStringAndSize(cur, flen);
714 717 if (!fname ||
715 718 PyDict_SetItem(dmap, fname, entry) == -1) {
716 719 goto quit;
717 720 }
718 721 }
719 722 Py_DECREF(fname);
720 723 Py_DECREF(entry);
721 724 fname = cname = entry = NULL;
722 725 pos += flen;
723 726 }
724 727
725 728 ret = parents;
726 729 Py_INCREF(ret);
727 730 quit:
728 731 Py_XDECREF(fname);
729 732 Py_XDECREF(cname);
730 733 Py_XDECREF(entry);
731 734 Py_XDECREF(parents);
732 735 return ret;
733 736 }
734 737
735 738 /*
736 739 * Efficiently pack a dirstate object into its on-disk format.
737 740 */
738 741 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
739 742 {
740 743 PyObject *packobj = NULL;
741 744 PyObject *map, *copymap, *pl, *mtime_unset = NULL;
742 745 Py_ssize_t nbytes, pos, l;
743 746 PyObject *k, *v = NULL, *pn;
744 747 char *p, *s;
745 748 int now;
746 749
747 750 if (!PyArg_ParseTuple(args, "O!O!O!i:pack_dirstate", &PyDict_Type, &map,
748 751 &PyDict_Type, &copymap, &PyTuple_Type, &pl,
749 752 &now)) {
750 753 return NULL;
751 754 }
752 755
753 756 if (PyTuple_Size(pl) != 2) {
754 757 PyErr_SetString(PyExc_TypeError, "expected 2-element tuple");
755 758 return NULL;
756 759 }
757 760
758 761 /* Figure out how much we need to allocate. */
759 762 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
760 763 PyObject *c;
761 764 if (!PyBytes_Check(k)) {
762 765 PyErr_SetString(PyExc_TypeError, "expected string key");
763 766 goto bail;
764 767 }
765 768 nbytes += PyBytes_GET_SIZE(k) + 17;
766 769 c = PyDict_GetItem(copymap, k);
767 770 if (c) {
768 771 if (!PyBytes_Check(c)) {
769 772 PyErr_SetString(PyExc_TypeError,
770 773 "expected string key");
771 774 goto bail;
772 775 }
773 776 nbytes += PyBytes_GET_SIZE(c) + 1;
774 777 }
775 778 }
776 779
777 780 packobj = PyBytes_FromStringAndSize(NULL, nbytes);
778 781 if (packobj == NULL) {
779 782 goto bail;
780 783 }
781 784
782 785 p = PyBytes_AS_STRING(packobj);
783 786
784 787 pn = PyTuple_GET_ITEM(pl, 0);
785 788 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
786 789 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
787 790 goto bail;
788 791 }
789 792 memcpy(p, s, l);
790 793 p += 20;
791 794 pn = PyTuple_GET_ITEM(pl, 1);
792 795 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
793 796 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
794 797 goto bail;
795 798 }
796 799 memcpy(p, s, l);
797 800 p += 20;
798 801
799 802 for (pos = 0; PyDict_Next(map, &pos, &k, &v);) {
800 803 dirstateItemObject *tuple;
801 804 char state;
802 805 int mode, size, mtime;
803 806 Py_ssize_t len, l;
804 807 PyObject *o;
805 808 char *t;
806 809
807 810 if (!dirstate_tuple_check(v)) {
808 811 PyErr_SetString(PyExc_TypeError,
809 812 "expected a dirstate tuple");
810 813 goto bail;
811 814 }
812 815 tuple = (dirstateItemObject *)v;
813 816
814 817 state = dirstate_item_c_v1_state(tuple);
815 818 mode = dirstate_item_c_v1_mode(tuple);
816 819 size = dirstate_item_c_v1_size(tuple);
817 820 mtime = dirstate_item_c_v1_mtime(tuple);
818 821 if (state == 'n' && mtime == now) {
819 822 /* See pure/parsers.py:pack_dirstate for why we do
820 823 * this. */
821 824 mtime = -1;
822 825 mtime_unset = (PyObject *)dirstate_item_from_v1_data(
823 826 state, mode, size, mtime);
824 827 if (!mtime_unset) {
825 828 goto bail;
826 829 }
827 830 if (PyDict_SetItem(map, k, mtime_unset) == -1) {
828 831 goto bail;
829 832 }
830 833 Py_DECREF(mtime_unset);
831 834 mtime_unset = NULL;
832 835 }
833 836 *p++ = state;
834 837 putbe32((uint32_t)mode, p);
835 838 putbe32((uint32_t)size, p + 4);
836 839 putbe32((uint32_t)mtime, p + 8);
837 840 t = p + 12;
838 841 p += 16;
839 842 len = PyBytes_GET_SIZE(k);
840 843 memcpy(p, PyBytes_AS_STRING(k), len);
841 844 p += len;
842 845 o = PyDict_GetItem(copymap, k);
843 846 if (o) {
844 847 *p++ = '\0';
845 848 l = PyBytes_GET_SIZE(o);
846 849 memcpy(p, PyBytes_AS_STRING(o), l);
847 850 p += l;
848 851 len += l + 1;
849 852 }
850 853 putbe32((uint32_t)len, t);
851 854 }
852 855
853 856 pos = p - PyBytes_AS_STRING(packobj);
854 857 if (pos != nbytes) {
855 858 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
856 859 (long)pos, (long)nbytes);
857 860 goto bail;
858 861 }
859 862
860 863 return packobj;
861 864 bail:
862 865 Py_XDECREF(mtime_unset);
863 866 Py_XDECREF(packobj);
864 867 Py_XDECREF(v);
865 868 return NULL;
866 869 }
867 870
/* Bits of an obsolescence marker's flags field. USING_SHA_256 selects
 * 32-byte hashes instead of 20-byte ones; BUMPED_FIX is not referenced in
 * the part of the file visible here. */
868 871 #define BUMPED_FIX 1
869 872 #define USING_SHA_256 2
/* Fixed-size marker header: 4-byte marker size, 8-byte mtime, 2-byte
 * timezone, 2-byte flags, then one byte each for the successor, parent and
 * metadata counts. */
870 873 #define FM1_HEADER_SIZE (4 + 8 + 2 + 2 + 1 + 1 + 1)
871 874
872 875 static PyObject *readshas(const char *source, unsigned char num,
873 876 Py_ssize_t hashwidth)
874 877 {
875 878 int i;
876 879 PyObject *list = PyTuple_New(num);
877 880 if (list == NULL) {
878 881 return NULL;
879 882 }
880 883 for (i = 0; i < num; i++) {
881 884 PyObject *hash = PyBytes_FromStringAndSize(source, hashwidth);
882 885 if (hash == NULL) {
883 886 Py_DECREF(list);
884 887 return NULL;
885 888 }
886 889 PyTuple_SET_ITEM(list, i, hash);
887 890 source += hashwidth;
888 891 }
889 892 return list;
890 893 }
891 894
892 895 static PyObject *fm1readmarker(const char *databegin, const char *dataend,
893 896 uint32_t *msize)
894 897 {
895 898 const char *data = databegin;
896 899 const char *meta;
897 900
898 901 double mtime;
899 902 int16_t tz;
900 903 uint16_t flags;
901 904 unsigned char nsuccs, nparents, nmetadata;
902 905 Py_ssize_t hashwidth = 20;
903 906
904 907 PyObject *prec = NULL, *parents = NULL, *succs = NULL;
905 908 PyObject *metadata = NULL, *ret = NULL;
906 909 int i;
907 910
908 911 if (data + FM1_HEADER_SIZE > dataend) {
909 912 goto overflow;
910 913 }
911 914
912 915 *msize = getbe32(data);
913 916 data += 4;
914 917 mtime = getbefloat64(data);
915 918 data += 8;
916 919 tz = getbeint16(data);
917 920 data += 2;
918 921 flags = getbeuint16(data);
919 922 data += 2;
920 923
921 924 if (flags & USING_SHA_256) {
922 925 hashwidth = 32;
923 926 }
924 927
925 928 nsuccs = (unsigned char)(*data++);
926 929 nparents = (unsigned char)(*data++);
927 930 nmetadata = (unsigned char)(*data++);
928 931
929 932 if (databegin + *msize > dataend) {
930 933 goto overflow;
931 934 }
932 935 dataend = databegin + *msize; /* narrow down to marker size */
933 936
934 937 if (data + hashwidth > dataend) {
935 938 goto overflow;
936 939 }
937 940 prec = PyBytes_FromStringAndSize(data, hashwidth);
938 941 data += hashwidth;
939 942 if (prec == NULL) {
940 943 goto bail;
941 944 }
942 945
943 946 if (data + nsuccs * hashwidth > dataend) {
944 947 goto overflow;
945 948 }
946 949 succs = readshas(data, nsuccs, hashwidth);
947 950 if (succs == NULL) {
948 951 goto bail;
949 952 }
950 953 data += nsuccs * hashwidth;
951 954
952 955 if (nparents == 1 || nparents == 2) {
953 956 if (data + nparents * hashwidth > dataend) {
954 957 goto overflow;
955 958 }
956 959 parents = readshas(data, nparents, hashwidth);
957 960 if (parents == NULL) {
958 961 goto bail;
959 962 }
960 963 data += nparents * hashwidth;
961 964 } else {
962 965 parents = Py_None;
963 966 Py_INCREF(parents);
964 967 }
965 968
966 969 if (data + 2 * nmetadata > dataend) {
967 970 goto overflow;
968 971 }
969 972 meta = data + (2 * nmetadata);
970 973 metadata = PyTuple_New(nmetadata);
971 974 if (metadata == NULL) {
972 975 goto bail;
973 976 }
974 977 for (i = 0; i < nmetadata; i++) {
975 978 PyObject *tmp, *left = NULL, *right = NULL;
976 979 Py_ssize_t leftsize = (unsigned char)(*data++);
977 980 Py_ssize_t rightsize = (unsigned char)(*data++);
978 981 if (meta + leftsize + rightsize > dataend) {
979 982 goto overflow;
980 983 }
981 984 left = PyBytes_FromStringAndSize(meta, leftsize);
982 985 meta += leftsize;
983 986 right = PyBytes_FromStringAndSize(meta, rightsize);
984 987 meta += rightsize;
985 988 tmp = PyTuple_New(2);
986 989 if (!left || !right || !tmp) {
987 990 Py_XDECREF(left);
988 991 Py_XDECREF(right);
989 992 Py_XDECREF(tmp);
990 993 goto bail;
991 994 }
992 995 PyTuple_SET_ITEM(tmp, 0, left);
993 996 PyTuple_SET_ITEM(tmp, 1, right);
994 997 PyTuple_SET_ITEM(metadata, i, tmp);
995 998 }
996 999 ret = Py_BuildValue("(OOHO(di)O)", prec, succs, flags, metadata, mtime,
997 1000 (int)tz * 60, parents);
998 1001 goto bail; /* return successfully */
999 1002
1000 1003 overflow:
1001 1004 PyErr_SetString(PyExc_ValueError, "overflow in obsstore");
1002 1005 bail:
1003 1006 Py_XDECREF(prec);
1004 1007 Py_XDECREF(succs);
1005 1008 Py_XDECREF(metadata);
1006 1009 Py_XDECREF(parents);
1007 1010 return ret;
1008 1011 }
1009 1012
/* Python-callable entry point: parse a run of obsolescence markers.
 *
 * Arguments (parsed from `args`): a bytes buffer, a start `offset` and a
 * `stop` position, both expressed in bytes from the start of the buffer.
 * Records are decoded one after another with fm1readmarker() while
 * `offset < stop`, and collected in a new list.
 *
 * Returns the list, or NULL with a Python exception set when the arguments
 * are invalid, when an allocation fails, or when fm1readmarker() fails. */
1010 1013 static PyObject *fm1readmarkers(PyObject *self, PyObject *args)
1011 1014 {
1012 1015 const char *data, *dataend;
1013 1016 Py_ssize_t datalen, offset, stop;
1014 1017 PyObject *markers = NULL;
1015 1018
1016 1019 if (!PyArg_ParseTuple(args, PY23("s#nn", "y#nn"), &data, &datalen,
1017 1020 &offset, &stop)) {
1018 1021 return NULL;
1019 1022 }
/* Reject ranges that would start before the buffer... */
1020 1023 if (offset < 0) {
1021 1024 PyErr_SetString(PyExc_ValueError,
1022 1025 "invalid negative offset in fm1readmarkers");
1023 1026 return NULL;
1024 1027 }
/* ...or that would extend past its end. */
1025 1028 if (stop > datalen) {
1026 1029 PyErr_SetString(
1027 1030 PyExc_ValueError,
1028 1031 "stop longer than data length in fm1readmarkers");
1029 1032 return NULL;
1030 1033 }
/* dataend is the end of the whole buffer, not `stop`: it is the hard
 * bound handed to fm1readmarker() for its own overflow checks. */
1031 1034 dataend = data + datalen;
1032 1035 data += offset;
1033 1036 markers = PyList_New(0);
1034 1037 if (!markers) {
1035 1038 return NULL;
1036 1039 }
1037 1040 while (offset < stop) {
1038 1041 uint32_t msize;
1039 1042 int error;
1040 1043 PyObject *record = fm1readmarker(data, dataend, &msize);
1041 1044 if (!record) {
1042 1045 goto bail;
1043 1046 }
/* PyList_Append takes its own reference, so ours is released whether
 * or not the append succeeded. */
1044 1047 error = PyList_Append(markers, record);
1045 1048 Py_DECREF(record);
1046 1049 if (error) {
1047 1050 goto bail;
1048 1051 }
/* Step to the next record using the size reported through msize. */
1049 1052 data += msize;
1050 1053 offset += msize;
1051 1054 }
1052 1055 return markers;
1053 1056 bail:
/* Error path: drop the partially filled list; the exception is already
 * set by the call that failed. */
1054 1057 Py_DECREF(markers);
1055 1058 return NULL;
1056 1059 }
1057 1060
1058 1061 static char parsers_doc[] = "Efficient content parsing.";
1059 1062
1060 1063 PyObject *encodedir(PyObject *self, PyObject *args);
1061 1064 PyObject *pathencode(PyObject *self, PyObject *args);
1062 1065 PyObject *lowerencode(PyObject *self, PyObject *args);
1063 1066 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs);
1064 1067
1065 1068 static PyMethodDef methods[] = {
1066 1069 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
1067 1070 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1068 1071 {"parse_index2", (PyCFunction)parse_index2, METH_VARARGS | METH_KEYWORDS,
1069 1072 "parse a revlog index\n"},
1070 1073 {"isasciistr", isasciistr, METH_VARARGS, "check if an ASCII string\n"},
1071 1074 {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
1072 1075 {"asciiupper", asciiupper, METH_VARARGS, "uppercase an ASCII string\n"},
1073 1076 {"dict_new_presized", dict_new_presized, METH_VARARGS,
1074 1077 "construct a dict with an expected size\n"},
1075 1078 {"make_file_foldmap", make_file_foldmap, METH_VARARGS,
1076 1079 "make file foldmap\n"},
1077 1080 {"jsonescapeu8fast", jsonescapeu8fast, METH_VARARGS,
1078 1081 "escape a UTF-8 byte string to JSON (fast path)\n"},
1079 1082 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
1080 1083 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
1081 1084 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
1082 1085 {"fm1readmarkers", fm1readmarkers, METH_VARARGS,
1083 1086 "parse v1 obsolete markers\n"},
1084 1087 {NULL, NULL}};
1085 1088
1086 1089 void dirs_module_init(PyObject *mod);
1087 1090 void manifest_module_init(PyObject *mod);
1088 1091 void revlog_module_init(PyObject *mod);
1089 1092
1090 1093 static const int version = 20;
1091 1094
/* Populate the freshly created `parsers` module object.
 *
 * Adds the `version` and `versionerrortext` constants, runs the
 * initializers of the dirs, manifest and revlog sub-components, and
 * registers the DirstateItem type.
 *
 * The function returns void: the results of the PyModule_Add* calls are
 * not checked, and a PyType_Ready failure only causes an early return. */
1092 1095 static void module_init(PyObject *mod)
1093 1096 {
1094 1097 PyModule_AddIntConstant(mod, "version", version);
1095 1098
1096 1099 /* This module constant has two purposes. First, it lets us unit test
1097 1100 * the ImportError raised without hard-coding any error text. This
1098 1101 * means we can change the text in the future without breaking tests,
1099 1102 * even across changesets without a recompile. Second, its presence
1100 1103 * can be used to determine whether the version-checking logic is
1101 1104 * present, which also helps in testing across changesets without a
1102 1105 * recompile. Note that this means the pure-Python version of parsers
1103 1106 * should not have this module constant. */
1104 1107 PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
1105 1108
1106 1109 dirs_module_init(mod);
1107 1110 manifest_module_init(mod);
1108 1111 revlog_module_init(mod);
1109 1112
1110 1113 if (PyType_Ready(&dirstateItemType) < 0) {
1111 1114 return;
1112 1115 }
/* PyModule_AddObject steals a reference on success, hence the INCREF on
 * the statically allocated type object. */
1113 1116 Py_INCREF(&dirstateItemType);
1114 1117 PyModule_AddObject(mod, "DirstateItem", (PyObject *)&dirstateItemType);
1115 1118 }
1116 1119
/* Check that the running interpreter matches the one this extension was
 * compiled against.
 *
 * Reads sys.hexversion and compares everything above its low 16 bits with
 * the same bits of the compile-time PY_VERSION_HEX.
 *
 * Returns 0 when they match. Returns -1 with a Python exception set when
 * `sys` or `sys.hexversion` cannot be obtained, or (as an ImportError
 * whose message starts with versionerrortext) when the versions differ or
 * the value converts to -1. */
1117 1120 static int check_python_version(void)
1118 1121 {
1119 1122 PyObject *sys = PyImport_ImportModule("sys"), *ver;
1120 1123 long hexversion;
1121 1124 if (!sys) {
1122 1125 return -1;
1123 1126 }
1124 1127 ver = PyObject_GetAttrString(sys, "hexversion");
/* `sys` is only needed for the attribute lookup above. */
1125 1128 Py_DECREF(sys);
1126 1129 if (!ver) {
1127 1130 return -1;
1128 1131 }
1129 1132 hexversion = PyInt_AsLong(ver);
1130 1133 Py_DECREF(ver);
1131 1134 /* sys.hexversion is a 32-bit number by default, so the -1 case
1132 1135 * should only occur in unusual circumstances (e.g. if sys.hexversion
1133 1136 * is manually set to an invalid value). */
1134 1137 if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
/* PY_VERSION is a string literal macro, so it is pasted into the format
 * string at compile time; only the runtime values are passed as
 * arguments. */
1135 1138 PyErr_Format(PyExc_ImportError,
1136 1139 "%s: The Mercurial extension "
1137 1140 "modules were compiled with Python " PY_VERSION
1138 1141 ", but "
1139 1142 "Mercurial is currently using Python with "
1140 1143 "sys.hexversion=%ld: "
1141 1144 "Python %s\n at: %s",
1142 1145 versionerrortext, hexversion, Py_GetVersion(),
1143 1146 Py_GetProgramFullPath());
1144 1147 return -1;
1145 1148 }
1146 1149 return 0;
1147 1150 }
1148 1151
1149 1152 #ifdef IS_PY3K
1150 1153 static struct PyModuleDef parsers_module = {PyModuleDef_HEAD_INIT, "parsers",
1151 1154 parsers_doc, -1, methods};
1152 1155
/* Python 3 module entry point (compiled only when IS_PY3K is defined).
 *
 * Refuses to load (returns NULL with the exception set by
 * check_python_version) when the interpreter version does not match;
 * otherwise creates the module, populates it and returns it. */
1153 1156 PyMODINIT_FUNC PyInit_parsers(void)
1154 1157 {
1155 1158 PyObject *mod;
1156 1159
1157 1160 if (check_python_version() == -1)
1158 1161 return NULL;
/* NOTE(review): the result of PyModule_Create is handed to module_init
 * without a NULL check -- confirm whether a creation failure needs to be
 * handled here. */
1159 1162 mod = PyModule_Create(&parsers_module);
1160 1163 module_init(mod);
1161 1164 return mod;
1162 1165 }
1163 1166 #else
/* Python 2 module entry point (compiled only when IS_PY3K is not defined).
 *
 * Returns early, leaving the exception set by check_python_version, when
 * the interpreter version does not match; otherwise creates the module
 * with Py_InitModule3 and populates it through module_init. */
1164 1167 PyMODINIT_FUNC initparsers(void)
1165 1168 {
1166 1169 PyObject *mod;
1167 1170
1168 1171 if (check_python_version() == -1) {
1169 1172 return;
1170 1173 }
1171 1174 mod = Py_InitModule3("parsers", methods, parsers_doc);
1172 1175 module_init(mod);
1173 1176 }
1174 1177 #endif
@@ -1,83 +1,84 b''
1 1 /*
2 2 util.h - utility functions for interfacing with the various python APIs.
3 3
4 4 This software may be used and distributed according to the terms of
5 5 the GNU General Public License, incorporated herein by reference.
6 6 */
7 7
8 8 #ifndef _HG_UTIL_H_
9 9 #define _HG_UTIL_H_
10 10
11 11 #include "compat.h"
12 12
13 13 #if PY_MAJOR_VERSION >= 3
14 14 #define IS_PY3K
15 15 #endif
16 16
17 17 /* helper to switch things like string literal depending on Python version */
18 18 #ifdef IS_PY3K
19 19 #define PY23(py2, py3) py3
20 20 #else
21 21 #define PY23(py2, py3) py2
22 22 #endif
23 23
24 24 /* clang-format off */
25 25 typedef struct {
26 26 PyObject_HEAD
27 unsigned char flags;
27 int flags;
28 28 int mode;
29 29 int size;
30 30 int mtime;
31 31 } dirstateItemObject;
32 32 /* clang-format on */
33 33
34 static const unsigned char dirstate_flag_wc_tracked = 1;
35 static const unsigned char dirstate_flag_p1_tracked = 1 << 1;
36 static const unsigned char dirstate_flag_p2_info = 1 << 2;
37 static const unsigned char dirstate_flag_has_meaningful_data = 1 << 3;
38 static const unsigned char dirstate_flag_has_file_mtime = 1 << 4;
39 static const unsigned char dirstate_flag_has_directory_mtime = 1 << 5;
40 static const unsigned char dirstate_flag_mode_exec_perm = 1 << 6;
41 static const unsigned char dirstate_flag_mode_is_symlink = 1 << 7;
34 static const int dirstate_flag_wc_tracked = 1;
35 static const int dirstate_flag_p1_tracked = 1 << 1;
36 static const int dirstate_flag_p2_info = 1 << 2;
37 static const int dirstate_flag_has_meaningful_data = 1 << 3;
38 static const int dirstate_flag_has_file_mtime = 1 << 4;
39 static const int dirstate_flag_has_directory_mtime = 1 << 5;
40 static const int dirstate_flag_mode_exec_perm = 1 << 6;
41 static const int dirstate_flag_mode_is_symlink = 1 << 7;
42 static const int dirstate_flag_expected_state_is_modified = 1 << 8;
42 43
43 44 extern PyTypeObject dirstateItemType;
44 45 #define dirstate_tuple_check(op) (Py_TYPE(op) == &dirstateItemType)
45 46
46 47 #ifndef MIN
47 48 #define MIN(a, b) (((a) < (b)) ? (a) : (b))
48 49 #endif
49 50 /* VC9 doesn't include bool and lacks stdbool.h based on my searching */
50 51 #if defined(_MSC_VER) || __STDC_VERSION__ < 199901L
51 52 #define true 1
52 53 #define false 0
53 54 typedef unsigned char bool;
54 55 #else
55 56 #include <stdbool.h>
56 57 #endif
57 58
/* Create a new dict pre-sized for roughly `expected_size` entries.
 *
 * The size actually requested from _PyDict_NewPresized is
 * ((1 + expected_size) / 2) * 3, for the reason explained in the body.
 * Returns whatever _PyDict_NewPresized returns. */
58 59 static inline PyObject *_dict_new_presized(Py_ssize_t expected_size)
59 60 {
60 61 /* _PyDict_NewPresized expects a minused parameter, but it actually
61 62 creates a dictionary that's the nearest power of two bigger than the
62 63 parameter. For example, with the initial minused = 1000, the
63 64 dictionary created has size 1024. Of course in a lot of cases that
64 65 can be greater than the maximum load factor Python's dict object
65 66 expects (= 2/3), so as soon as we cross the threshold we'll resize
66 67 anyway. So create a dictionary that's at least 3/2 the size. */
67 68 return _PyDict_NewPresized(((1 + expected_size) / 2) * 3);
68 69 }
69 70
70 71 /* Convert a PyInt or PyLong to a long. Returns false if there is an
71 72 error, in which case an exception will already have been set. */
/* The converted value is written to *out in every case, including on
 * error (where it is -1). */
72 73 static inline bool pylong_to_long(PyObject *pylong, long *out)
73 74 {
74 75 *out = PyLong_AsLong(pylong);
75 76 /* Fast path to avoid hitting PyErr_Occurred if the value was obviously
76 77 * not an error. */
77 78 if (*out != -1) {
78 79 return true;
79 80 }
/* -1 is ambiguous: it is both the error return of PyLong_AsLong and a
 * legitimate value, so only a pending exception distinguishes them. */
80 81 return PyErr_Occurred() == NULL;
81 82 }
82 83
83 84 #endif /* _HG_UTIL_H_ */
@@ -1,516 +1,532 b''
1 1 The *dirstate* is what Mercurial uses internally to track
2 2 the state of files in the working directory,
3 3 such as set by commands like `hg add` and `hg rm`.
4 4 It also contains some cached data that help make `hg status` faster.
5 5 The name refers both to `.hg/dirstate` on the filesystem
6 6 and the corresponding data structure in memory while a Mercurial process
7 7 is running.
8 8
9 9 The original file format, retroactively dubbed `dirstate-v1`,
10 10 is described at https://www.mercurial-scm.org/wiki/DirState.
11 11 It is made of a flat sequence of unordered variable-size entries,
12 12 so accessing any information in it requires parsing all of it.
13 13 Similarly, saving changes requires rewriting the entire file.
14 14
15 15 The newer `dirstate-v2` file format is designed to fix these limitations
16 16 and make `hg status` faster.
17 17
18 18 User guide
19 19 ==========
20 20
21 21 Compatibility
22 22 -------------
23 23
24 24 The file format is experimental and may still change.
25 25 Different versions of Mercurial may not be compatible with each other
26 26 when working on a local repository that uses this format.
27 27 When using an incompatible version with the experimental format,
28 28 anything can happen including data corruption.
29 29
30 30 Since the dirstate is entirely local and not relevant to the wire protocol,
31 31 `dirstate-v2` does not affect compatibility with remote Mercurial versions.
32 32
33 33 When `share-safe` is enabled, different repositories sharing the same store
34 34 can use different dirstate formats.
35 35
36 36 Enabling `dirstate-v2` for new local repositories
37 37 -------------------------------------------------
38 38
39 39 When creating a new local repository such as with `hg init` or `hg clone`,
40 40 the `exp-dirstate-v2` boolean in the `format` configuration section
41 41 controls whether to use this file format.
42 42 This is disabled by default as of this writing.
43 43 To enable it for a single repository, run for example::
44 44
45 45 $ hg init my-project --config format.exp-dirstate-v2=1
46 46
47 47 Checking the format of an existing local repository
48 48 ---------------------------------------------------
49 49
50 50 The `debugformat` command prints information about
51 51 which of multiple optional formats are used in the current repository,
52 52 including `dirstate-v2`::
53 53
54 54 $ hg debugformat
55 55 format-variant repo
56 56 fncache: yes
57 57 dirstate-v2: yes
58 58 […]
59 59
60 60 Upgrading or downgrading an existing local repository
61 61 -----------------------------------------------------
62 62
63 63 The `debugupgrade` command does various upgrades or downgrades
64 64 on a local repository
65 65 based on the current Mercurial version and on configuration.
66 66 The same `format.exp-dirstate-v2` configuration is used again.
67 67
68 68 Example to upgrade::
69 69
70 70 $ hg debugupgrade --config format.exp-dirstate-v2=1
71 71
72 72 Example to downgrade to `dirstate-v1`::
73 73
74 74 $ hg debugupgrade --config format.exp-dirstate-v2=0
75 75
76 76 Both of these commands do nothing but print a list of proposed changes,
77 77 which may include changes unrelated to the dirstate.
78 78 Those other changes are controlled by their own configuration keys.
79 79 Add `--run` to a command to actually apply the proposed changes.
80 80
81 81 Backups of `.hg/requires` and `.hg/dirstate` are created
82 82 in a `.hg/upgradebackup.*` directory.
83 83 If something goes wrong, restoring those files should undo the change.
84 84
85 85 Note that upgrading affects compatibility with older versions of Mercurial
86 86 as noted above.
87 87 This can be relevant when a repository’s files are on a USB drive
88 88 or some other removable media, or shared over the network, etc.
89 89
90 90 Internal filesystem representation
91 91 ==================================
92 92
93 93 Requirements file
94 94 -----------------
95 95
96 96 The `.hg/requires` file indicates which of various optional file formats
97 97 are used by a given repository.
98 98 Mercurial aborts when seeing a requirement it does not know about,
99 99 which avoids older versions accidentally messing up a repository
100 100 that uses a format that was introduced later.
101 101 For versions that do support a format, the presence or absence of
102 102 the corresponding requirement indicates whether to use that format.
103 103
104 104 When the file contains an `exp-dirstate-v2` line,
105 105 the `dirstate-v2` format is used.
106 106 With no such line `dirstate-v1` is used.
107 107
108 108 High level description
109 109 ----------------------
110 110
111 111 Whereas `dirstate-v1` uses a single `.hg/dirstate` file,
112 112 in `dirstate-v2` that file is a "docket" file
113 113 that only contains some metadata
114 114 and points to a separate data file named `.hg/dirstate.{ID}`,
115 115 where `{ID}` is a random identifier.
116 116
117 117 This separation allows making data files append-only
118 118 and therefore safer to memory-map.
119 119 Creating a new data file (occasionally to clean up unused data)
120 120 can be done with a different ID
121 121 without disrupting another Mercurial process
122 122 that could still be using the previous data file.
123 123
124 124 Both files have a format designed to reduce the need for parsing,
125 125 by using fixed-size binary components as much as possible.
126 126 For data that is not fixed-size,
127 127 references to other parts of a file can be made by storing "pseudo-pointers":
128 128 integers counted in bytes from the start of a file.
129 129 For read-only access no data structure is needed,
130 130 only a bytes buffer (possibly memory-mapped directly from the filesystem)
131 131 with specific parts read on demand.
132 132
133 133 The data file contains "nodes" organized in a tree.
134 134 Each node represents a file or directory inside the working directory
135 135 or its parent changeset.
136 136 This tree has the same structure as the filesystem,
137 137 so a node representing a directory has child nodes representing
138 138 the files and subdirectories contained directly in that directory.
139 139
140 140 The docket file format
141 141 ----------------------
142 142
143 143 This is implemented in `rust/hg-core/src/dirstate_tree/on_disk.rs`
144 144 and `mercurial/dirstateutils/docket.py`.
145 145
146 146 Components of the docket file are found at fixed offsets,
147 147 counted in bytes from the start of the file:
148 148
149 149 * Offset 0:
150 150 The 12-byte marker string "dirstate-v2\n" ending with a newline character.
151 151 This makes it easier to tell a dirstate-v2 file from a dirstate-v1 file,
152 152 although it is not strictly necessary
153 153 since `.hg/requires` determines which format to use.
154 154
155 155 * Offset 12:
156 156 The changeset node ID on the first parent of the working directory,
157 157 as up to 32 binary bytes.
158 158 If a node ID is shorter (20 bytes for SHA-1),
159 159 it is start-aligned and the rest of the bytes are set to zero.
160 160
161 161 * Offset 44:
162 162 The changeset node ID on the second parent of the working directory,
163 163 or all zeros if there isn’t one.
164 164 Also 32 binary bytes.
165 165
166 166 * Offset 76:
167 167 Tree metadata on 44 bytes, described below.
168 168 Its separation in this documentation from the rest of the docket
169 169 reflects a detail of the current implementation.
170 170 Since tree metadata is also made of fields at fixed offsets, those could
171 171 be inlined here by adding 76 bytes to each offset.
172 172
173 173 * Offset 120:
174 174 The used size of the data file, as a 32-bit big-endian integer.
175 175 The actual size of the data file may be larger
176 176 (if another Mercurial process is appending to it
177 177 but has not updated the docket yet).
178 178 That extra data must be ignored.
179 179
180 180 * Offset 124:
181 181 The length of the data file identifier, as an 8-bit integer.
182 182
183 183 * Offset 125:
184 184 The data file identifier.
185 185
186 186 * Any additional data is currently ignored, and dropped when updating the file.
187 187
188 188 Tree metadata in the docket file
189 189 --------------------------------
190 190
191 191 Tree metadata is similarly made of components at fixed offsets.
192 192 These offsets are counted in bytes from the start of tree metadata,
193 193 which is 76 bytes after the start of the docket file.
194 194
195 195 This metadata can be thought of as the singular root of the tree
196 196 formed by nodes in the data file.
197 197
198 198 * Offset 0:
199 199 Pseudo-pointer to the start of root nodes,
200 200 counted in bytes from the start of the data file,
201 201 as a 32-bit big-endian integer.
202 202 These nodes describe files and directories found directly
203 203 at the root of the working directory.
204 204
205 205 * Offset 4:
206 206 Number of root nodes, as a 32-bit big-endian integer.
207 207
208 208 * Offset 8:
209 209 Total number of nodes in the entire tree that "have a dirstate entry",
210 210 as a 32-bit big-endian integer.
211 211 Those nodes represent files that would be present at all in `dirstate-v1`.
212 212 This is typically less than the total number of nodes.
213 213 This counter is used to implement `len(dirstatemap)`.
214 214
215 215 * Offset 12:
216 216 Number of nodes in the entire tree that have a copy source,
217 217 as a 32-bit big-endian integer.
218 218 At the next commit, these files are recorded
219 219 as having been copied or moved/renamed from that source.
220 220 (A move is recorded as a copy and separate removal of the source.)
221 221 This counter is used to implement `len(dirstatemap.copymap)`.
222 222
223 223 * Offset 16:
224 224 An estimation of how many bytes of the data file
225 225 (within its used size) are unused, as a 32-bit big-endian integer.
226 226 When appending to an existing data file,
227 227 some existing nodes or paths can be unreachable from the new root
228 228 but they still take up space.
229 229 This counter is used to decide when to write a new data file from scratch
230 230 instead of appending to an existing one,
231 231 in order to get rid of that unreachable data
232 232 and avoid unbounded file size growth.
233 233
234 234 * Offset 20:
235 235 These four bytes are currently ignored
236 236 and reset to zero when updating a docket file.
237 237 This is an attempt at forward compatibility:
238 238 future Mercurial versions could use this as a bit field
239 239 to indicate that a dirstate has additional data or constraints.
240 240 Finding a dirstate file with the relevant bit unset indicates that
241 241 it was written by a then-older version
242 242 which is not aware of that future change.
243 243
244 244 * Offset 24:
245 245 Either 20 zero bytes, or a SHA-1 hash as 20 binary bytes.
246 246 When present, the hash is of ignore patterns
247 247 that were used for some previous run of the `status` algorithm.
248 248
249 249 * (Offset 44: end of tree metadata)
250 250
251 251 Optional hash of ignore patterns
252 252 --------------------------------
253 253
254 254 The implementation of `status` at `rust/hg-core/src/dirstate_tree/status.rs`
255 255 has been optimized such that its run time is dominated by calls
256 256 to `stat` for reading the filesystem metadata of a file or directory,
257 257 and to `readdir` for listing the contents of a directory.
258 258 In some cases the algorithm can skip calls to `readdir`
259 259 (saving significant time)
260 260 because the dirstate already contains enough of the relevant information
261 261 to build the correct `status` results.
262 262
263 263 The default configuration of `hg status` is to list unknown files
264 264 but not ignored files.
265 265 In this case, it matters for the `readdir`-skipping optimization
266 266 if a given file used to be ignored but became unknown
267 267 because `.hgignore` changed.
268 268 To detect the possibility of such a change,
269 269 the tree metadata contains an optional hash of all ignore patterns.
270 270
271 271 We define:
272 272
273 273 * "Root" ignore files as:
274 274
275 275 - `.hgignore` at the root of the repository if it exists
276 276 - And all files from `ui.ignore.*` config.
277 277
278 278 This set of files is sorted by the string representation of their path.
279 279
280 280 * The "expanded contents" of an ignore file is the byte string made
281 281 by the concatenation of its contents followed by the "expanded contents"
282 282 of other files included with `include:` or `subinclude:` directives,
283 283 in inclusion order. This definition is recursive, as included files can
284 284 themselves include more files.
285 285
286 286 This hash is defined as the SHA-1 of the concatenation (in sorted
287 287 order) of the "expanded contents" of each "root" ignore file.
288 288 (Note that computing this does not require actually concatenating
289 289 into a single contiguous byte sequence.
290 290 Instead a SHA-1 hasher object can be created
291 291 and fed separate chunks one by one.)
292 292
293 293 The data file format
294 294 --------------------
295 295
296 296 This is implemented in `rust/hg-core/src/dirstate_tree/on_disk.rs`
297 297 and `mercurial/dirstateutils/v2.py`.
298 298
299 299 The data file contains two types of data: paths and nodes.
300 300
301 301 Paths and nodes can be organized in any order in the file, except that sibling
302 302 nodes must be next to each other and sorted by their path.
303 303 Contiguity lets the parent refer to them all
304 304 by their count and a single pseudo-pointer,
305 305 instead of storing one pseudo-pointer per child node.
306 306 Sorting allows using binary search to find a child node with a given name
307 307 in `O(log(n))` byte sequence comparisons.
308 308
309 309 The current implementation writes paths and child nodes before a given node
310 310 for ease of figuring out the value of pseudo-pointers by the time they are to be
311 311 written, but this is not an obligation and readers must not rely on it.
312 312
313 313 A path is stored as a byte string anywhere in the file, without delimiter.
314 314 It is referred to by one or more nodes by a pseudo-pointer to its start, and its
315 315 length in bytes. Since there is no delimiter,
316 316 when a path is a substring of another the same bytes could be reused,
317 317 although the implementation does not exploit this as of this writing.
318 318
319 319 A node is stored on 44 bytes with components at fixed offsets. Paths and
320 320 child nodes relevant to a node are stored externally and referenced through
321 321 pseudo-pointers.
322 322
323 323 All integers are stored in big-endian. All pseudo-pointers are 32-bit integers
324 324 counting bytes from the start of the data file. Path lengths and positions
325 325 are 16-bit integers, also counted in bytes.
326 326
327 327 Node components are:
328 328
329 329 * Offset 0:
330 330 Pseudo-pointer to the full path of this node,
331 331 from the working directory root.
332 332
333 333 * Offset 4:
334 334 Length of the full path.
335 335
336 336 * Offset 6:
337 337 Position of the last `/` path separator within the full path,
338 338 in bytes from the start of the full path,
339 339 or zero if there isn’t one.
340 340 The part of the full path after this position is the "base name".
341 341 Since sibling nodes have the same parent, only their base names vary
342 342 and need to be considered when doing binary search to find a given path.
343 343
344 344 * Offset 8:
345 345 Pseudo-pointer to the "copy source" path for this node,
346 346 or zero if there is no copy source.
347 347
348 348 * Offset 12:
349 349 Length of the copy source path, or zero if there isn’t one.
350 350
351 351 * Offset 14:
352 352 Pseudo-pointer to the start of child nodes.
353 353
354 354 * Offset 18:
355 355 Number of child nodes, as a 32-bit integer.
356 356 They occupy 44 times this number of bytes
357 357 (not counting space for paths, and further descendants).
358 358
359 359 * Offset 22:
360 360 Number as a 32-bit integer of descendant nodes in this subtree,
361 361 not including this node itself,
362 362 that "have a dirstate entry".
363 363 Those nodes represent files that would be present at all in `dirstate-v1`.
364 364 This is typically less than the total number of descendants.
365 365 This counter is used to implement `has_dir`.
366 366
367 367 * Offset 26:
368 368 Number as a 32-bit integer of descendant nodes in this subtree,
369 369 not including this node itself,
370 370 that represent files tracked in the working directory.
371 371 (For example, `hg rm` makes a file untracked.)
372 372 This counter is used to implement `has_tracked_dir`.
373 373
374 374 * Offset 30:
375 375 A `flags` field that packs some boolean values as bits of a 16-bit integer.
376 376 Starting from least-significant, bit masks are::
377 377
378 378 WDIR_TRACKED = 1 << 0
379 379 P1_TRACKED = 1 << 1
380 380 P2_INFO = 1 << 2
381 381 HAS_MODE_AND_SIZE = 1 << 3
382 382 HAS_FILE_MTIME = 1 << 4
383 383 HAS_DIRECTORY_MTIME = 1 << 5
384 384 MODE_EXEC_PERM = 1 << 6
385 385 MODE_IS_SYMLINK = 1 << 7
386 EXPECTED_STATE_IS_MODIFIED = 1 << 8
386 387
387 388 The meaning of each bit is described below.
388 389
389 390 Other bits are unset.
390 391 They may be assigned meaning in the future,
391 392 with the limitation that Mercurial versions that pre-date such meaning
392 393 will always reset those bits to unset when writing nodes.
393 394 (A new node is written for any mutation in its subtree,
394 395 leaving the bytes of the old node unreachable
395 396 until the data file is rewritten entirely.)
396 397
397 398 * Offset 32:
398 399 A `size` field described below, as a 32-bit integer.
399 400 Unlike in dirstate-v1, negative values are not used.
400 401
401 402 * Offset 36:
402 403 The seconds component of an `mtime` field described below,
403 404 as a 32-bit integer.
404 405 Unlike in dirstate-v1, negative values are not used.
405 406 When `mtime` is used, this is number of seconds since the Unix epoch
406 407 truncated to its lower 31 bits.
407 408
408 409 * Offset 40:
409 410 The nanoseconds component of an `mtime` field described below,
410 411 as a 32-bit integer.
411 412 When `mtime` is used,
412 413 this is the number of nanoseconds since `mtime.seconds`,
413 414 always strictly less than one billion.
414 415
415 416 This may be zero if more precision is not available.
416 417 (This can happen because of limitations in any of Mercurial, Python,
417 418 libc, the operating system, …)
418 419
419 420 When comparing two mtimes and either has this component set to zero,
420 421 the sub-second precision of both should be ignored.
421 422 False positives when checking mtime equality due to clock resolution
422 423 are always possible and the status algorithm needs to deal with them,
423 424 but having too many false negatives could be harmful too.
424 425
425 426 * (Offset 44: end of this node)
426 427
427 428 The meaning of the boolean values packed in `flags` is:
428 429
429 430 `WDIR_TRACKED`
430 431 Set if the working directory contains a tracked file at this node’s path.
431 432 This is typically set and unset by `hg add` and `hg rm`.
432 433
433 434 `P1_TRACKED`
434 435 Set if the working directory’s first parent changeset
435 436 (whose node identifier is found in tree metadata)
436 437 contains a tracked file at this node’s path.
437 438 This is a cache to reduce manifest lookups.
438 439
439 440 `P2_INFO`
440 441 Set if the file has been involved in some merge operation.
441 442 Either because it was actually merged,
443 444 or because the version in the second parent p2 was ahead,
443 444 or because some rename moved it there.
444 445 In either case `hg status` will want it displayed as modified.
445 446
446 447 Files that would be mentioned at all in the `dirstate-v1` file format
447 448 have a node with at least one of the above three bits set in `dirstate-v2`.
448 449 Let’s call these files "tracked anywhere",
449 450 and "untracked" the nodes with all three of these bits unset.
450 451 Untracked nodes are typically for directories:
451 452 they hold child nodes and form the tree structure.
452 453 Additional untracked nodes may also exist.
453 454 Although implementations should strive to clean up nodes
454 455 that are entirely unused, other untracked nodes may also exist.
455 456 For example, a future version of Mercurial might in some cases
456 457 add nodes for untracked files or/and ignored files in the working directory
457 458 in order to optimize `hg status`
458 459 by enabling it to skip `readdir` in more cases.
459 460
460 461 `HAS_MODE_AND_SIZE`
461 462 Must be unset for untracked nodes.
462 463 For files tracked anywhere, if this is set:
463 464 - The `size` field is the expected file size,
464 in bytes truncated its lower to 31 bits,
465 for the file to be clean.
465 in bytes truncated to its lower 31 bits.
466 466 - The expected execute permission for the file’s owner
467 467 is given by `MODE_EXEC_PERM`
468 468 - The expected file type is given by `MODE_IS_SYMLINK`:
469 469 a symbolic link if set, or a normal file if unset.
470 470 If this is unset the expected size, permission, and file type are unknown.
471 471 The `size` field is unused (set to zero).
472 472
473 473 `HAS_FILE_MTIME`
474 474 Must be unset for untracked nodes.
475 475 If this and `HAS_DIRECTORY_MTIME` are both unset,
476 476 the `mtime` field is unused (set to zero).
477 If this is set, `mtime` is the modification time
478 expected for the file to be considered clean.
477 If this is set, `mtime` is the expected modification time.
479 478
480 479 `HAS_DIRECTORY_MTIME`
480 481 Must be unset for files tracked anywhere.
481 482 If this and `HAS_FILE_MTIME` are both unset,
483 482 the `mtime` field is unused (set to zero).
484 483 If this is set, at some point,
485 484 this path in the working directory was observed:
486 485
487 486 - To be a directory
488 487 - With the modification time given in `mtime`
489 488 - That time was already strictly in the past when observed,
490 489 meaning that later changes cannot happen in the same clock tick
491 490 and must cause a different modification time
492 491 (unless the system clock jumps back and we get unlucky,
493 492 which is not impossible but deemed unlikely enough).
494 493 - All direct children of this directory
495 494 (as returned by `std::fs::read_dir`)
496 495 either have a corresponding dirstate node,
497 496 or are ignored by ignore patterns whose hash is in tree metadata.
498 497
499 498 This means that if `std::fs::symlink_metadata` later reports
500 499 the same modification time
501 500 and ignored patterns haven’t changed,
502 501 a run of status that is not listing ignored files
503 502 can skip calling `std::fs::read_dir` again for this directory,
504 503 and iterate child dirstate nodes instead.
505 504
506 505 `MODE_EXEC_PERM`
507 506 Must be unset if `HAS_MODE_AND_SIZE` is unset.
508 507 If `HAS_MODE_AND_SIZE` is set,
508 509 this indicates whether the file’s owner is expected
510 509 to have execute permission.
511 510
512 511 `MODE_IS_SYMLINK`
513 512 Must be unset if `HAS_MODE_AND_SIZE` is unset.
514 513 If `HAS_MODE_AND_SIZE` is set,
515 514 this indicates whether the file is expected to be a symlink
516 515 as opposed to a normal file.
516
517 `EXPECTED_STATE_IS_MODIFIED`
518 Must be unset for untracked nodes.
519 For:
520 - a file tracked anywhere
521 - that has expected metadata (`HAS_MODE_AND_SIZE` and `HAS_FILE_MTIME`)
522 - if that metadata matches
523 metadata found in the working directory with `stat`
524 This bit indicates the status of the file.
525 If set, the status is modified. If unset, it is clean.
526
527 In cases where `hg status` needs to read the contents of a file
528 because metadata is ambiguous, this bit lets it record the result
529 if the result is modified so that a future run of `hg status`
530 does not need to do the same again.
531 It is valid to never set this bit,
532 and consider expected metadata ambiguous if it is set.
@@ -1,790 +1,799 b''
1 1 # parsers.py - Python implementation of parsers.c
2 2 #
3 3 # Copyright 2009 Olivia Mackall <olivia@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import stat
11 11 import struct
12 12 import zlib
13 13
14 14 from ..node import (
15 15 nullrev,
16 16 sha1nodeconstants,
17 17 )
18 18 from ..thirdparty import attr
19 19 from .. import (
20 20 error,
21 21 pycompat,
22 22 revlogutils,
23 23 util,
24 24 )
25 25
26 26 from ..revlogutils import nodemap as nodemaputil
27 27 from ..revlogutils import constants as revlog_constants
28 28
29 29 stringio = pycompat.bytesio
30 30
31 31
32 32 _pack = struct.pack
33 33 _unpack = struct.unpack
34 34 _compress = zlib.compress
35 35 _decompress = zlib.decompress
36 36
37 37
38 38 # a special value used internally for `size` if the file comes from the other parent
39 39 FROM_P2 = -2
40 40
41 41 # a special value used internally for `size` if the file is modified/merged/added
42 42 NONNORMAL = -1
43 43
44 44 # a special value used internally for `time` if the time is ambiguous
45 45 AMBIGUOUS_TIME = -1
46 46
47 47 # Bits of the `flags` field inside a node in the file format
48 48 DIRSTATE_V2_WDIR_TRACKED = 1 << 0
49 49 DIRSTATE_V2_P1_TRACKED = 1 << 1
50 50 DIRSTATE_V2_P2_INFO = 1 << 2
51 51 DIRSTATE_V2_HAS_MODE_AND_SIZE = 1 << 3
52 52 DIRSTATE_V2_HAS_FILE_MTIME = 1 << 4
53 53 _DIRSTATE_V2_HAS_DIRCTORY_MTIME = 1 << 5 # Unused when Rust is not available
54 54 DIRSTATE_V2_MODE_EXEC_PERM = 1 << 6
55 55 DIRSTATE_V2_MODE_IS_SYMLINK = 1 << 7
56 DIRSTATE_V2_EXPECTED_STATE_IS_MODIFIED = 1 << 8
56 57
57 58
58 59 @attr.s(slots=True, init=False)
59 60 class DirstateItem(object):
60 61 """represent a dirstate entry
61 62
62 63 It holds multiple attributes
63 64
64 65 # about file tracking
65 66 - wc_tracked: is the file tracked by the working copy
66 67 - p1_tracked: is the file tracked in working copy first parent
67 68 - p2_info: the file has been involved in some merge operation. Either
68 69 because it was actually merged, or because the p2 version was
69 70 ahead, or because some rename moved it there. In either case
70 71 `hg status` will want it displayed as modified.
71 72
72 73 # about the file state expected from p1 manifest:
73 74 - mode: the file mode in p1
74 75 - size: the file size in p1
75 76
76 77 These values can be set to None, which means we don't have a meaningful value
77 78 to compare with. Either because we don't really care about them as their
78 79 `status` is known without having to look at the disk or because we don't
79 80 know these right now and a full comparison will be needed to find out if
80 81 the file is clean.
81 82
82 83 # about the file state on disk last time we saw it:
83 84 - mtime: the last known clean mtime for the file.
84 85
85 86 This value can be set to None if no cacheable state exists. Either because we
86 87 do not care (see previous section) or because we could not cache something
87 88 yet.
88 89 """
89 90
90 91 _wc_tracked = attr.ib()
91 92 _p1_tracked = attr.ib()
92 93 _p2_info = attr.ib()
93 94 _mode = attr.ib()
94 95 _size = attr.ib()
95 96 _mtime = attr.ib()
96 97
97 98 def __init__(
98 99 self,
99 100 wc_tracked=False,
100 101 p1_tracked=False,
101 102 p2_info=False,
102 103 has_meaningful_data=True,
103 104 has_meaningful_mtime=True,
104 105 parentfiledata=None,
105 106 ):
106 107 self._wc_tracked = wc_tracked
107 108 self._p1_tracked = p1_tracked
108 109 self._p2_info = p2_info
109 110
110 111 self._mode = None
111 112 self._size = None
112 113 self._mtime = None
113 114 if parentfiledata is None:
114 115 has_meaningful_mtime = False
115 116 has_meaningful_data = False
116 117 if has_meaningful_data:
117 118 self._mode = parentfiledata[0]
118 119 self._size = parentfiledata[1]
119 120 if has_meaningful_mtime:
120 121 self._mtime = parentfiledata[2]
121 122
122 123 @classmethod
123 124 def from_v2_data(cls, flags, size, mtime):
124 125 """Build a new DirstateItem object from V2 data"""
125 126 has_mode_size = bool(flags & DIRSTATE_V2_HAS_MODE_AND_SIZE)
127 has_meaningful_mtime = bool(flags & DIRSTATE_V2_HAS_FILE_MTIME)
126 128 mode = None
129
130 if flags & DIRSTATE_V2_EXPECTED_STATE_IS_MODIFIED:
131 # we do not have support for this flag in the code yet,
132 # force a lookup for this file.
133 has_mode_size = False
134 has_meaningful_mtime = False
135
127 136 if has_mode_size:
128 137 assert stat.S_IXUSR == 0o100
129 138 if flags & DIRSTATE_V2_MODE_EXEC_PERM:
130 139 mode = 0o755
131 140 else:
132 141 mode = 0o644
133 142 if flags & DIRSTATE_V2_MODE_IS_SYMLINK:
134 143 mode |= stat.S_IFLNK
135 144 else:
136 145 mode |= stat.S_IFREG
137 146 return cls(
138 147 wc_tracked=bool(flags & DIRSTATE_V2_WDIR_TRACKED),
139 148 p1_tracked=bool(flags & DIRSTATE_V2_P1_TRACKED),
140 149 p2_info=bool(flags & DIRSTATE_V2_P2_INFO),
141 150 has_meaningful_data=has_mode_size,
142 has_meaningful_mtime=bool(flags & DIRSTATE_V2_HAS_FILE_MTIME),
151 has_meaningful_mtime=has_meaningful_mtime,
143 152 parentfiledata=(mode, size, mtime),
144 153 )
145 154
146 155 @classmethod
147 156 def from_v1_data(cls, state, mode, size, mtime):
148 157 """Build a new DirstateItem object from V1 data
149 158
150 159 Since the dirstate-v1 format is frozen, the signature of this function
151 160 is not expected to change, unlike the __init__ one.
152 161 """
153 162 if state == b'm':
154 163 return cls(wc_tracked=True, p1_tracked=True, p2_info=True)
155 164 elif state == b'a':
156 165 return cls(wc_tracked=True)
157 166 elif state == b'r':
158 167 if size == NONNORMAL:
159 168 p1_tracked = True
160 169 p2_info = True
161 170 elif size == FROM_P2:
162 171 p1_tracked = False
163 172 p2_info = True
164 173 else:
165 174 p1_tracked = True
166 175 p2_info = False
167 176 return cls(p1_tracked=p1_tracked, p2_info=p2_info)
168 177 elif state == b'n':
169 178 if size == FROM_P2:
170 179 return cls(wc_tracked=True, p2_info=True)
171 180 elif size == NONNORMAL:
172 181 return cls(wc_tracked=True, p1_tracked=True)
173 182 elif mtime == AMBIGUOUS_TIME:
174 183 return cls(
175 184 wc_tracked=True,
176 185 p1_tracked=True,
177 186 has_meaningful_mtime=False,
178 187 parentfiledata=(mode, size, 42),
179 188 )
180 189 else:
181 190 return cls(
182 191 wc_tracked=True,
183 192 p1_tracked=True,
184 193 parentfiledata=(mode, size, mtime),
185 194 )
186 195 else:
187 196 raise RuntimeError(b'unknown state: %s' % state)
188 197
189 198 def set_possibly_dirty(self):
190 199 """Mark a file as "possibly dirty"
191 200
192 201 This means the next status call will have to actually check its content
193 202 to make sure it is correct.
194 203 """
195 204 self._mtime = None
196 205
197 206 def set_clean(self, mode, size, mtime):
198 207 """mark a file as "clean", cancelling a potential "possibly dirty" call
199 208
200 209 Note: this function is a descendant of `dirstate.normal` and is
201 210 currently expected to be called on "normal" entries only. There is no
202 211 reason for this to not change in the future as long as the code is
203 212 updated to preserve the proper state of the non-normal files.
204 213 """
205 214 self._wc_tracked = True
206 215 self._p1_tracked = True
207 216 self._mode = mode
208 217 self._size = size
209 218 self._mtime = mtime
210 219
211 220 def set_tracked(self):
212 221 """mark a file as tracked in the working copy
213 222
214 223 This will ultimately be called by command like `hg add`.
215 224 """
216 225 self._wc_tracked = True
217 226 # `set_tracked` is replacing various `normallookup` call. So we mark
218 227 # the files as needing lookup
219 228 #
220 229 # Consider dropping this in the future in favor of something less broad.
221 230 self._mtime = None
222 231
223 232 def set_untracked(self):
224 233 """mark a file as untracked in the working copy
225 234
226 235 This will ultimately be called by command like `hg remove`.
227 236 """
228 237 self._wc_tracked = False
229 238 self._mode = None
230 239 self._size = None
231 240 self._mtime = None
232 241
233 242 def drop_merge_data(self):
234 243 """remove all "merge-only" data from a DirstateItem
235 244
236 245 This is to be called by the dirstatemap code when the second parent is dropped
237 246 """
238 247 if self._p2_info:
239 248 self._p2_info = False
240 249 self._mode = None
241 250 self._size = None
242 251 self._mtime = None
243 252
244 253 @property
245 254 def mode(self):
246 255 return self.v1_mode()
247 256
248 257 @property
249 258 def size(self):
250 259 return self.v1_size()
251 260
252 261 @property
253 262 def mtime(self):
254 263 return self.v1_mtime()
255 264
256 265 @property
257 266 def state(self):
258 267 """
259 268 States are:
260 269 n normal
261 270 m needs merging
262 271 r marked for removal
263 272 a marked for addition
264 273
265 274 XXX This "state" is a bit obscure and mostly a direct expression of the
266 275 dirstatev1 format. It would make sense to ultimately deprecate it in
267 276 favor of the more "semantic" attributes.
268 277 """
269 278 if not self.any_tracked:
270 279 return b'?'
271 280 return self.v1_state()
272 281
273 282 @property
274 283 def tracked(self):
275 284 """True if the file is tracked in the working copy"""
276 285 return self._wc_tracked
277 286
278 287 @property
279 288 def any_tracked(self):
280 289 """True if the file is tracked anywhere (wc or parents)"""
281 290 return self._wc_tracked or self._p1_tracked or self._p2_info
282 291
283 292 @property
284 293 def added(self):
285 294 """True if the file has been added"""
286 295 return self._wc_tracked and not (self._p1_tracked or self._p2_info)
287 296
288 297 @property
289 298 def maybe_clean(self):
290 299 """True if the file has a chance to be in the "clean" state"""
291 300 if not self._wc_tracked:
292 301 return False
293 302 elif not self._p1_tracked:
294 303 return False
295 304 elif self._p2_info:
296 305 return False
297 306 return True
298 307
299 308 @property
300 309 def p1_tracked(self):
301 310 """True if the file is tracked in the first parent manifest"""
302 311 return self._p1_tracked
303 312
304 313 @property
305 314 def p2_info(self):
306 315 """True if the file needed to merge or apply any input from p2
307 316
308 317 See the class documentation for details.
309 318 """
310 319 return self._wc_tracked and self._p2_info
311 320
312 321 @property
313 322 def removed(self):
314 323 """True if the file has been removed"""
315 324 return not self._wc_tracked and (self._p1_tracked or self._p2_info)
316 325
317 326 def v2_data(self):
318 327 """Returns (flags, size, mtime) for v2 serialization"""
319 328 flags = 0
320 329 if self._wc_tracked:
321 330 flags |= DIRSTATE_V2_WDIR_TRACKED
322 331 if self._p1_tracked:
323 332 flags |= DIRSTATE_V2_P1_TRACKED
324 333 if self._p2_info:
325 334 flags |= DIRSTATE_V2_P2_INFO
326 335 if self._mode is not None and self._size is not None:
327 336 flags |= DIRSTATE_V2_HAS_MODE_AND_SIZE
328 337 if self.mode & stat.S_IXUSR:
329 338 flags |= DIRSTATE_V2_MODE_EXEC_PERM
330 339 if stat.S_ISLNK(self.mode):
331 340 flags |= DIRSTATE_V2_MODE_IS_SYMLINK
332 341 if self._mtime is not None:
333 342 flags |= DIRSTATE_V2_HAS_FILE_MTIME
334 343 return (flags, self._size or 0, self._mtime or 0)
335 344
336 345 def v1_state(self):
337 346 """return a "state" suitable for v1 serialization"""
338 347 if not self.any_tracked:
339 348 # the object has no state to record, this is -currently-
340 349 # unsupported
341 350 raise RuntimeError('untracked item')
342 351 elif self.removed:
343 352 return b'r'
344 353 elif self._p1_tracked and self._p2_info:
345 354 return b'm'
346 355 elif self.added:
347 356 return b'a'
348 357 else:
349 358 return b'n'
350 359
351 360 def v1_mode(self):
352 361 """return a "mode" suitable for v1 serialization"""
353 362 return self._mode if self._mode is not None else 0
354 363
355 364 def v1_size(self):
356 365 """return a "size" suitable for v1 serialization"""
357 366 if not self.any_tracked:
358 367 # the object has no state to record, this is -currently-
359 368 # unsupported
360 369 raise RuntimeError('untracked item')
361 370 elif self.removed and self._p1_tracked and self._p2_info:
362 371 return NONNORMAL
363 372 elif self._p2_info:
364 373 return FROM_P2
365 374 elif self.removed:
366 375 return 0
367 376 elif self.added:
368 377 return NONNORMAL
369 378 elif self._size is None:
370 379 return NONNORMAL
371 380 else:
372 381 return self._size
373 382
374 383 def v1_mtime(self):
375 384 """return a "mtime" suitable for v1 serialization"""
376 385 if not self.any_tracked:
377 386 # the object has no state to record, this is -currently-
378 387 # unsupported
379 388 raise RuntimeError('untracked item')
380 389 elif self.removed:
381 390 return 0
382 391 elif self._mtime is None:
383 392 return AMBIGUOUS_TIME
384 393 elif self._p2_info:
385 394 return AMBIGUOUS_TIME
386 395 elif not self._p1_tracked:
387 396 return AMBIGUOUS_TIME
388 397 else:
389 398 return self._mtime
390 399
391 400 def need_delay(self, now):
392 401 """True if the stored mtime would be ambiguous with the current time"""
393 402 return self.v1_state() == b'n' and self.v1_mtime() == now
394 403
395 404
396 405 def gettype(q):
397 406 return int(q & 0xFFFF)
398 407
399 408
400 409 class BaseIndexObject(object):
401 410 # Can I be passed to an algorithm implemented in Rust ?
402 411 rust_ext_compat = 0
403 412 # Format of an index entry according to Python's `struct` language
404 413 index_format = revlog_constants.INDEX_ENTRY_V1
405 414 # Size of a C unsigned long long int, platform independent
406 415 big_int_size = struct.calcsize(b'>Q')
407 416 # Size of a C long int, platform independent
408 417 int_size = struct.calcsize(b'>i')
409 418 # An empty index entry, used as a default value to be overridden, or nullrev
410 419 null_item = (
411 420 0,
412 421 0,
413 422 0,
414 423 -1,
415 424 -1,
416 425 -1,
417 426 -1,
418 427 sha1nodeconstants.nullid,
419 428 0,
420 429 0,
421 430 revlog_constants.COMP_MODE_INLINE,
422 431 revlog_constants.COMP_MODE_INLINE,
423 432 )
424 433
425 434 @util.propertycache
426 435 def entry_size(self):
427 436 return self.index_format.size
428 437
429 438 @property
430 439 def nodemap(self):
431 440 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
432 441 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
433 442 return self._nodemap
434 443
435 444 @util.propertycache
436 445 def _nodemap(self):
437 446 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
438 447 for r in range(0, len(self)):
439 448 n = self[r][7]
440 449 nodemap[n] = r
441 450 return nodemap
442 451
443 452 def has_node(self, node):
444 453 """return True if the node exists in the index"""
445 454 return node in self._nodemap
446 455
447 456 def rev(self, node):
448 457 """return a revision for a node
449 458
450 459 If the node is unknown, raise a RevlogError"""
451 460 return self._nodemap[node]
452 461
453 462 def get_rev(self, node):
454 463 """return a revision for a node
455 464
456 465 If the node is unknown, return None"""
457 466 return self._nodemap.get(node)
458 467
459 468 def _stripnodes(self, start):
460 469 if '_nodemap' in vars(self):
461 470 for r in range(start, len(self)):
462 471 n = self[r][7]
463 472 del self._nodemap[n]
464 473
465 474 def clearcaches(self):
466 475 self.__dict__.pop('_nodemap', None)
467 476
468 477 def __len__(self):
469 478 return self._lgt + len(self._extra)
470 479
471 480 def append(self, tup):
472 481 if '_nodemap' in vars(self):
473 482 self._nodemap[tup[7]] = len(self)
474 483 data = self._pack_entry(len(self), tup)
475 484 self._extra.append(data)
476 485
477 486 def _pack_entry(self, rev, entry):
478 487 assert entry[8] == 0
479 488 assert entry[9] == 0
480 489 return self.index_format.pack(*entry[:8])
481 490
482 491 def _check_index(self, i):
483 492 if not isinstance(i, int):
484 493 raise TypeError(b"expecting int indexes")
485 494 if i < 0 or i >= len(self):
486 495 raise IndexError
487 496
488 497 def __getitem__(self, i):
489 498 if i == -1:
490 499 return self.null_item
491 500 self._check_index(i)
492 501 if i >= self._lgt:
493 502 data = self._extra[i - self._lgt]
494 503 else:
495 504 index = self._calculate_index(i)
496 505 data = self._data[index : index + self.entry_size]
497 506 r = self._unpack_entry(i, data)
498 507 if self._lgt and i == 0:
499 508 offset = revlogutils.offset_type(0, gettype(r[0]))
500 509 r = (offset,) + r[1:]
501 510 return r
502 511
503 512 def _unpack_entry(self, rev, data):
504 513 r = self.index_format.unpack(data)
505 514 r = r + (
506 515 0,
507 516 0,
508 517 revlog_constants.COMP_MODE_INLINE,
509 518 revlog_constants.COMP_MODE_INLINE,
510 519 )
511 520 return r
512 521
513 522 def pack_header(self, header):
514 523 """pack header information as binary"""
515 524 v_fmt = revlog_constants.INDEX_HEADER
516 525 return v_fmt.pack(header)
517 526
518 527 def entry_binary(self, rev):
519 528 """return the raw binary string representing a revision"""
520 529 entry = self[rev]
521 530 p = revlog_constants.INDEX_ENTRY_V1.pack(*entry[:8])
522 531 if rev == 0:
523 532 p = p[revlog_constants.INDEX_HEADER.size :]
524 533 return p
525 534
526 535
527 536 class IndexObject(BaseIndexObject):
528 537 def __init__(self, data):
529 538 assert len(data) % self.entry_size == 0, (
530 539 len(data),
531 540 self.entry_size,
532 541 len(data) % self.entry_size,
533 542 )
534 543 self._data = data
535 544 self._lgt = len(data) // self.entry_size
536 545 self._extra = []
537 546
538 547 def _calculate_index(self, i):
539 548 return i * self.entry_size
540 549
541 550 def __delitem__(self, i):
542 551 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
543 552 raise ValueError(b"deleting slices only supports a:-1 with step 1")
544 553 i = i.start
545 554 self._check_index(i)
546 555 self._stripnodes(i)
547 556 if i < self._lgt:
548 557 self._data = self._data[: i * self.entry_size]
549 558 self._lgt = i
550 559 self._extra = []
551 560 else:
552 561 self._extra = self._extra[: i - self._lgt]
553 562
554 563
555 564 class PersistentNodeMapIndexObject(IndexObject):
556 565 """a Debug oriented class to test persistent nodemap
557 566
558 567 We need a simple python object to test API and higher level behavior. See
559 568 the Rust implementation for more serious usage. This should be used only
560 569 through the dedicated `devel.persistent-nodemap` config.
561 570 """
562 571
563 572 def nodemap_data_all(self):
564 573 """Return bytes containing a full serialization of a nodemap
565 574
566 575 The nodemap should be valid for the full set of revisions in the
567 576 index."""
568 577 return nodemaputil.persistent_data(self)
569 578
570 579 def nodemap_data_incremental(self):
571 580 """Return bytes containing an incremental update to persistent nodemap
572 581
573 582 This contains the data for an append-only update of the data provided
574 583 in the last call to `update_nodemap_data`.
575 584 """
576 585 if self._nm_root is None:
577 586 return None
578 587 docket = self._nm_docket
579 588 changed, data = nodemaputil.update_persistent_data(
580 589 self, self._nm_root, self._nm_max_idx, self._nm_docket.tip_rev
581 590 )
582 591
583 592 self._nm_root = self._nm_max_idx = self._nm_docket = None
584 593 return docket, changed, data
585 594
586 595 def update_nodemap_data(self, docket, nm_data):
587 596 """provide full block of persisted binary data for a nodemap
588 597
589 598 The data are expected to come from disk. See `nodemap_data_all` for a
590 599 producer of such data."""
591 600 if nm_data is not None:
592 601 self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
593 602 if self._nm_root:
594 603 self._nm_docket = docket
595 604 else:
596 605 self._nm_root = self._nm_max_idx = self._nm_docket = None
597 606
598 607
599 608 class InlinedIndexObject(BaseIndexObject):
600 609 def __init__(self, data, inline=0):
601 610 self._data = data
602 611 self._lgt = self._inline_scan(None)
603 612 self._inline_scan(self._lgt)
604 613 self._extra = []
605 614
606 615 def _inline_scan(self, lgt):
607 616 off = 0
608 617 if lgt is not None:
609 618 self._offsets = [0] * lgt
610 619 count = 0
611 620 while off <= len(self._data) - self.entry_size:
612 621 start = off + self.big_int_size
613 622 (s,) = struct.unpack(
614 623 b'>i',
615 624 self._data[start : start + self.int_size],
616 625 )
617 626 if lgt is not None:
618 627 self._offsets[count] = off
619 628 count += 1
620 629 off += self.entry_size + s
621 630 if off != len(self._data):
622 631 raise ValueError(b"corrupted data")
623 632 return count
624 633
625 634 def __delitem__(self, i):
626 635 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
627 636 raise ValueError(b"deleting slices only supports a:-1 with step 1")
628 637 i = i.start
629 638 self._check_index(i)
630 639 self._stripnodes(i)
631 640 if i < self._lgt:
632 641 self._offsets = self._offsets[:i]
633 642 self._lgt = i
634 643 self._extra = []
635 644 else:
636 645 self._extra = self._extra[: i - self._lgt]
637 646
638 647 def _calculate_index(self, i):
639 648 return self._offsets[i]
640 649
641 650
642 651 def parse_index2(data, inline, revlogv2=False):
643 652 if not inline:
644 653 cls = IndexObject2 if revlogv2 else IndexObject
645 654 return cls(data), None
646 655 cls = InlinedIndexObject
647 656 return cls(data, inline), (0, data)
648 657
649 658
650 659 def parse_index_cl_v2(data):
651 660 return IndexChangelogV2(data), None
652 661
653 662
654 663 class IndexObject2(IndexObject):
655 664 index_format = revlog_constants.INDEX_ENTRY_V2
656 665
657 666 def replace_sidedata_info(
658 667 self,
659 668 rev,
660 669 sidedata_offset,
661 670 sidedata_length,
662 671 offset_flags,
663 672 compression_mode,
664 673 ):
665 674 """
666 675 Replace an existing index entry's sidedata offset and length with new
667 676 ones.
668 677 This cannot be used outside of the context of sidedata rewriting,
669 678 inside the transaction that creates the revision `rev`.
670 679 """
671 680 if rev < 0:
672 681 raise KeyError
673 682 self._check_index(rev)
674 683 if rev < self._lgt:
675 684 msg = b"cannot rewrite entries outside of this transaction"
676 685 raise KeyError(msg)
677 686 else:
678 687 entry = list(self[rev])
679 688 entry[0] = offset_flags
680 689 entry[8] = sidedata_offset
681 690 entry[9] = sidedata_length
682 691 entry[11] = compression_mode
683 692 entry = tuple(entry)
684 693 new = self._pack_entry(rev, entry)
685 694 self._extra[rev - self._lgt] = new
686 695
687 696 def _unpack_entry(self, rev, data):
688 697 data = self.index_format.unpack(data)
689 698 entry = data[:10]
690 699 data_comp = data[10] & 3
691 700 sidedata_comp = (data[10] & (3 << 2)) >> 2
692 701 return entry + (data_comp, sidedata_comp)
693 702
694 703 def _pack_entry(self, rev, entry):
695 704 data = entry[:10]
696 705 data_comp = entry[10] & 3
697 706 sidedata_comp = (entry[11] & 3) << 2
698 707 data += (data_comp | sidedata_comp,)
699 708
700 709 return self.index_format.pack(*data)
701 710
702 711 def entry_binary(self, rev):
703 712 """return the raw binary string representing a revision"""
704 713 entry = self[rev]
705 714 return self._pack_entry(rev, entry)
706 715
707 716 def pack_header(self, header):
708 717 """pack header information as binary"""
709 718 msg = 'version header should go in the docket, not the index: %d'
710 719 msg %= header
711 720 raise error.ProgrammingError(msg)
712 721
713 722
714 723 class IndexChangelogV2(IndexObject2):
715 724 index_format = revlog_constants.INDEX_ENTRY_CL_V2
716 725
717 726 def _unpack_entry(self, rev, data, r=True):
718 727 items = self.index_format.unpack(data)
719 728 entry = items[:3] + (rev, rev) + items[3:8]
720 729 data_comp = items[8] & 3
721 730 sidedata_comp = (items[8] >> 2) & 3
722 731 return entry + (data_comp, sidedata_comp)
723 732
724 733 def _pack_entry(self, rev, entry):
725 734 assert entry[3] == rev, entry[3]
726 735 assert entry[4] == rev, entry[4]
727 736 data = entry[:3] + entry[5:10]
728 737 data_comp = entry[10] & 3
729 738 sidedata_comp = (entry[11] & 3) << 2
730 739 data += (data_comp | sidedata_comp,)
731 740 return self.index_format.pack(*data)
732 741
733 742
734 743 def parse_index_devel_nodemap(data, inline):
735 744 """like parse_index2, but always return a PersistentNodeMapIndexObject"""
736 745 return PersistentNodeMapIndexObject(data), None
737 746
738 747
739 748 def parse_dirstate(dmap, copymap, st):
740 749 parents = [st[:20], st[20:40]]
741 750 # dereference fields so they will be local in loop
742 751 format = b">cllll"
743 752 e_size = struct.calcsize(format)
744 753 pos1 = 40
745 754 l = len(st)
746 755
747 756 # the inner loop
748 757 while pos1 < l:
749 758 pos2 = pos1 + e_size
750 759 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
751 760 pos1 = pos2 + e[4]
752 761 f = st[pos2:pos1]
753 762 if b'\0' in f:
754 763 f, c = f.split(b'\0')
755 764 copymap[f] = c
756 765 dmap[f] = DirstateItem.from_v1_data(*e[:4])
757 766 return parents
758 767
759 768
760 769 def pack_dirstate(dmap, copymap, pl, now):
761 770 now = int(now)
762 771 cs = stringio()
763 772 write = cs.write
764 773 write(b"".join(pl))
765 774 for f, e in pycompat.iteritems(dmap):
766 775 if e.need_delay(now):
767 776 # The file was last modified "simultaneously" with the current
768 777 # write to dirstate (i.e. within the same second for file-
769 778 # systems with a granularity of 1 sec). This commonly happens
770 779 # for at least a couple of files on 'update'.
771 780 # The user could change the file without changing its size
772 781 # within the same second. Invalidate the file's mtime in
773 782 # dirstate, forcing future 'status' calls to compare the
774 783 # contents of the file if the size is the same. This prevents
775 784 # mistakenly treating such files as clean.
776 785 e.set_possibly_dirty()
777 786
778 787 if f in copymap:
779 788 f = b"%s\0%s" % (f, copymap[f])
780 789 e = _pack(
781 790 b">cllll",
782 791 e.v1_state(),
783 792 e.v1_mode(),
784 793 e.v1_size(),
785 794 e.v1_mtime(),
786 795 len(f),
787 796 )
788 797 write(e)
789 798 write(f)
790 799 return cs.getvalue()
@@ -1,726 +1,731 b''
1 1 //! The "version 2" disk representation of the dirstate
2 2 //!
3 3 //! See `mercurial/helptext/internals/dirstate-v2.txt`
4 4
5 5 use crate::dirstate::TruncatedTimestamp;
6 6 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
7 7 use crate::dirstate_tree::path_with_basename::WithBasename;
8 8 use crate::errors::HgError;
9 9 use crate::utils::hg_path::HgPath;
10 10 use crate::DirstateEntry;
11 11 use crate::DirstateError;
12 12 use crate::DirstateParents;
13 13 use bitflags::bitflags;
14 14 use bytes_cast::unaligned::{U16Be, U32Be};
15 15 use bytes_cast::BytesCast;
16 16 use format_bytes::format_bytes;
17 17 use std::borrow::Cow;
18 18 use std::convert::{TryFrom, TryInto};
19 19
20 20 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
21 21 /// This is a redundant sanity check more than an actual "magic number" since
22 22 /// `.hg/requires` already governs which format should be used.
23 23 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
24 24
25 25 /// Keep space for 256-bit hashes
26 26 const STORED_NODE_ID_BYTES: usize = 32;
27 27
28 28 /// … even though only 160 bits are used for now, with SHA-1
29 29 const USED_NODE_ID_BYTES: usize = 20;
30 30
31 31 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
32 32 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
33 33
34 34 /// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
35 35 const TREE_METADATA_SIZE: usize = 44;
36 36 const NODE_SIZE: usize = 44;
37 37
38 38 /// Make sure that size-affecting changes are made knowingly
39 39 #[allow(unused)]
40 40 fn static_assert_size_of() {
41 41 let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
42 42 let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
43 43 let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
44 44 }
45 45
46 46 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
47 47 #[derive(BytesCast)]
48 48 #[repr(C)]
49 49 struct DocketHeader {
50 50 marker: [u8; V2_FORMAT_MARKER.len()],
51 51 parent_1: [u8; STORED_NODE_ID_BYTES],
52 52 parent_2: [u8; STORED_NODE_ID_BYTES],
53 53
54 54 metadata: TreeMetadata,
55 55
56 56 /// Counted in bytes
57 57 data_size: Size,
58 58
59 59 uuid_size: u8,
60 60 }
61 61
62 62 pub struct Docket<'on_disk> {
63 63 header: &'on_disk DocketHeader,
64 64 uuid: &'on_disk [u8],
65 65 }
66 66
67 67 /// Fields are documented in the *Tree metadata in the docket file*
68 68 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
69 69 #[derive(BytesCast)]
70 70 #[repr(C)]
71 71 struct TreeMetadata {
72 72 root_nodes: ChildNodes,
73 73 nodes_with_entry_count: Size,
74 74 nodes_with_copy_source_count: Size,
75 75 unreachable_bytes: Size,
76 76 unused: [u8; 4],
77 77
78 78 /// See *Optional hash of ignore patterns* section of
79 79 /// `mercurial/helptext/internals/dirstate-v2.txt`
80 80 ignore_patterns_hash: IgnorePatternsHash,
81 81 }
82 82
83 83 /// Fields are documented in the *The data file format*
84 84 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
85 85 #[derive(BytesCast)]
86 86 #[repr(C)]
87 87 pub(super) struct Node {
88 88 full_path: PathSlice,
89 89
90 90 /// In bytes from `self.full_path.start`
91 91 base_name_start: PathSize,
92 92
93 93 copy_source: OptPathSlice,
94 94 children: ChildNodes,
95 95 pub(super) descendants_with_entry_count: Size,
96 96 pub(super) tracked_descendants_count: Size,
97 97 flags: U16Be,
98 98 size: U32Be,
99 99 mtime: PackedTruncatedTimestamp,
100 100 }
101 101
102 102 bitflags! {
103 103 #[repr(C)]
104 104 struct Flags: u16 {
105 105 const WDIR_TRACKED = 1 << 0;
106 106 const P1_TRACKED = 1 << 1;
107 107 const P2_INFO = 1 << 2;
108 108 const HAS_MODE_AND_SIZE = 1 << 3;
109 109 const HAS_FILE_MTIME = 1 << 4;
110 110 const HAS_DIRECTORY_MTIME = 1 << 5;
111 111 const MODE_EXEC_PERM = 1 << 6;
112 112 const MODE_IS_SYMLINK = 1 << 7;
113 const EXPECTED_STATE_IS_MODIFIED = 1 << 8;
113 114 }
114 115 }
115 116
116 117 /// Duration since the Unix epoch
117 118 #[derive(BytesCast, Copy, Clone)]
118 119 #[repr(C)]
119 120 struct PackedTruncatedTimestamp {
120 121 truncated_seconds: U32Be,
121 122 nanoseconds: U32Be,
122 123 }
123 124
124 125 /// Counted in bytes from the start of the file
125 126 ///
126 127 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
127 128 type Offset = U32Be;
128 129
129 130 /// Counted in number of items
130 131 ///
131 132 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
132 133 type Size = U32Be;
133 134
134 135 /// Counted in bytes
135 136 ///
136 137 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
137 138 type PathSize = U16Be;
138 139
139 140 /// A contiguous sequence of `len` times `Node`, representing the child nodes
140 141 /// of either some other node or of the repository root.
141 142 ///
142 143 /// Always sorted by ascending `full_path`, to allow binary search.
143 144 /// Since nodes with the same parent nodes also have the same parent path,
144 145 /// only the `base_name`s need to be compared during binary search.
145 146 #[derive(BytesCast, Copy, Clone)]
146 147 #[repr(C)]
147 148 struct ChildNodes {
148 149 start: Offset,
149 150 len: Size,
150 151 }
151 152
152 153 /// A `HgPath` of `len` bytes
153 154 #[derive(BytesCast, Copy, Clone)]
154 155 #[repr(C)]
155 156 struct PathSlice {
156 157 start: Offset,
157 158 len: PathSize,
158 159 }
159 160
160 161 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
161 162 type OptPathSlice = PathSlice;
162 163
/// Unexpected file format found in `.hg/dirstate` with the "v2" format.
///
/// This should only happen if Mercurial is buggy or a repository is
/// corrupted. The error carries no detail about what was wrong.
#[derive(Debug)]
pub struct DirstateV2ParseError;
168 169
169 170 impl From<DirstateV2ParseError> for HgError {
170 171 fn from(_: DirstateV2ParseError) -> Self {
171 172 HgError::corrupted("dirstate-v2 parse error")
172 173 }
173 174 }
174 175
175 176 impl From<DirstateV2ParseError> for crate::DirstateError {
176 177 fn from(error: DirstateV2ParseError) -> Self {
177 178 HgError::from(error).into()
178 179 }
179 180 }
180 181
181 182 impl<'on_disk> Docket<'on_disk> {
182 183 pub fn parents(&self) -> DirstateParents {
183 184 use crate::Node;
184 185 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
185 186 .unwrap()
186 187 .clone();
187 188 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
188 189 .unwrap()
189 190 .clone();
190 191 DirstateParents { p1, p2 }
191 192 }
192 193
193 194 pub fn tree_metadata(&self) -> &[u8] {
194 195 self.header.metadata.as_bytes()
195 196 }
196 197
197 198 pub fn data_size(&self) -> usize {
198 199 // This `unwrap` could only panic on a 16-bit CPU
199 200 self.header.data_size.get().try_into().unwrap()
200 201 }
201 202
202 203 pub fn data_filename(&self) -> String {
203 204 String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
204 205 }
205 206 }
206 207
207 208 pub fn read_docket(
208 209 on_disk: &[u8],
209 210 ) -> Result<Docket<'_>, DirstateV2ParseError> {
210 211 let (header, uuid) =
211 212 DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
212 213 let uuid_size = header.uuid_size as usize;
213 214 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
214 215 Ok(Docket { header, uuid })
215 216 } else {
216 217 Err(DirstateV2ParseError)
217 218 }
218 219 }
219 220
220 221 pub(super) fn read<'on_disk>(
221 222 on_disk: &'on_disk [u8],
222 223 metadata: &[u8],
223 224 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
224 225 if on_disk.is_empty() {
225 226 return Ok(DirstateMap::empty(on_disk));
226 227 }
227 228 let (meta, _) = TreeMetadata::from_bytes(metadata)
228 229 .map_err(|_| DirstateV2ParseError)?;
229 230 let dirstate_map = DirstateMap {
230 231 on_disk,
231 232 root: dirstate_map::ChildNodes::OnDisk(read_nodes(
232 233 on_disk,
233 234 meta.root_nodes,
234 235 )?),
235 236 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
236 237 nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
237 238 ignore_patterns_hash: meta.ignore_patterns_hash,
238 239 unreachable_bytes: meta.unreachable_bytes.get(),
239 240 };
240 241 Ok(dirstate_map)
241 242 }
242 243
243 244 impl Node {
244 245 pub(super) fn full_path<'on_disk>(
245 246 &self,
246 247 on_disk: &'on_disk [u8],
247 248 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
248 249 read_hg_path(on_disk, self.full_path)
249 250 }
250 251
251 252 pub(super) fn base_name_start<'on_disk>(
252 253 &self,
253 254 ) -> Result<usize, DirstateV2ParseError> {
254 255 let start = self.base_name_start.get();
255 256 if start < self.full_path.len.get() {
256 257 let start = usize::try_from(start)
257 258 // u32 -> usize, could only panic on a 16-bit CPU
258 259 .expect("dirstate-v2 base_name_start out of bounds");
259 260 Ok(start)
260 261 } else {
261 262 Err(DirstateV2ParseError)
262 263 }
263 264 }
264 265
265 266 pub(super) fn base_name<'on_disk>(
266 267 &self,
267 268 on_disk: &'on_disk [u8],
268 269 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
269 270 let full_path = self.full_path(on_disk)?;
270 271 let base_name_start = self.base_name_start()?;
271 272 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
272 273 }
273 274
274 275 pub(super) fn path<'on_disk>(
275 276 &self,
276 277 on_disk: &'on_disk [u8],
277 278 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
278 279 Ok(WithBasename::from_raw_parts(
279 280 Cow::Borrowed(self.full_path(on_disk)?),
280 281 self.base_name_start()?,
281 282 ))
282 283 }
283 284
284 285 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
285 286 self.copy_source.start.get() != 0
286 287 }
287 288
288 289 pub(super) fn copy_source<'on_disk>(
289 290 &self,
290 291 on_disk: &'on_disk [u8],
291 292 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
292 293 Ok(if self.has_copy_source() {
293 294 Some(read_hg_path(on_disk, self.copy_source)?)
294 295 } else {
295 296 None
296 297 })
297 298 }
298 299
299 300 fn flags(&self) -> Flags {
300 301 Flags::from_bits_truncate(self.flags.get())
301 302 }
302 303
303 304 fn has_entry(&self) -> bool {
304 305 self.flags().intersects(
305 306 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
306 307 )
307 308 }
308 309
309 310 pub(super) fn node_data(
310 311 &self,
311 312 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
312 313 if self.has_entry() {
313 314 Ok(dirstate_map::NodeData::Entry(self.assume_entry()))
314 315 } else if let Some(mtime) = self.cached_directory_mtime()? {
315 316 Ok(dirstate_map::NodeData::CachedDirectory { mtime })
316 317 } else {
317 318 Ok(dirstate_map::NodeData::None)
318 319 }
319 320 }
320 321
321 322 pub(super) fn cached_directory_mtime(
322 323 &self,
323 324 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
324 325 if self.flags().contains(Flags::HAS_DIRECTORY_MTIME) {
325 326 if self.flags().contains(Flags::HAS_FILE_MTIME) {
326 327 Err(DirstateV2ParseError)
327 328 } else {
328 329 Ok(Some(self.mtime.try_into()?))
329 330 }
330 331 } else {
331 332 Ok(None)
332 333 }
333 334 }
334 335
335 336 fn synthesize_unix_mode(&self) -> u32 {
336 337 let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
337 338 libc::S_IFLNK
338 339 } else {
339 340 libc::S_IFREG
340 341 };
341 342 let permisions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
342 343 0o755
343 344 } else {
344 345 0o644
345 346 };
346 347 file_type | permisions
347 348 }
348 349
349 350 fn assume_entry(&self) -> DirstateEntry {
350 351 // TODO: convert through raw bits instead?
351 352 let wdir_tracked = self.flags().contains(Flags::WDIR_TRACKED);
352 353 let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
353 354 let p2_info = self.flags().contains(Flags::P2_INFO);
354 let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE) {
355 let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
356 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
357 {
355 358 Some((self.synthesize_unix_mode(), self.size.into()))
356 359 } else {
357 360 None
358 361 };
359 let mtime = if self.flags().contains(Flags::HAS_FILE_MTIME) {
362 let mtime = if self.flags().contains(Flags::HAS_FILE_MTIME)
363 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
364 {
360 365 Some(self.mtime.truncated_seconds.into())
361 366 } else {
362 367 None
363 368 };
364 369 DirstateEntry::from_v2_data(
365 370 wdir_tracked,
366 371 p1_tracked,
367 372 p2_info,
368 373 mode_size,
369 374 mtime,
370 375 )
371 376 }
372 377
373 378 pub(super) fn entry(
374 379 &self,
375 380 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
376 381 if self.has_entry() {
377 382 Ok(Some(self.assume_entry()))
378 383 } else {
379 384 Ok(None)
380 385 }
381 386 }
382 387
383 388 pub(super) fn children<'on_disk>(
384 389 &self,
385 390 on_disk: &'on_disk [u8],
386 391 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
387 392 read_nodes(on_disk, self.children)
388 393 }
389 394
390 395 pub(super) fn to_in_memory_node<'on_disk>(
391 396 &self,
392 397 on_disk: &'on_disk [u8],
393 398 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
394 399 Ok(dirstate_map::Node {
395 400 children: dirstate_map::ChildNodes::OnDisk(
396 401 self.children(on_disk)?,
397 402 ),
398 403 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
399 404 data: self.node_data()?,
400 405 descendants_with_entry_count: self
401 406 .descendants_with_entry_count
402 407 .get(),
403 408 tracked_descendants_count: self.tracked_descendants_count.get(),
404 409 })
405 410 }
406 411
407 412 fn from_dirstate_entry(
408 413 entry: &DirstateEntry,
409 414 ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
410 415 let (wdir_tracked, p1_tracked, p2_info, mode_size_opt, mtime_opt) =
411 416 entry.v2_data();
412 417 // TODO: convert throug raw flag bits instead?
413 418 let mut flags = Flags::empty();
414 419 flags.set(Flags::WDIR_TRACKED, wdir_tracked);
415 420 flags.set(Flags::P1_TRACKED, p1_tracked);
416 421 flags.set(Flags::P2_INFO, p2_info);
417 422 let size = if let Some((m, s)) = mode_size_opt {
418 423 let exec_perm = m & libc::S_IXUSR != 0;
419 424 let is_symlink = m & libc::S_IFMT == libc::S_IFLNK;
420 425 flags.set(Flags::MODE_EXEC_PERM, exec_perm);
421 426 flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
422 427 flags.insert(Flags::HAS_MODE_AND_SIZE);
423 428 s.into()
424 429 } else {
425 430 0.into()
426 431 };
427 432 let mtime = if let Some(m) = mtime_opt {
428 433 flags.insert(Flags::HAS_FILE_MTIME);
429 434 PackedTruncatedTimestamp {
430 435 truncated_seconds: m.into(),
431 436 nanoseconds: 0.into(),
432 437 }
433 438 } else {
434 439 PackedTruncatedTimestamp::null()
435 440 };
436 441 (flags, size, mtime)
437 442 }
438 443 }
439 444
440 445 fn read_hg_path(
441 446 on_disk: &[u8],
442 447 slice: PathSlice,
443 448 ) -> Result<&HgPath, DirstateV2ParseError> {
444 449 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
445 450 }
446 451
447 452 fn read_nodes(
448 453 on_disk: &[u8],
449 454 slice: ChildNodes,
450 455 ) -> Result<&[Node], DirstateV2ParseError> {
451 456 read_slice(on_disk, slice.start, slice.len.get())
452 457 }
453 458
454 459 fn read_slice<T, Len>(
455 460 on_disk: &[u8],
456 461 start: Offset,
457 462 len: Len,
458 463 ) -> Result<&[T], DirstateV2ParseError>
459 464 where
460 465 T: BytesCast,
461 466 Len: TryInto<usize>,
462 467 {
463 468 // Either `usize::MAX` would result in "out of bounds" error since a single
464 469 // `&[u8]` cannot occupy the entire addess space.
465 470 let start = start.get().try_into().unwrap_or(std::usize::MAX);
466 471 let len = len.try_into().unwrap_or(std::usize::MAX);
467 472 on_disk
468 473 .get(start..)
469 474 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
470 475 .map(|(slice, _rest)| slice)
471 476 .ok_or_else(|| DirstateV2ParseError)
472 477 }
473 478
474 479 pub(crate) fn for_each_tracked_path<'on_disk>(
475 480 on_disk: &'on_disk [u8],
476 481 metadata: &[u8],
477 482 mut f: impl FnMut(&'on_disk HgPath),
478 483 ) -> Result<(), DirstateV2ParseError> {
479 484 let (meta, _) = TreeMetadata::from_bytes(metadata)
480 485 .map_err(|_| DirstateV2ParseError)?;
481 486 fn recur<'on_disk>(
482 487 on_disk: &'on_disk [u8],
483 488 nodes: ChildNodes,
484 489 f: &mut impl FnMut(&'on_disk HgPath),
485 490 ) -> Result<(), DirstateV2ParseError> {
486 491 for node in read_nodes(on_disk, nodes)? {
487 492 if let Some(entry) = node.entry()? {
488 493 if entry.state().is_tracked() {
489 494 f(node.full_path(on_disk)?)
490 495 }
491 496 }
492 497 recur(on_disk, node.children, f)?
493 498 }
494 499 Ok(())
495 500 }
496 501 recur(on_disk, meta.root_nodes, &mut f)
497 502 }
498 503
499 504 /// Returns new data and metadata, together with whether that data should be
500 505 /// appended to the existing data file whose content is at
501 506 /// `dirstate_map.on_disk` (true), instead of written to a new data file
502 507 /// (false).
503 508 pub(super) fn write(
504 509 dirstate_map: &mut DirstateMap,
505 510 can_append: bool,
506 511 ) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
507 512 let append = can_append && dirstate_map.write_should_append();
508 513
509 514 // This ignores the space for paths, and for nodes without an entry.
510 515 // TODO: better estimate? Skip the `Vec` and write to a file directly?
511 516 let size_guess = std::mem::size_of::<Node>()
512 517 * dirstate_map.nodes_with_entry_count as usize;
513 518
514 519 let mut writer = Writer {
515 520 dirstate_map,
516 521 append,
517 522 out: Vec::with_capacity(size_guess),
518 523 };
519 524
520 525 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
521 526
522 527 let meta = TreeMetadata {
523 528 root_nodes,
524 529 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
525 530 nodes_with_copy_source_count: dirstate_map
526 531 .nodes_with_copy_source_count
527 532 .into(),
528 533 unreachable_bytes: dirstate_map.unreachable_bytes.into(),
529 534 unused: [0; 4],
530 535 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
531 536 };
532 537 Ok((writer.out, meta.as_bytes().to_vec(), append))
533 538 }
534 539
535 540 struct Writer<'dmap, 'on_disk> {
536 541 dirstate_map: &'dmap DirstateMap<'on_disk>,
537 542 append: bool,
538 543 out: Vec<u8>,
539 544 }
540 545
541 546 impl Writer<'_, '_> {
542 547 fn write_nodes(
543 548 &mut self,
544 549 nodes: dirstate_map::ChildNodesRef,
545 550 ) -> Result<ChildNodes, DirstateError> {
546 551 // Reuse already-written nodes if possible
547 552 if self.append {
548 553 if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
549 554 let start = self.on_disk_offset_of(nodes_slice).expect(
550 555 "dirstate-v2 OnDisk nodes not found within on_disk",
551 556 );
552 557 let len = child_nodes_len_from_usize(nodes_slice.len());
553 558 return Ok(ChildNodes { start, len });
554 559 }
555 560 }
556 561
557 562 // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
558 563 // undefined iteration order. Sort to enable binary search in the
559 564 // written file.
560 565 let nodes = nodes.sorted();
561 566 let nodes_len = nodes.len();
562 567
563 568 // First accumulate serialized nodes in a `Vec`
564 569 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
565 570 for node in nodes {
566 571 let children =
567 572 self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
568 573 let full_path = node.full_path(self.dirstate_map.on_disk)?;
569 574 let full_path = self.write_path(full_path.as_bytes());
570 575 let copy_source = if let Some(source) =
571 576 node.copy_source(self.dirstate_map.on_disk)?
572 577 {
573 578 self.write_path(source.as_bytes())
574 579 } else {
575 580 PathSlice {
576 581 start: 0.into(),
577 582 len: 0.into(),
578 583 }
579 584 };
580 585 on_disk_nodes.push(match node {
581 586 NodeRef::InMemory(path, node) => {
582 587 let (flags, size, mtime) = match &node.data {
583 588 dirstate_map::NodeData::Entry(entry) => {
584 589 Node::from_dirstate_entry(entry)
585 590 }
586 591 dirstate_map::NodeData::CachedDirectory { mtime } => (
587 592 Flags::HAS_DIRECTORY_MTIME,
588 593 0.into(),
589 594 (*mtime).into(),
590 595 ),
591 596 dirstate_map::NodeData::None => (
592 597 Flags::empty(),
593 598 0.into(),
594 599 PackedTruncatedTimestamp::null(),
595 600 ),
596 601 };
597 602 Node {
598 603 children,
599 604 copy_source,
600 605 full_path,
601 606 base_name_start: u16::try_from(path.base_name_start())
602 607 // Could only panic for paths over 64 KiB
603 608 .expect("dirstate-v2 path length overflow")
604 609 .into(),
605 610 descendants_with_entry_count: node
606 611 .descendants_with_entry_count
607 612 .into(),
608 613 tracked_descendants_count: node
609 614 .tracked_descendants_count
610 615 .into(),
611 616 flags: flags.bits().into(),
612 617 size,
613 618 mtime,
614 619 }
615 620 }
616 621 NodeRef::OnDisk(node) => Node {
617 622 children,
618 623 copy_source,
619 624 full_path,
620 625 ..*node
621 626 },
622 627 })
623 628 }
624 629 // … so we can write them contiguously, after writing everything else
625 630 // they refer to.
626 631 let start = self.current_offset();
627 632 let len = child_nodes_len_from_usize(nodes_len);
628 633 self.out.extend(on_disk_nodes.as_bytes());
629 634 Ok(ChildNodes { start, len })
630 635 }
631 636
632 637 /// If the given slice of items is within `on_disk`, returns its offset
633 638 /// from the start of `on_disk`.
634 639 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
635 640 where
636 641 T: BytesCast,
637 642 {
638 643 fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
639 644 let start = slice.as_ptr() as usize;
640 645 let end = start + slice.len();
641 646 start..=end
642 647 }
643 648 let slice_addresses = address_range(slice.as_bytes());
644 649 let on_disk_addresses = address_range(self.dirstate_map.on_disk);
645 650 if on_disk_addresses.contains(slice_addresses.start())
646 651 && on_disk_addresses.contains(slice_addresses.end())
647 652 {
648 653 let offset = slice_addresses.start() - on_disk_addresses.start();
649 654 Some(offset_from_usize(offset))
650 655 } else {
651 656 None
652 657 }
653 658 }
654 659
655 660 fn current_offset(&mut self) -> Offset {
656 661 let mut offset = self.out.len();
657 662 if self.append {
658 663 offset += self.dirstate_map.on_disk.len()
659 664 }
660 665 offset_from_usize(offset)
661 666 }
662 667
663 668 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
664 669 let len = path_len_from_usize(slice.len());
665 670 // Reuse an already-written path if possible
666 671 if self.append {
667 672 if let Some(start) = self.on_disk_offset_of(slice) {
668 673 return PathSlice { start, len };
669 674 }
670 675 }
671 676 let start = self.current_offset();
672 677 self.out.extend(slice.as_bytes());
673 678 PathSlice { start, len }
674 679 }
675 680 }
676 681
677 682 fn offset_from_usize(x: usize) -> Offset {
678 683 u32::try_from(x)
679 684 // Could only panic for a dirstate file larger than 4 GiB
680 685 .expect("dirstate-v2 offset overflow")
681 686 .into()
682 687 }
683 688
684 689 fn child_nodes_len_from_usize(x: usize) -> Size {
685 690 u32::try_from(x)
686 691 // Could only panic with over 4 billion nodes
687 692 .expect("dirstate-v2 slice length overflow")
688 693 .into()
689 694 }
690 695
691 696 fn path_len_from_usize(x: usize) -> PathSize {
692 697 u16::try_from(x)
693 698 // Could only panic for paths over 64 KiB
694 699 .expect("dirstate-v2 path length overflow")
695 700 .into()
696 701 }
697 702
698 703 impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
699 704 fn from(timestamp: TruncatedTimestamp) -> Self {
700 705 Self {
701 706 truncated_seconds: timestamp.truncated_seconds().into(),
702 707 nanoseconds: timestamp.nanoseconds().into(),
703 708 }
704 709 }
705 710 }
706 711
707 712 impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
708 713 type Error = DirstateV2ParseError;
709 714
710 715 fn try_from(
711 716 timestamp: PackedTruncatedTimestamp,
712 717 ) -> Result<Self, Self::Error> {
713 718 Self::from_already_truncated(
714 719 timestamp.truncated_seconds.get(),
715 720 timestamp.nanoseconds.get(),
716 721 )
717 722 }
718 723 }
719 724 impl PackedTruncatedTimestamp {
720 725 fn null() -> Self {
721 726 Self {
722 727 truncated_seconds: 0.into(),
723 728 nanoseconds: 0.into(),
724 729 }
725 730 }
726 731 }
General Comments 0
You need to be logged in to leave comments. Login now