##// END OF EJS Templates
dirstate-v2: initial Python parser...
Simon Sapin -
r49035:a32a9607 default
parent child Browse files
Show More
@@ -0,0 +1,118 b''
1 # v2.py - Pure-Python implementation of the dirstate-v2 file format
2 #
3 # Copyright Mercurial Contributors
4 #
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
7
8 from __future__ import absolute_import
9
10 import struct
11
12 from .. import policy
13
14 parsers = policy.importmod('parsers')
15
16
# Must match the constant of the same name in
# `rust/hg-core/src/dirstate_tree/on_disk.rs`
TREE_METADATA_SIZE = 44
NODE_SIZE = 43


# Must match the `TreeMetadata` Rust struct in
# `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there.
#
# * 4 bytes: start offset of root nodes
# * 4 bytes: number of root nodes
# * 4 bytes: total number of nodes in the tree that have an entry
# * 4 bytes: total number of nodes in the tree that have a copy source
# * 4 bytes: number of bytes in the data file that are not used anymore
# * 4 bytes: unused
# * 20 bytes: SHA-1 hash of ignore patterns
TREE_METADATA = struct.Struct('>LLLLL4s20s')


# Must match the `Node` Rust struct in
# `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there.
#
# * 4 bytes: start offset of full path
# * 2 bytes: length of the full path
# * 2 bytes: length within the full path before its "base name"
# * 4 bytes: start offset of the copy source if any, or zero for no copy source
# * 2 bytes: length of the copy source if any, or unused
# * 4 bytes: start offset of child nodes
# * 4 bytes: number of child nodes
# * 4 bytes: number of descendant nodes that have an entry
# * 4 bytes: number of descendant nodes that have a "tracked" state
# * 1 byte: flags
# * 4 bytes: expected size
# * 4 bytes: mtime seconds
# * 4 bytes: mtime nanoseconds
NODE = struct.Struct('>LHHLHLLLLBlll')


# Sanity checks: the hand-maintained size constants above must agree with
# the sizes `struct` computes from the format strings.
assert TREE_METADATA_SIZE == TREE_METADATA.size
assert NODE_SIZE == NODE.size
57
58
def parse_dirstate(map, copy_map, data, tree_metadata):
    """parse a full v2-dirstate from binary data into dictionaries:

    - map: a {path: entry} mapping that will be filled
    - copy_map: a {path: copy-source} mapping that will be filled
    - data: a binary blob containing v2 nodes data
    - tree_metadata: a binary blob of the top level node (from the docket)
    """
    # Unpack the docket metadata; only the root-node span is needed to
    # start the recursive walk, the other fields are counters/bookkeeping.
    (
        root_nodes_start,
        root_nodes_len,
        _nodes_with_entry_count,
        _nodes_with_copy_source_count,
        _unreachable_bytes,
        _unused,
        _ignore_patterns_hash,
    ) = TREE_METADATA.unpack(tree_metadata)
    parse_nodes(map, copy_map, data, root_nodes_start, root_nodes_len)
77
78
def parse_nodes(map, copy_map, data, start, len):
    """Read <len> consecutive nodes from <data>, beginning at offset <start>.

    Helper for parse_dirstate: walks the node tree recursively, filling
    `map` with entries and `copy_map` with copy sources along the way.
    """
    end = start + NODE_SIZE * len
    for node_start in range(start, end, NODE_SIZE):
        node_bytes = slice_with_len(data, node_start, NODE_SIZE)
        (
            path_start,
            path_len,
            _basename_start,
            copy_source_start,
            copy_source_len,
            children_start,
            children_count,
            _descendants_with_entry_count,
            _tracked_descendants_count,
            flags,
            size,
            mtime_s,
            _mtime_ns,
        ) = NODE.unpack(node_bytes)

        # Recurse into this node's children first
        parse_nodes(map, copy_map, data, children_start, children_count)

        item = parsers.DirstateItem.from_v2_data(flags, size, mtime_s)
        if item.any_tracked:
            path = slice_with_len(data, path_start, path_len)
            map[path] = item
            # A zero start offset means "no copy source"
            if copy_source_start:
                copy_map[path] = slice_with_len(
                    data, copy_source_start, copy_source_len
                )
115
116
def slice_with_len(data, start, len):
    # Return the <len>-item slice of <data> that begins at offset <start>.
    end = start + len
    return data[start:end]
@@ -1,1126 +1,1155 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12 #include <ctype.h>
13 13 #include <stddef.h>
14 14 #include <string.h>
15 15
16 16 #include "bitmanipulation.h"
17 17 #include "charencode.h"
18 18 #include "util.h"
19 19
20 20 #ifdef IS_PY3K
21 21 /* The mapping of Python types is meant to be temporary to get Python
22 22 * 3 to compile. We should remove this once Python 3 support is fully
23 23 * supported and proper types are used in the extensions themselves. */
24 24 #define PyInt_Check PyLong_Check
25 25 #define PyInt_FromLong PyLong_FromLong
26 26 #define PyInt_FromSsize_t PyLong_FromSsize_t
27 27 #define PyInt_AsLong PyLong_AsLong
28 28 #endif
29 29
30 30 static const char *const versionerrortext = "Python minor version mismatch";
31 31
32 32 static const int dirstate_v1_from_p2 = -2;
33 33 static const int dirstate_v1_nonnormal = -1;
34 34 static const int ambiguous_time = -1;
35 35
/* Python-level wrapper: create a dict pre-sized for `expected_size` items
 * (avoids rehashing while the caller fills it in). */
static PyObject *dict_new_presized(PyObject *self, PyObject *args)
{
	Py_ssize_t expected_size;

	if (!PyArg_ParseTuple(args, "n:make_presized_dict", &expected_size)) {
		return NULL;
	}

	return _dict_new_presized(expected_size);
}
46 46
47 47 static PyObject *dirstate_item_new(PyTypeObject *subtype, PyObject *args,
48 48 PyObject *kwds)
49 49 {
50 50 /* We do all the initialization here and not a tp_init function because
51 51 * dirstate_item is immutable. */
52 52 dirstateItemObject *t;
53 53 int wc_tracked;
54 54 int p1_tracked;
55 55 int p2_info;
56 56 int has_meaningful_data;
57 57 int has_meaningful_mtime;
58 58 int mode;
59 59 int size;
60 60 int mtime;
61 61 PyObject *parentfiledata;
62 62 static char *keywords_name[] = {
63 63 "wc_tracked",
64 64 "p1_tracked",
65 65 "p2_info",
66 66 "has_meaningful_data",
67 67 "has_meaningful_mtime",
68 68 "parentfiledata",
69 69 NULL,
70 70 };
71 71 wc_tracked = 0;
72 72 p1_tracked = 0;
73 73 p2_info = 0;
74 74 has_meaningful_mtime = 1;
75 75 has_meaningful_data = 1;
76 76 parentfiledata = Py_None;
77 77 if (!PyArg_ParseTupleAndKeywords(
78 78 args, kwds, "|iiiiiO", keywords_name, &wc_tracked, &p1_tracked,
79 79 &p2_info, &has_meaningful_data, &has_meaningful_mtime,
80 80 &parentfiledata)) {
81 81 return NULL;
82 82 }
83 83 t = (dirstateItemObject *)subtype->tp_alloc(subtype, 1);
84 84 if (!t) {
85 85 return NULL;
86 86 }
87 87
88 88 t->flags = 0;
89 89 if (wc_tracked) {
90 90 t->flags |= dirstate_flag_wc_tracked;
91 91 }
92 92 if (p1_tracked) {
93 93 t->flags |= dirstate_flag_p1_tracked;
94 94 }
95 95 if (p2_info) {
96 96 t->flags |= dirstate_flag_p2_info;
97 97 }
98 98
99 99 if (parentfiledata != Py_None) {
100 100 if (!PyTuple_CheckExact(parentfiledata)) {
101 101 PyErr_SetString(
102 102 PyExc_TypeError,
103 103 "parentfiledata should be a Tuple or None");
104 104 return NULL;
105 105 }
106 106 mode = (int)PyLong_AsLong(PyTuple_GetItem(parentfiledata, 0));
107 107 size = (int)PyLong_AsLong(PyTuple_GetItem(parentfiledata, 1));
108 108 mtime = (int)PyLong_AsLong(PyTuple_GetItem(parentfiledata, 2));
109 109 } else {
110 110 has_meaningful_data = 0;
111 111 has_meaningful_mtime = 0;
112 112 }
113 113 if (has_meaningful_data) {
114 114 t->flags |= dirstate_flag_has_meaningful_data;
115 115 t->mode = mode;
116 116 t->size = size;
117 117 } else {
118 118 t->mode = 0;
119 119 t->size = 0;
120 120 }
121 121 if (has_meaningful_mtime) {
122 122 t->flags |= dirstate_flag_has_meaningful_mtime;
123 123 t->mtime = mtime;
124 124 } else {
125 125 t->mtime = 0;
126 126 }
127 127 return (PyObject *)t;
128 128 }
129 129
static void dirstate_item_dealloc(PyObject *o)
{
	PyObject_Del(o);
}

/* tracked in the working copy */
static inline bool dirstate_item_c_tracked(dirstateItemObject *self)
{
	return (self->flags & dirstate_flag_wc_tracked);
}

/* tracked anywhere: in the working copy, in p1, or known from p2 */
static inline bool dirstate_item_c_any_tracked(dirstateItemObject *self)
{
	const unsigned char mask = dirstate_flag_wc_tracked |
	                           dirstate_flag_p1_tracked |
	                           dirstate_flag_p2_info;
	return (self->flags & mask);
}

/* "added": tracked in the working copy only, unknown to both parents */
static inline bool dirstate_item_c_added(dirstateItemObject *self)
{
	const unsigned char mask =
	    (dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
	     dirstate_flag_p2_info);
	const unsigned char target = dirstate_flag_wc_tracked;
	return (self->flags & mask) == target;
}

/* "removed": known to a parent but no longer tracked in the working copy */
static inline bool dirstate_item_c_removed(dirstateItemObject *self)
{
	if (self->flags & dirstate_flag_wc_tracked) {
		return false;
	}
	return (self->flags &
	        (dirstate_flag_p1_tracked | dirstate_flag_p2_info));
}

/* "merged": tracked in the working copy, p1 and p2 all at once */
static inline bool dirstate_item_c_merged(dirstateItemObject *self)
{
	return ((self->flags & dirstate_flag_wc_tracked) &&
	        (self->flags & dirstate_flag_p1_tracked) &&
	        (self->flags & dirstate_flag_p2_info));
}

/* "from p2": tracked in the working copy and p2, but not in p1 */
static inline bool dirstate_item_c_from_p2(dirstateItemObject *self)
{
	return ((self->flags & dirstate_flag_wc_tracked) &&
	        !(self->flags & dirstate_flag_p1_tracked) &&
	        (self->flags & dirstate_flag_p2_info));
}
179 179
/* Map the flag set back to a dirstate-v1 state character:
 * 'r' (removed), 'm' (merged), 'a' (added) or 'n' (normal). */
static inline char dirstate_item_c_v1_state(dirstateItemObject *self)
{
	if (dirstate_item_c_removed(self)) {
		return 'r';
	} else if (dirstate_item_c_merged(self)) {
		return 'm';
	} else if (dirstate_item_c_added(self)) {
		return 'a';
	} else {
		return 'n';
	}
}

/* v1 "mode": the stored mode when it is meaningful, 0 otherwise */
static inline int dirstate_item_c_v1_mode(dirstateItemObject *self)
{
	if (self->flags & dirstate_flag_has_meaningful_data) {
		return self->mode;
	} else {
		return 0;
	}
}

/* v1 "size": encodes extra state through the sentinel values
 * dirstate_v1_nonnormal (-1) and dirstate_v1_from_p2 (-2) defined at
 * the top of this file. */
static inline int dirstate_item_c_v1_size(dirstateItemObject *self)
{
	if (!(self->flags & dirstate_flag_wc_tracked) &&
	    (self->flags & dirstate_flag_p2_info)) {
		if (self->flags & dirstate_flag_p1_tracked) {
			return dirstate_v1_nonnormal;
		} else {
			return dirstate_v1_from_p2;
		}
	} else if (dirstate_item_c_removed(self)) {
		return 0;
	} else if (self->flags & dirstate_flag_p2_info) {
		return dirstate_v1_from_p2;
	} else if (dirstate_item_c_added(self)) {
		return dirstate_v1_nonnormal;
	} else if (self->flags & dirstate_flag_has_meaningful_data) {
		return self->size;
	} else {
		return dirstate_v1_nonnormal;
	}
}

/* v1 "mtime": the stored mtime when meaningful and the file is plainly
 * tracked in wc+p1 without p2 involvement; 0 for removed files;
 * ambiguous_time (-1) otherwise, which forces a later content check. */
static inline int dirstate_item_c_v1_mtime(dirstateItemObject *self)
{
	if (dirstate_item_c_removed(self)) {
		return 0;
	} else if (!(self->flags & dirstate_flag_has_meaningful_mtime) ||
	           !(self->flags & dirstate_flag_p1_tracked) ||
	           !(self->flags & dirstate_flag_wc_tracked) ||
	           (self->flags & dirstate_flag_p2_info)) {
		return ambiguous_time;
	} else {
		return self->mtime;
	}
}
237 237
/* Python-level accessor: v1 "state" as a one-byte bytes object */
static PyObject *dirstate_item_v1_state(dirstateItemObject *self)
{
	char state = dirstate_item_c_v1_state(self);
	return PyBytes_FromStringAndSize(&state, 1);
};

/* Python-level accessor: v1 "mode" */
static PyObject *dirstate_item_v1_mode(dirstateItemObject *self)
{
	return PyInt_FromLong(dirstate_item_c_v1_mode(self));
};

/* Python-level accessor: v1 "size" */
static PyObject *dirstate_item_v1_size(dirstateItemObject *self)
{
	return PyInt_FromLong(dirstate_item_c_v1_size(self));
};

/* Python-level accessor: v1 "mtime" */
static PyObject *dirstate_item_v1_mtime(dirstateItemObject *self)
{
	return PyInt_FromLong(dirstate_item_c_v1_mtime(self));
};

/* True when the item is 'n' (normal) and its mtime equals `value`:
 * writing it out now would be ambiguous with a same-second change. */
static PyObject *dirstate_item_need_delay(dirstateItemObject *self,
                                          PyObject *value)
{
	long now;
	if (!pylong_to_long(value, &now)) {
		return NULL;
	}
	if (dirstate_item_c_v1_state(self) == 'n' &&
	    dirstate_item_c_v1_mtime(self) == now) {
		Py_RETURN_TRUE;
	} else {
		Py_RETURN_FALSE;
	}
};
273 273
274 274 /* This will never change since it's bound to V1
275 275 */
276 276 static inline dirstateItemObject *
277 277 dirstate_item_from_v1_data(char state, int mode, int size, int mtime)
278 278 {
279 279 dirstateItemObject *t =
280 280 PyObject_New(dirstateItemObject, &dirstateItemType);
281 281 if (!t) {
282 282 return NULL;
283 283 }
284 284 t->flags = 0;
285 285 t->mode = 0;
286 286 t->size = 0;
287 287 t->mtime = 0;
288 288
289 289 if (state == 'm') {
290 290 t->flags = (dirstate_flag_wc_tracked |
291 291 dirstate_flag_p1_tracked | dirstate_flag_p2_info);
292 292 } else if (state == 'a') {
293 293 t->flags = dirstate_flag_wc_tracked;
294 294 } else if (state == 'r') {
295 295 if (size == dirstate_v1_nonnormal) {
296 296 t->flags =
297 297 dirstate_flag_p1_tracked | dirstate_flag_p2_info;
298 298 } else if (size == dirstate_v1_from_p2) {
299 299 t->flags = dirstate_flag_p2_info;
300 300 } else {
301 301 t->flags = dirstate_flag_p1_tracked;
302 302 }
303 303 } else if (state == 'n') {
304 304 if (size == dirstate_v1_from_p2) {
305 305 t->flags =
306 306 dirstate_flag_wc_tracked | dirstate_flag_p2_info;
307 307 } else if (size == dirstate_v1_nonnormal) {
308 308 t->flags =
309 309 dirstate_flag_wc_tracked | dirstate_flag_p1_tracked;
310 310 } else if (mtime == ambiguous_time) {
311 311 t->flags = (dirstate_flag_wc_tracked |
312 312 dirstate_flag_p1_tracked |
313 313 dirstate_flag_has_meaningful_data);
314 314 t->mode = mode;
315 315 t->size = size;
316 316 } else {
317 317 t->flags = (dirstate_flag_wc_tracked |
318 318 dirstate_flag_p1_tracked |
319 319 dirstate_flag_has_meaningful_data |
320 320 dirstate_flag_has_meaningful_mtime);
321 321 t->mode = mode;
322 322 t->size = size;
323 323 t->mtime = mtime;
324 324 }
325 325 } else {
326 326 PyErr_Format(PyExc_RuntimeError,
327 327 "unknown state: `%c` (%d, %d, %d)", state, mode,
328 328 size, mtime, NULL);
329 329 Py_DECREF(t);
330 330 return NULL;
331 331 }
332 332
333 333 return t;
334 334 }
335 335
/* Class method `from_v1_data(state, mode, size, mtime)`: Python entry
 * point around dirstate_item_from_v1_data.
 * This will never change since it's bound to V1, unlike `dirstate_item_new` */
static PyObject *dirstate_item_from_v1_meth(PyTypeObject *subtype,
                                            PyObject *args)
{
	/* We do all the initialization here and not a tp_init function because
	 * dirstate_item is immutable. */
	char state;
	int size, mode, mtime;
	if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime)) {
		return NULL;
	}
	return (PyObject *)dirstate_item_from_v1_data(state, mode, size, mtime);
};
349 349
350 static PyObject *dirstate_item_from_v2_meth(PyTypeObject *subtype,
351 PyObject *args)
352 {
353 dirstateItemObject *t =
354 PyObject_New(dirstateItemObject, &dirstateItemType);
355 if (!t) {
356 return NULL;
357 }
358 if (!PyArg_ParseTuple(args, "bii", &t->flags, &t->size, &t->mtime)) {
359 return NULL;
360 }
361 t->mode = 0;
362 if (t->flags & dirstate_flag_has_meaningful_data) {
363 if (t->flags & dirstate_flag_mode_exec_perm) {
364 t->mode = 0755;
365 } else {
366 t->mode = 0644;
367 }
368 if (t->flags & dirstate_flag_mode_is_symlink) {
369 t->mode |= S_IFLNK;
370 } else {
371 t->mode |= S_IFREG;
372 }
373 }
374 return (PyObject *)t;
375 };
376
/* This means the next status call will have to actually check its content
   to make sure it is correct. */
static PyObject *dirstate_item_set_possibly_dirty(dirstateItemObject *self)
{
	self->flags &= ~dirstate_flag_has_meaningful_mtime;
	Py_RETURN_NONE;
}

/* Record the file as clean: tracked in wc and p1 with known mode, size
 * and mtime. See docstring of the python implementation for details */
static PyObject *dirstate_item_set_clean(dirstateItemObject *self,
                                         PyObject *args)
{
	int size, mode, mtime;
	if (!PyArg_ParseTuple(args, "iii", &mode, &size, &mtime)) {
		return NULL;
	}
	self->flags = dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
	              dirstate_flag_has_meaningful_data |
	              dirstate_flag_has_meaningful_mtime;
	self->mode = mode;
	self->size = size;
	self->mtime = mtime;
	Py_RETURN_NONE;
}

/* Start tracking the file in the working copy; the cached mtime becomes
 * meaningless until the next status check. */
static PyObject *dirstate_item_set_tracked(dirstateItemObject *self)
{
	self->flags |= dirstate_flag_wc_tracked;
	self->flags &= ~dirstate_flag_has_meaningful_mtime;
	Py_RETURN_NONE;
}

/* Stop tracking the file in the working copy and drop cached stat data */
static PyObject *dirstate_item_set_untracked(dirstateItemObject *self)
{
	self->flags &= ~dirstate_flag_wc_tracked;
	self->mode = 0;
	self->mtime = 0;
	self->size = 0;
	Py_RETURN_NONE;
}

/* Drop any information that only makes sense during a merge: p2 data and
 * the cached mode/size/mtime that went with it. */
static PyObject *dirstate_item_drop_merge_data(dirstateItemObject *self)
{
	if (self->flags & dirstate_flag_p2_info) {
		self->flags &= ~(dirstate_flag_p2_info |
		                 dirstate_flag_has_meaningful_data |
		                 dirstate_flag_has_meaningful_mtime);
		self->mode = 0;
		self->mtime = 0;
		self->size = 0;
	}
	Py_RETURN_NONE;
}
/* Method table for DirstateItem objects */
static PyMethodDef dirstate_item_methods[] = {
    {"v1_state", (PyCFunction)dirstate_item_v1_state, METH_NOARGS,
     "return a \"state\" suitable for v1 serialization"},
    {"v1_mode", (PyCFunction)dirstate_item_v1_mode, METH_NOARGS,
     "return a \"mode\" suitable for v1 serialization"},
    {"v1_size", (PyCFunction)dirstate_item_v1_size, METH_NOARGS,
     "return a \"size\" suitable for v1 serialization"},
    {"v1_mtime", (PyCFunction)dirstate_item_v1_mtime, METH_NOARGS,
     "return a \"mtime\" suitable for v1 serialization"},
    {"need_delay", (PyCFunction)dirstate_item_need_delay, METH_O,
     "True if the stored mtime would be ambiguous with the current time"},
    {"from_v1_data", (PyCFunction)dirstate_item_from_v1_meth,
     METH_VARARGS | METH_CLASS, "build a new DirstateItem object from V1 data"},
    {"from_v2_data", (PyCFunction)dirstate_item_from_v2_meth,
     METH_VARARGS | METH_CLASS, "build a new DirstateItem object from V2 data"},
    {"set_possibly_dirty", (PyCFunction)dirstate_item_set_possibly_dirty,
     METH_NOARGS, "mark a file as \"possibly dirty\""},
    {"set_clean", (PyCFunction)dirstate_item_set_clean, METH_VARARGS,
     "mark a file as \"clean\""},
    {"set_tracked", (PyCFunction)dirstate_item_set_tracked, METH_NOARGS,
     "mark a file as \"tracked\""},
    {"set_untracked", (PyCFunction)dirstate_item_set_untracked, METH_NOARGS,
     "mark a file as \"untracked\""},
    {"drop_merge_data", (PyCFunction)dirstate_item_drop_merge_data, METH_NOARGS,
     "remove all \"merge-only\" from a DirstateItem"},
    {NULL} /* Sentinel */
};
428 457
/* Attribute getters for DirstateItem. The mode/size/mtime/state getters
 * reuse the v1 mappings defined above. */
static PyObject *dirstate_item_get_mode(dirstateItemObject *self)
{
	return PyInt_FromLong(dirstate_item_c_v1_mode(self));
};

static PyObject *dirstate_item_get_size(dirstateItemObject *self)
{
	return PyInt_FromLong(dirstate_item_c_v1_size(self));
};

static PyObject *dirstate_item_get_mtime(dirstateItemObject *self)
{
	return PyInt_FromLong(dirstate_item_c_v1_mtime(self));
};

static PyObject *dirstate_item_get_state(dirstateItemObject *self)
{
	char state = dirstate_item_c_v1_state(self);
	return PyBytes_FromStringAndSize(&state, 1);
};

/* The boolean getters below delegate to the dirstate_item_c_* predicates
 * or test the relevant flag bits directly. */
static PyObject *dirstate_item_get_tracked(dirstateItemObject *self)
{
	if (dirstate_item_c_tracked(self)) {
		Py_RETURN_TRUE;
	} else {
		Py_RETURN_FALSE;
	}
};
static PyObject *dirstate_item_get_p1_tracked(dirstateItemObject *self)
{
	if (self->flags & dirstate_flag_p1_tracked) {
		Py_RETURN_TRUE;
	} else {
		Py_RETURN_FALSE;
	}
};

static PyObject *dirstate_item_get_added(dirstateItemObject *self)
{
	if (dirstate_item_c_added(self)) {
		Py_RETURN_TRUE;
	} else {
		Py_RETURN_FALSE;
	}
};

static PyObject *dirstate_item_get_p2_info(dirstateItemObject *self)
{
	if (self->flags & dirstate_flag_wc_tracked &&
	    self->flags & dirstate_flag_p2_info) {
		Py_RETURN_TRUE;
	} else {
		Py_RETURN_FALSE;
	}
};

static PyObject *dirstate_item_get_merged(dirstateItemObject *self)
{
	if (dirstate_item_c_merged(self)) {
		Py_RETURN_TRUE;
	} else {
		Py_RETURN_FALSE;
	}
};

static PyObject *dirstate_item_get_from_p2(dirstateItemObject *self)
{
	if (dirstate_item_c_from_p2(self)) {
		Py_RETURN_TRUE;
	} else {
		Py_RETURN_FALSE;
	}
};

/* True when tracked in wc and p1 with no p2 involvement: the file may be
 * clean, pending an actual content/stat check. */
static PyObject *dirstate_item_get_maybe_clean(dirstateItemObject *self)
{
	if (!(self->flags & dirstate_flag_wc_tracked)) {
		Py_RETURN_FALSE;
	} else if (!(self->flags & dirstate_flag_p1_tracked)) {
		Py_RETURN_FALSE;
	} else if (self->flags & dirstate_flag_p2_info) {
		Py_RETURN_FALSE;
	} else {
		Py_RETURN_TRUE;
	}
};

static PyObject *dirstate_item_get_any_tracked(dirstateItemObject *self)
{
	if (dirstate_item_c_any_tracked(self)) {
		Py_RETURN_TRUE;
	} else {
		Py_RETURN_FALSE;
	}
};

static PyObject *dirstate_item_get_removed(dirstateItemObject *self)
{
	if (dirstate_item_c_removed(self)) {
		Py_RETURN_TRUE;
	} else {
		Py_RETURN_FALSE;
	}
};

/* Property table wiring the getters above to Python attribute names */
static PyGetSetDef dirstate_item_getset[] = {
    {"mode", (getter)dirstate_item_get_mode, NULL, "mode", NULL},
    {"size", (getter)dirstate_item_get_size, NULL, "size", NULL},
    {"mtime", (getter)dirstate_item_get_mtime, NULL, "mtime", NULL},
    {"state", (getter)dirstate_item_get_state, NULL, "state", NULL},
    {"tracked", (getter)dirstate_item_get_tracked, NULL, "tracked", NULL},
    {"p1_tracked", (getter)dirstate_item_get_p1_tracked, NULL, "p1_tracked",
     NULL},
    {"added", (getter)dirstate_item_get_added, NULL, "added", NULL},
    {"p2_info", (getter)dirstate_item_get_p2_info, NULL, "p2_info", NULL},
    {"merged", (getter)dirstate_item_get_merged, NULL, "merged", NULL},
    {"from_p2", (getter)dirstate_item_get_from_p2, NULL, "from_p2", NULL},
    {"maybe_clean", (getter)dirstate_item_get_maybe_clean, NULL, "maybe_clean",
     NULL},
    {"any_tracked", (getter)dirstate_item_get_any_tracked, NULL, "any_tracked",
     NULL},
    {"removed", (getter)dirstate_item_get_removed, NULL, "removed", NULL},
    {NULL} /* Sentinel */
};
554 583
/* Type object for DirstateItem (exposed under its historical name
 * "dirstate_tuple"). Instances are immutable: all initialization happens
 * in tp_new (dirstate_item_new) and the class methods above. */
PyTypeObject dirstateItemType = {
    PyVarObject_HEAD_INIT(NULL, 0)      /* header */
    "dirstate_tuple",                   /* tp_name */
    sizeof(dirstateItemObject),         /* tp_basicsize */
    0,                                  /* tp_itemsize */
    (destructor)dirstate_item_dealloc,  /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    0,                                  /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
    "dirstate tuple",                   /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    0,                                  /* tp_iter */
    0,                                  /* tp_iternext */
    dirstate_item_methods,              /* tp_methods */
    0,                                  /* tp_members */
    dirstate_item_getset,               /* tp_getset */
    0,                                  /* tp_base */
    0,                                  /* tp_dict */
    0,                                  /* tp_descr_get */
    0,                                  /* tp_descr_set */
    0,                                  /* tp_dictoffset */
    0,                                  /* tp_init */
    0,                                  /* tp_alloc */
    dirstate_item_new,                  /* tp_new */
};
595 624
/* Parse a v1 dirstate blob into `dmap` (filename -> DirstateItem) and
 * `cmap` (filename -> copy source).
 *
 * Blob layout (as read below):
 *   - 40 bytes: the p1 and p2 hashes, 20 bytes each;
 *   - then per entry: a 17-byte header (1-byte state, be32 mode, be32
 *     size, be32 mtime, be32 name length) followed by the name; if the
 *     name contains a NUL, the part after it is the copy source.
 *
 * Returns the (p1, p2) tuple on success, NULL with an exception set on
 * malformed input or allocation failure.
 */
static PyObject *parse_dirstate(PyObject *self, PyObject *args)
{
	PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
	PyObject *fname = NULL, *cname = NULL, *entry = NULL;
	char state, *cur, *str, *cpos;
	int mode, size, mtime;
	unsigned int flen, pos = 40;
	Py_ssize_t len = 40;
	Py_ssize_t readlen;

	if (!PyArg_ParseTuple(
	        args, PY23("O!O!s#:parse_dirstate", "O!O!y#:parse_dirstate"),
	        &PyDict_Type, &dmap, &PyDict_Type, &cmap, &str, &readlen)) {
		goto quit;
	}

	len = readlen;

	/* read parents */
	if (len < 40) {
		PyErr_SetString(PyExc_ValueError,
		                "too little data for parents");
		goto quit;
	}

	parents = Py_BuildValue(PY23("s#s#", "y#y#"), str, (Py_ssize_t)20,
	                        str + 20, (Py_ssize_t)20);
	if (!parents) {
		goto quit;
	}

	/* read filenames */
	while (pos >= 40 && pos < len) {
		if (pos + 17 > len) {
			PyErr_SetString(PyExc_ValueError,
			                "overflow in dirstate");
			goto quit;
		}
		cur = str + pos;
		/* unpack header */
		state = *cur;
		mode = getbe32(cur + 1);
		size = getbe32(cur + 5);
		mtime = getbe32(cur + 9);
		flen = getbe32(cur + 13);
		pos += 17;
		cur += 17;
		if (flen > len - pos) {
			PyErr_SetString(PyExc_ValueError,
			                "overflow in dirstate");
			goto quit;
		}

		entry = (PyObject *)dirstate_item_from_v1_data(state, mode,
		                                               size, mtime);
		if (!entry)
			goto quit;
		/* a NUL inside the name separates it from the copy source */
		cpos = memchr(cur, 0, flen);
		if (cpos) {
			fname = PyBytes_FromStringAndSize(cur, cpos - cur);
			cname = PyBytes_FromStringAndSize(
			    cpos + 1, flen - (cpos - cur) - 1);
			if (!fname || !cname ||
			    PyDict_SetItem(cmap, fname, cname) == -1 ||
			    PyDict_SetItem(dmap, fname, entry) == -1) {
				goto quit;
			}
			Py_DECREF(cname);
		} else {
			fname = PyBytes_FromStringAndSize(cur, flen);
			if (!fname ||
			    PyDict_SetItem(dmap, fname, entry) == -1) {
				goto quit;
			}
		}
		Py_DECREF(fname);
		Py_DECREF(entry);
		fname = cname = entry = NULL;
		pos += flen;
	}

	ret = parents;
	Py_INCREF(ret);
quit:
	Py_XDECREF(fname);
	Py_XDECREF(cname);
	Py_XDECREF(entry);
	Py_XDECREF(parents);
	return ret;
}
686 715
687 716 /*
688 717 * Efficiently pack a dirstate object into its on-disk format.
689 718 */
690 719 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
691 720 {
692 721 PyObject *packobj = NULL;
693 722 PyObject *map, *copymap, *pl, *mtime_unset = NULL;
694 723 Py_ssize_t nbytes, pos, l;
695 724 PyObject *k, *v = NULL, *pn;
696 725 char *p, *s;
697 726 int now;
698 727
699 728 if (!PyArg_ParseTuple(args, "O!O!O!i:pack_dirstate", &PyDict_Type, &map,
700 729 &PyDict_Type, &copymap, &PyTuple_Type, &pl,
701 730 &now)) {
702 731 return NULL;
703 732 }
704 733
705 734 if (PyTuple_Size(pl) != 2) {
706 735 PyErr_SetString(PyExc_TypeError, "expected 2-element tuple");
707 736 return NULL;
708 737 }
709 738
710 739 /* Figure out how much we need to allocate. */
711 740 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
712 741 PyObject *c;
713 742 if (!PyBytes_Check(k)) {
714 743 PyErr_SetString(PyExc_TypeError, "expected string key");
715 744 goto bail;
716 745 }
717 746 nbytes += PyBytes_GET_SIZE(k) + 17;
718 747 c = PyDict_GetItem(copymap, k);
719 748 if (c) {
720 749 if (!PyBytes_Check(c)) {
721 750 PyErr_SetString(PyExc_TypeError,
722 751 "expected string key");
723 752 goto bail;
724 753 }
725 754 nbytes += PyBytes_GET_SIZE(c) + 1;
726 755 }
727 756 }
728 757
729 758 packobj = PyBytes_FromStringAndSize(NULL, nbytes);
730 759 if (packobj == NULL) {
731 760 goto bail;
732 761 }
733 762
734 763 p = PyBytes_AS_STRING(packobj);
735 764
736 765 pn = PyTuple_GET_ITEM(pl, 0);
737 766 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
738 767 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
739 768 goto bail;
740 769 }
741 770 memcpy(p, s, l);
742 771 p += 20;
743 772 pn = PyTuple_GET_ITEM(pl, 1);
744 773 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
745 774 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
746 775 goto bail;
747 776 }
748 777 memcpy(p, s, l);
749 778 p += 20;
750 779
751 780 for (pos = 0; PyDict_Next(map, &pos, &k, &v);) {
752 781 dirstateItemObject *tuple;
753 782 char state;
754 783 int mode, size, mtime;
755 784 Py_ssize_t len, l;
756 785 PyObject *o;
757 786 char *t;
758 787
759 788 if (!dirstate_tuple_check(v)) {
760 789 PyErr_SetString(PyExc_TypeError,
761 790 "expected a dirstate tuple");
762 791 goto bail;
763 792 }
764 793 tuple = (dirstateItemObject *)v;
765 794
766 795 state = dirstate_item_c_v1_state(tuple);
767 796 mode = dirstate_item_c_v1_mode(tuple);
768 797 size = dirstate_item_c_v1_size(tuple);
769 798 mtime = dirstate_item_c_v1_mtime(tuple);
770 799 if (state == 'n' && mtime == now) {
771 800 /* See pure/parsers.py:pack_dirstate for why we do
772 801 * this. */
773 802 mtime = -1;
774 803 mtime_unset = (PyObject *)dirstate_item_from_v1_data(
775 804 state, mode, size, mtime);
776 805 if (!mtime_unset) {
777 806 goto bail;
778 807 }
779 808 if (PyDict_SetItem(map, k, mtime_unset) == -1) {
780 809 goto bail;
781 810 }
782 811 Py_DECREF(mtime_unset);
783 812 mtime_unset = NULL;
784 813 }
785 814 *p++ = state;
786 815 putbe32((uint32_t)mode, p);
787 816 putbe32((uint32_t)size, p + 4);
788 817 putbe32((uint32_t)mtime, p + 8);
789 818 t = p + 12;
790 819 p += 16;
791 820 len = PyBytes_GET_SIZE(k);
792 821 memcpy(p, PyBytes_AS_STRING(k), len);
793 822 p += len;
794 823 o = PyDict_GetItem(copymap, k);
795 824 if (o) {
796 825 *p++ = '\0';
797 826 l = PyBytes_GET_SIZE(o);
798 827 memcpy(p, PyBytes_AS_STRING(o), l);
799 828 p += l;
800 829 len += l + 1;
801 830 }
802 831 putbe32((uint32_t)len, t);
803 832 }
804 833
805 834 pos = p - PyBytes_AS_STRING(packobj);
806 835 if (pos != nbytes) {
807 836 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
808 837 (long)pos, (long)nbytes);
809 838 goto bail;
810 839 }
811 840
812 841 return packobj;
813 842 bail:
814 843 Py_XDECREF(mtime_unset);
815 844 Py_XDECREF(packobj);
816 845 Py_XDECREF(v);
817 846 return NULL;
818 847 }
819 848
820 849 #define BUMPED_FIX 1
821 850 #define USING_SHA_256 2
822 851 #define FM1_HEADER_SIZE (4 + 8 + 2 + 2 + 1 + 1 + 1)
823 852
824 853 static PyObject *readshas(const char *source, unsigned char num,
825 854 Py_ssize_t hashwidth)
826 855 {
827 856 int i;
828 857 PyObject *list = PyTuple_New(num);
829 858 if (list == NULL) {
830 859 return NULL;
831 860 }
832 861 for (i = 0; i < num; i++) {
833 862 PyObject *hash = PyBytes_FromStringAndSize(source, hashwidth);
834 863 if (hash == NULL) {
835 864 Py_DECREF(list);
836 865 return NULL;
837 866 }
838 867 PyTuple_SET_ITEM(list, i, hash);
839 868 source += hashwidth;
840 869 }
841 870 return list;
842 871 }
843 872
/*
 * Parse one format-1 obsolescence marker starting at `databegin`.
 *
 * On success, returns the tuple
 *     (precursor, successors, flags, metadata, (mtime, tz * 60), parents)
 * and stores the marker's total on-disk size in `*msize` so the caller can
 * advance to the next marker.  Returns NULL with an exception set on
 * truncated input ("overflow in obsstore") or allocation failure.
 */
static PyObject *fm1readmarker(const char *databegin, const char *dataend,
                               uint32_t *msize)
{
	const char *data = databegin;
	const char *meta;

	double mtime;
	int16_t tz;
	uint16_t flags;
	unsigned char nsuccs, nparents, nmetadata;
	Py_ssize_t hashwidth = 20;

	PyObject *prec = NULL, *parents = NULL, *succs = NULL;
	PyObject *metadata = NULL, *ret = NULL;
	int i;

	/* The fixed-size header must fit in the remaining input. */
	if (data + FM1_HEADER_SIZE > dataend) {
		goto overflow;
	}

	*msize = getbe32(data);
	data += 4;
	mtime = getbefloat64(data);
	data += 8;
	tz = getbeint16(data);
	data += 2;
	flags = getbeuint16(data);
	data += 2;

	/* SHA-256 markers carry 32-byte hashes instead of SHA-1's 20. */
	if (flags & USING_SHA_256) {
		hashwidth = 32;
	}

	nsuccs = (unsigned char)(*data++);
	nparents = (unsigned char)(*data++);
	nmetadata = (unsigned char)(*data++);

	/* Validate the self-declared marker size against the input... */
	if (databegin + *msize > dataend) {
		goto overflow;
	}
	dataend = databegin + *msize; /* narrow down to marker size */

	/* ...then every subsequent read is checked against the marker end. */
	if (data + hashwidth > dataend) {
		goto overflow;
	}
	prec = PyBytes_FromStringAndSize(data, hashwidth);
	data += hashwidth;
	if (prec == NULL) {
		goto bail;
	}

	if (data + nsuccs * hashwidth > dataend) {
		goto overflow;
	}
	succs = readshas(data, nsuccs, hashwidth);
	if (succs == NULL) {
		goto bail;
	}
	data += nsuccs * hashwidth;

	/* Only 1 or 2 parents carry hashes; any other count means "no
	 * parent information" and maps to None. */
	if (nparents == 1 || nparents == 2) {
		if (data + nparents * hashwidth > dataend) {
			goto overflow;
		}
		parents = readshas(data, nparents, hashwidth);
		if (parents == NULL) {
			goto bail;
		}
		data += nparents * hashwidth;
	} else {
		parents = Py_None;
		Py_INCREF(parents);
	}

	/* Metadata: nmetadata pairs of one-byte (key length, value length),
	 * followed by all the key/value bytes themselves. */
	if (data + 2 * nmetadata > dataend) {
		goto overflow;
	}
	meta = data + (2 * nmetadata);
	metadata = PyTuple_New(nmetadata);
	if (metadata == NULL) {
		goto bail;
	}
	for (i = 0; i < nmetadata; i++) {
		PyObject *tmp, *left = NULL, *right = NULL;
		Py_ssize_t leftsize = (unsigned char)(*data++);
		Py_ssize_t rightsize = (unsigned char)(*data++);
		if (meta + leftsize + rightsize > dataend) {
			goto overflow;
		}
		left = PyBytes_FromStringAndSize(meta, leftsize);
		meta += leftsize;
		right = PyBytes_FromStringAndSize(meta, rightsize);
		meta += rightsize;
		tmp = PyTuple_New(2);
		if (!left || !right || !tmp) {
			Py_XDECREF(left);
			Py_XDECREF(right);
			Py_XDECREF(tmp);
			goto bail;
		}
		/* SET_ITEM steals the references to left/right/tmp. */
		PyTuple_SET_ITEM(tmp, 0, left);
		PyTuple_SET_ITEM(tmp, 1, right);
		PyTuple_SET_ITEM(metadata, i, tmp);
	}
	ret = Py_BuildValue("(OOHO(di)O)", prec, succs, flags, metadata, mtime,
	                    (int)tz * 60, parents);
	goto bail; /* return successfully */

overflow:
	PyErr_SetString(PyExc_ValueError, "overflow in obsstore");
bail:
	/* Shared cleanup path: Py_BuildValue took its own references, and on
	 * error `ret` stays NULL, so the XDECREFs are correct either way. */
	Py_XDECREF(prec);
	Py_XDECREF(succs);
	Py_XDECREF(metadata);
	Py_XDECREF(parents);
	return ret;
}
961 990
/*
 * parsers.fm1readmarkers(data, offset, stop) -> list of marker tuples
 *
 * Parse consecutive format-1 obsolescence markers from `data`, starting
 * at byte `offset` and stopping once `offset` reaches `stop`.
 */
static PyObject *fm1readmarkers(PyObject *self, PyObject *args)
{
	const char *data, *dataend;
	Py_ssize_t datalen, offset, stop;
	PyObject *markers = NULL;

	if (!PyArg_ParseTuple(args, PY23("s#nn", "y#nn"), &data, &datalen,
	                      &offset, &stop)) {
		return NULL;
	}
	if (offset < 0) {
		PyErr_SetString(PyExc_ValueError,
		                "invalid negative offset in fm1readmarkers");
		return NULL;
	}
	if (stop > datalen) {
		PyErr_SetString(
		    PyExc_ValueError,
		    "stop longer than data length in fm1readmarkers");
		return NULL;
	}
	dataend = data + datalen;
	data += offset;
	markers = PyList_New(0);
	if (!markers) {
		return NULL;
	}
	while (offset < stop) {
		uint32_t msize;
		int error;
		PyObject *record = fm1readmarker(data, dataend, &msize);
		if (!record) {
			goto bail;
		}
		error = PyList_Append(markers, record);
		Py_DECREF(record);
		if (error) {
			goto bail;
		}
		/* fm1readmarker reported this marker's on-disk size in
		 * msize; advance to the next marker. */
		data += msize;
		offset += msize;
	}
	return markers;
bail:
	Py_DECREF(markers);
	return NULL;
}
1009 1038
static char parsers_doc[] = "Efficient content parsing.";

/* Entry points implemented in other C files of this extension. */
PyObject *encodedir(PyObject *self, PyObject *args);
PyObject *pathencode(PyObject *self, PyObject *args);
PyObject *lowerencode(PyObject *self, PyObject *args);
PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs);

/* Method table of the `parsers` extension module. */
static PyMethodDef methods[] = {
    {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
    {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
    {"parse_index2", (PyCFunction)parse_index2, METH_VARARGS | METH_KEYWORDS,
     "parse a revlog index\n"},
    {"isasciistr", isasciistr, METH_VARARGS, "check if an ASCII string\n"},
    {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
    {"asciiupper", asciiupper, METH_VARARGS, "uppercase an ASCII string\n"},
    {"dict_new_presized", dict_new_presized, METH_VARARGS,
     "construct a dict with an expected size\n"},
    {"make_file_foldmap", make_file_foldmap, METH_VARARGS,
     "make file foldmap\n"},
    {"jsonescapeu8fast", jsonescapeu8fast, METH_VARARGS,
     "escape a UTF-8 byte string to JSON (fast path)\n"},
    {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
    {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
    {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
    {"fm1readmarkers", fm1readmarkers, METH_VARARGS,
     "parse v1 obsolete markers\n"},
    {NULL, NULL}};

/* Sub-module initializers implemented in other C files. */
void dirs_module_init(PyObject *mod);
void manifest_module_init(PyObject *mod);
void revlog_module_init(PyObject *mod);

/* Exported as `parsers.version` by module_init below. */
static const int version = 20;
1043 1072
/* Populate a freshly-created `parsers` module: version constants,
 * sub-modules (dirs, manifest, revlog), and the DirstateItem type. */
static void module_init(PyObject *mod)
{
	PyModule_AddIntConstant(mod, "version", version);

	/* This module constant has two purposes. First, it lets us unit test
	 * the ImportError raised without hard-coding any error text. This
	 * means we can change the text in the future without breaking tests,
	 * even across changesets without a recompile. Second, its presence
	 * can be used to determine whether the version-checking logic is
	 * present, which also helps in testing across changesets without a
	 * recompile. Note that this means the pure-Python version of parsers
	 * should not have this module constant. */
	PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);

	dirs_module_init(mod);
	manifest_module_init(mod);
	revlog_module_init(mod);

	if (PyType_Ready(&dirstateItemType) < 0) {
		return;
	}
	Py_INCREF(&dirstateItemType);
	PyModule_AddObject(mod, "DirstateItem", (PyObject *)&dirstateItemType);
}
1068 1097
/*
 * Verify that the Python importing us matches the Python these extension
 * modules were compiled against (major/minor taken from sys.hexversion).
 * Returns 0 on match, -1 with an ImportError set on mismatch or failure.
 */
static int check_python_version(void)
{
	PyObject *sys = PyImport_ImportModule("sys"), *ver;
	long hexversion;
	if (!sys) {
		return -1;
	}
	ver = PyObject_GetAttrString(sys, "hexversion");
	Py_DECREF(sys);
	if (!ver) {
		return -1;
	}
	hexversion = PyInt_AsLong(ver);
	Py_DECREF(ver);
	/* sys.hexversion is a 32-bit number by default, so the -1 case
	 * should only occur in unusual circumstances (e.g. if sys.hexversion
	 * is manually set to an invalid value). */
	if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
		PyErr_Format(PyExc_ImportError,
		             "%s: The Mercurial extension "
		             "modules were compiled with Python " PY_VERSION
		             ", but "
		             "Mercurial is currently using Python with "
		             "sys.hexversion=%ld: "
		             "Python %s\n at: %s",
		             versionerrortext, hexversion, Py_GetVersion(),
		             Py_GetProgramFullPath());
		return -1;
	}
	return 0;
}
1100 1129
1101 1130 #ifdef IS_PY3K
1102 1131 static struct PyModuleDef parsers_module = {PyModuleDef_HEAD_INIT, "parsers",
1103 1132 parsers_doc, -1, methods};
1104 1133
1105 1134 PyMODINIT_FUNC PyInit_parsers(void)
1106 1135 {
1107 1136 PyObject *mod;
1108 1137
1109 1138 if (check_python_version() == -1)
1110 1139 return NULL;
1111 1140 mod = PyModule_Create(&parsers_module);
1112 1141 module_init(mod);
1113 1142 return mod;
1114 1143 }
1115 1144 #else
1116 1145 PyMODINIT_FUNC initparsers(void)
1117 1146 {
1118 1147 PyObject *mod;
1119 1148
1120 1149 if (check_python_version() == -1) {
1121 1150 return;
1122 1151 }
1123 1152 mod = Py_InitModule3("parsers", methods, parsers_doc);
1124 1153 module_init(mod);
1125 1154 }
1126 1155 #endif
@@ -1,80 +1,82 b''
1 1 /*
2 2 util.h - utility functions for interfacing with the various python APIs.
3 3
4 4 This software may be used and distributed according to the terms of
5 5 the GNU General Public License, incorporated herein by reference.
6 6 */
7 7
8 8 #ifndef _HG_UTIL_H_
9 9 #define _HG_UTIL_H_
10 10
11 11 #include "compat.h"
12 12
#if PY_MAJOR_VERSION >= 3
#define IS_PY3K
#endif

/* helper to switch things like string literal depending on Python version */
#ifdef IS_PY3K
#define PY23(py2, py3) py3
#else
#define PY23(py2, py3) py2
#endif

/* clang-format off */
/* In-memory representation of one dirstate entry (exposed to Python as
 * DirstateItem, see parsers.c). */
typedef struct {
	PyObject_HEAD
	unsigned char flags; /* combination of the dirstate_flag_* bits below */
	int mode;
	int size;
	int mtime;
} dirstateItemObject;
/* clang-format on */

/* Bit values for dirstateItemObject.flags. */
static const unsigned char dirstate_flag_wc_tracked = 1;
static const unsigned char dirstate_flag_p1_tracked = 1 << 1;
static const unsigned char dirstate_flag_p2_info = 1 << 2;
static const unsigned char dirstate_flag_has_meaningful_data = 1 << 3;
static const unsigned char dirstate_flag_has_meaningful_mtime = 1 << 4;
static const unsigned char dirstate_flag_mode_exec_perm = 1 << 5;
static const unsigned char dirstate_flag_mode_is_symlink = 1 << 6;

extern PyTypeObject dirstateItemType;
#define dirstate_tuple_check(op) (Py_TYPE(op) == &dirstateItemType)

#ifndef MIN
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#endif
/* VC9 doesn't include bool and lacks stdbool.h based on my searching */
#if defined(_MSC_VER) || __STDC_VERSION__ < 199901L
#define true 1
#define false 0
typedef unsigned char bool;
#else
#include <stdbool.h>
#endif
54 56
55 57 static inline PyObject *_dict_new_presized(Py_ssize_t expected_size)
56 58 {
57 59 /* _PyDict_NewPresized expects a minused parameter, but it actually
58 60 creates a dictionary that's the nearest power of two bigger than the
59 61 parameter. For example, with the initial minused = 1000, the
60 62 dictionary created has size 1024. Of course in a lot of cases that
61 63 can be greater than the maximum load factor Python's dict object
62 64 expects (= 2/3), so as soon as we cross the threshold we'll resize
63 65 anyway. So create a dictionary that's at least 3/2 the size. */
64 66 return _PyDict_NewPresized(((1 + expected_size) / 2) * 3);
65 67 }
66 68
67 69 /* Convert a PyInt or PyLong to a long. Returns false if there is an
68 70 error, in which case an exception will already have been set. */
69 71 static inline bool pylong_to_long(PyObject *pylong, long *out)
70 72 {
71 73 *out = PyLong_AsLong(pylong);
72 74 /* Fast path to avoid hitting PyErr_Occurred if the value was obviously
73 75 * not an error. */
74 76 if (*out != -1) {
75 77 return true;
76 78 }
77 79 return PyErr_Occurred() == NULL;
78 80 }
79 81
80 82 #endif /* _HG_UTIL_H_ */
@@ -1,731 +1,732 b''
1 1 # dirstatemap.py
2 2 #
3 3 # This software may be used and distributed according to the terms of the
4 4 # GNU General Public License version 2 or any later version.
5 5
6 6 from __future__ import absolute_import
7 7
8 8 import errno
9 9
10 10 from .i18n import _
11 11
12 12 from . import (
13 13 error,
14 14 pathutil,
15 15 policy,
16 16 pycompat,
17 17 txnutil,
18 18 util,
19 19 )
20 20
21 21 from .dirstateutils import (
22 22 docket as docketmod,
23 v2,
23 24 )
24 25
# C implementation of the parsers (or the pure-Python fallback) and the
# optional Rust dirstate implementation.
parsers = policy.importmod('parsers')
rustmod = policy.importrust('dirstate')

propertycache = util.propertycache

# Prefer the Rust DirstateItem when the Rust extensions are available.
if rustmod is None:
    DirstateItem = parsers.DirstateItem
else:
    DirstateItem = rustmod.DirstateItem

# 31-bit mask applied to size/mtime values before storing them
# (see `set_clean` below).
rangemask = 0x7FFFFFFF
36 37
37 38
class _dirstatemapcommon(object):
    """
    Methods that are identical for both implementations of the dirstatemap
    class, with and without Rust extensions enabled.
    """

    # please pytype

    _map = None
    copymap = None

    def __init__(self, ui, opener, root, nodeconstants, use_dirstate_v2):
        # True when the on-disk format is dirstate-v2 (docket + data file)
        self._use_dirstate_v2 = use_dirstate_v2
        self._nodeconstants = nodeconstants
        self._ui = ui
        self._opener = opener
        self._root = root
        self._filename = b'dirstate'
        self._nodelen = 20  # Also update Rust code when changing this!
        self._parents = None
        self._dirtyparents = False
        self._docket = None

        # for consistent view between _pl() and _read() invocations
        self._pendingmode = None

    def preload(self):
        """Loads the underlying data, if it's not already loaded"""
        self._map

    def get(self, key, default=None):
        return self._map.get(key, default)

    def __len__(self):
        return len(self._map)

    def __iter__(self):
        return iter(self._map)

    def __contains__(self, key):
        return key in self._map

    def __getitem__(self, item):
        return self._map[item]

    ### sub-class utility method
    #
    # Use to allow for generic implementation of some method while still coping
    # with minor difference between implementation.

    def _dirs_incr(self, filename, old_entry=None):
        """increment the dirstate counter if applicable

        This might be a no-op for some subclass who deal with directory
        tracking in a different way.
        """

    def _dirs_decr(self, filename, old_entry=None, remove_variant=False):
        """decrement the dirstate counter if applicable

        This might be a no-op for some subclass who deal with directory
        tracking in a different way.
        """

    def _refresh_entry(self, f, entry):
        """record updated state of an entry"""

    def _insert_entry(self, f, entry):
        """add a new dirstate entry (or replace an unrelated one)

        The fact it is actually new is the responsibility of the caller
        """

    def _drop_entry(self, f):
        """remove any entry for file f

        This should also drop associated copy information

        The fact we actually need to drop it is the responsibility of the
        caller"""

    ### method to manipulate the entries

    def set_possibly_dirty(self, filename):
        """record that the current state of the file on disk is unknown"""
        entry = self[filename]
        entry.set_possibly_dirty()
        self._refresh_entry(filename, entry)

    def set_clean(self, filename, mode, size, mtime):
        """mark a file as back to a clean state"""
        entry = self[filename]
        # truncate size/mtime to 31 bits (rangemask)
        mtime = mtime & rangemask
        size = size & rangemask
        entry.set_clean(mode, size, mtime)
        self._refresh_entry(filename, entry)
        # a clean file cannot be a copy destination anymore
        self.copymap.pop(filename, None)

    def set_tracked(self, filename):
        # Returns True when a file newly became tracked.
        new = False
        entry = self.get(filename)
        if entry is None:
            # previously unknown file: create a fresh tracked entry
            self._dirs_incr(filename)
            entry = DirstateItem(
                wc_tracked=True,
            )

            self._insert_entry(filename, entry)
            new = True
        elif not entry.tracked:
            self._dirs_incr(filename, entry)
            entry.set_tracked()
            self._refresh_entry(filename, entry)
            new = True
        else:
            # XXX This is probably overkill for more case, but we need this to
            # fully replace the `normallookup` call with `set_tracked` one.
            # Consider smoothing this in the future.
            entry.set_possibly_dirty()
            self._refresh_entry(filename, entry)
        return new

    def set_untracked(self, f):
        """Mark a file as no longer tracked in the dirstate map"""
        entry = self.get(f)
        if entry is None:
            return False
        else:
            self._dirs_decr(f, old_entry=entry, remove_variant=not entry.added)
            if not entry.p2_info:
                self.copymap.pop(f, None)
            entry.set_untracked()
            self._refresh_entry(f, entry)
            return True

    def reset_state(
        self,
        filename,
        wc_tracked=False,
        p1_tracked=False,
        p2_info=False,
        has_meaningful_mtime=True,
        has_meaningful_data=True,
        parentfiledata=None,
    ):
        """Set an entry to a given state, disregarding all previous state

        This is to be used by the part of the dirstate API dedicated to
        adjusting the dirstate after an update/merge.

        note: calling this might result in no entry existing at all if the
        dirstate map does not see any point at having one for this file
        anymore.
        """
        # copy information are now outdated
        # (maybe new information should be in directly passed to this function)
        self.copymap.pop(filename, None)

        if not (p1_tracked or p2_info or wc_tracked):
            # not tracked anywhere: drop the entry entirely
            old_entry = self._map.get(filename)
            self._drop_entry(filename)
            self._dirs_decr(filename, old_entry=old_entry)
            return

        old_entry = self._map.get(filename)
        self._dirs_incr(filename, old_entry)
        entry = DirstateItem(
            wc_tracked=wc_tracked,
            p1_tracked=p1_tracked,
            p2_info=p2_info,
            has_meaningful_mtime=has_meaningful_mtime,
            parentfiledata=parentfiledata,
        )
        self._insert_entry(filename, entry)

    ### disk interaction

    def _opendirstatefile(self):
        # Abort if the pending-vs-committed view changed between reads.
        fp, mode = txnutil.trypending(self._root, self._opener, self._filename)
        if self._pendingmode is not None and self._pendingmode != mode:
            fp.close()
            raise error.Abort(
                _(b'working directory state may be changed parallelly')
            )
        self._pendingmode = mode
        return fp

    def _readdirstatefile(self, size=-1):
        # Returns up to `size` bytes of the dirstate file, b'' if missing.
        try:
            with self._opendirstatefile() as fp:
                return fp.read(size)
        except IOError as err:
            if err.errno != errno.ENOENT:
                raise
            # File doesn't exist, so the current state is empty
            return b''

    @property
    def docket(self):
        # Lazily parse the v2 docket; only meaningful for dirstate-v2.
        if not self._docket:
            if not self._use_dirstate_v2:
                raise error.ProgrammingError(
                    b'dirstate only has a docket in v2 format'
                )
            self._docket = docketmod.DirstateDocket.parse(
                self._readdirstatefile(), self._nodeconstants
            )
        return self._docket

    def write_v2_no_append(self, tr, st, meta, packed):
        """Write `packed` v2 data to a brand new data file, then point a
        new docket at it and clean up the old data file."""
        old_docket = self.docket
        new_docket = docketmod.DirstateDocket.with_new_uuid(
            self.parents(), len(packed), meta
        )
        data_filename = new_docket.data_filename()
        if tr:
            tr.add(data_filename, 0)
        self._opener.write(data_filename, packed)
        # Write the new docket after the new data file has been
        # written. Because `st` was opened with `atomictemp=True`,
        # the actual `.hg/dirstate` file is only affected on close.
        st.write(new_docket.serialize())
        st.close()
        # Remove the old data file after the new docket pointing to
        # the new data file was written.
        if old_docket.uuid:
            data_filename = old_docket.data_filename()
            unlink = lambda _tr=None: self._opener.unlink(data_filename)
            if tr:
                category = b"dirstate-v2-clean-" + old_docket.uuid
                tr.addpostclose(category, unlink)
            else:
                unlink()
        self._docket = new_docket

    ### reading/setting parents

    def parents(self):
        if not self._parents:
            if self._use_dirstate_v2:
                self._parents = self.docket.parents
            else:
                # v1 format: the two parent nodes are the first bytes of
                # the dirstate file
                read_len = self._nodelen * 2
                st = self._readdirstatefile(read_len)
                l = len(st)
                if l == read_len:
                    self._parents = (
                        st[: self._nodelen],
                        st[self._nodelen : 2 * self._nodelen],
                    )
                elif l == 0:
                    # missing or empty file: null parents
                    self._parents = (
                        self._nodeconstants.nullid,
                        self._nodeconstants.nullid,
                    )
                else:
                    raise error.Abort(
                        _(b'working directory state appears damaged!')
                    )

        return self._parents
298 299
299 300
class dirstatemap(_dirstatemapcommon):
    """Map encapsulating the dirstate's contents.

    The dirstate contains the following state:

    - `identity` is the identity of the dirstate file, which can be used to
      detect when changes have occurred to the dirstate file.

    - `parents` is a pair containing the parents of the working copy. The
      parents are updated by calling `setparents`.

    - the state map maps filenames to tuples of (state, mode, size, mtime),
      where state is a single character representing 'normal', 'added',
      'removed', or 'merged'. It is read by treating the dirstate as a
      dict. File state is updated by calling various methods (see each
      documentation for details):

      - `reset_state`,
      - `set_tracked`
      - `set_untracked`
      - `set_clean`
      - `set_possibly_dirty`

    - `copymap` maps destination filenames to their source filename.

    The dirstate also provides the following views onto the state:

    - `filefoldmap` is a dict mapping normalized filenames to the denormalized
      form that they appear as in the dirstate.

    - `dirfoldmap` is a dict mapping normalized directory names to the
      denormalized form that they appear as in the dirstate.
    """

    def __init__(self, ui, opener, root, nodeconstants, use_dirstate_v2):
        super(dirstatemap, self).__init__(
            ui, opener, root, nodeconstants, use_dirstate_v2
        )
        if self._use_dirstate_v2:
            # This implementation only understands the v1 format; v2
            # requires the Rust extensions.
            # Fixed the error message: was "not supportedi(...)".
            msg = "Dirstate V2 not supported "
            msg += "(should have detected unsupported requirement)"
            raise error.ProgrammingError(msg)

    ### Core data storage and access

    @propertycache
    def _map(self):
        # Replace the property-cache slot first, then fill it from disk.
        self._map = {}
        self.read()
        return self._map

    @propertycache
    def copymap(self):
        # Reading `_map` also populates `copymap` (see `read`).
        self.copymap = {}
        self._map
        return self.copymap

    def clear(self):
        """Forget all entries and reset the parents to null"""
        self._map.clear()
        self.copymap.clear()
        self.setparents(self._nodeconstants.nullid, self._nodeconstants.nullid)
        util.clearcachedproperty(self, b"_dirs")
        util.clearcachedproperty(self, b"_alldirs")
        util.clearcachedproperty(self, b"filefoldmap")
        util.clearcachedproperty(self, b"dirfoldmap")

    def items(self):
        return pycompat.iteritems(self._map)

    # forward for python2,3 compat
    iteritems = items

    def debug_iter(self, all):
        """
        Return an iterator of (filename, state, mode, size, mtime) tuples

        `all` is unused when Rust is not enabled
        """
        for (filename, item) in self.items():
            yield (filename, item.state, item.mode, item.size, item.mtime)

    def keys(self):
        return self._map.keys()

    ### reading/setting parents

    def setparents(self, p1, p2, fold_p2=False):
        """Set the working-copy parents; with `fold_p2`, also drop merge
        state and return the dict of copy records removed while doing so."""
        self._parents = (p1, p2)
        self._dirtyparents = True
        copies = {}
        if fold_p2:
            for f, s in pycompat.iteritems(self._map):
                # Discard "merged" markers when moving away from a merge state
                if s.p2_info:
                    source = self.copymap.pop(f, None)
                    if source:
                        copies[f] = source
                    s.drop_merge_data()
        return copies

    ### disk interaction

    def read(self):
        """Parse the on-disk v1 dirstate into `_map` and `copymap`."""
        # ignore HG_PENDING because identity is used only for writing
        self.identity = util.filestat.frompath(
            self._opener.join(self._filename)
        )

        try:
            fp = self._opendirstatefile()
            try:
                st = fp.read()
            finally:
                fp.close()
        except IOError as err:
            if err.errno != errno.ENOENT:
                raise
            return
        if not st:
            return

        if util.safehasattr(parsers, b'dict_new_presized'):
            # Make an estimate of the number of files in the dirstate based on
            # its size. This trades wasting some memory for avoiding costly
            # resizes. Each entry has a prefix of 17 bytes followed by one or
            # two path names. Studies on various large-scale real-world
            # repositories found 54 bytes a reasonable upper limit for the
            # average path names. Copy entries are ignored for the sake of
            # this estimate.
            self._map = parsers.dict_new_presized(len(st) // 71)

        # Python's garbage collector triggers a GC each time a certain number
        # of container objects (the number being defined by
        # gc.get_threshold()) are allocated. parse_dirstate creates a tuple
        # for each file in the dirstate. The C version then immediately marks
        # them as not to be tracked by the collector. However, this has no
        # effect on when GCs are triggered, only on what objects the GC looks
        # into. This means that O(number of files) GCs are unavoidable.
        # Depending on when in the process's lifetime the dirstate is parsed,
        # this can get very expensive. As a workaround, disable GC while
        # parsing the dirstate.
        #
        # (we cannot decorate the function directly since it is in a C module)
        parse_dirstate = util.nogc(parsers.parse_dirstate)
        p = parse_dirstate(self._map, self.copymap, st)
        if not self._dirtyparents:
            self.setparents(*p)

        # Avoid excess attribute lookups by fast pathing certain checks
        self.__contains__ = self._map.__contains__
        self.__getitem__ = self._map.__getitem__
        self.get = self._map.get

    def write(self, _tr, st, now):
        """Pack the dirstate and write it to the (atomictemp) file `st`"""
        d = parsers.pack_dirstate(self._map, self.copymap, self.parents(), now)
        st.write(d)
        st.close()
        self._dirtyparents = False

    @propertycache
    def identity(self):
        # Loading `_map` records the file identity as a side effect (`read`)
        self._map
        return self.identity

    ### code related to maintaining and accessing "extra" property
    # (e.g. "has_dir")

    def _dirs_incr(self, filename, old_entry=None):
        """increment the dirstate counter if applicable"""
        if (
            old_entry is None or old_entry.removed
        ) and "_dirs" in self.__dict__:
            self._dirs.addpath(filename)
        if old_entry is None and "_alldirs" in self.__dict__:
            self._alldirs.addpath(filename)

    def _dirs_decr(self, filename, old_entry=None, remove_variant=False):
        """decrement the dirstate counter if applicable"""
        if old_entry is not None:
            if "_dirs" in self.__dict__ and not old_entry.removed:
                self._dirs.delpath(filename)
            if "_alldirs" in self.__dict__ and not remove_variant:
                self._alldirs.delpath(filename)
        elif remove_variant and "_alldirs" in self.__dict__:
            self._alldirs.addpath(filename)
        if "filefoldmap" in self.__dict__:
            normed = util.normcase(filename)
            self.filefoldmap.pop(normed, None)

    @propertycache
    def filefoldmap(self):
        """Returns a dictionary mapping normalized case paths to their
        non-normalized versions.
        """
        try:
            # Prefer the fast C implementation when available.
            makefilefoldmap = parsers.make_file_foldmap
        except AttributeError:
            pass
        else:
            return makefilefoldmap(
                self._map, util.normcasespec, util.normcasefallback
            )

        f = {}
        normcase = util.normcase
        for name, s in pycompat.iteritems(self._map):
            if not s.removed:
                f[normcase(name)] = name
        f[b'.'] = b'.'  # prevents useless util.fspath() invocation
        return f

    @propertycache
    def dirfoldmap(self):
        # Same as filefoldmap but for the tracked directories (`_dirs`).
        f = {}
        normcase = util.normcase
        for name in self._dirs:
            f[normcase(name)] = name
        return f

    def hastrackeddir(self, d):
        """
        Returns True if the dirstate contains a tracked (not removed) file
        in this directory.
        """
        return d in self._dirs

    def hasdir(self, d):
        """
        Returns True if the dirstate contains a file (tracked or removed)
        in this directory.
        """
        return d in self._alldirs

    @propertycache
    def _dirs(self):
        return pathutil.dirs(self._map, only_tracked=True)

    @propertycache
    def _alldirs(self):
        return pathutil.dirs(self._map)

    ### code related to manipulation of entries and copy-sources

    def _refresh_entry(self, f, entry):
        # entries that are no longer tracked anywhere simply disappear
        if not entry.any_tracked:
            self._map.pop(f, None)

    def _insert_entry(self, f, entry):
        self._map[f] = entry

    def _drop_entry(self, f):
        self._map.pop(f, None)
        self.copymap.pop(f, None)
553 554
554 555 if rustmod is not None:
555 556
556 557 class dirstatemap(_dirstatemapcommon):
557 558
558 559 ### Core data storage and access
559 560
        @propertycache
        def _map(self):
            """
            Fills the Dirstatemap when called.
            """
            # ignore HG_PENDING because identity is used only for writing
            self.identity = util.filestat.frompath(
                self._opener.join(self._filename)
            )

            if self._use_dirstate_v2:
                if self.docket.uuid:
                    # TODO: use mmap when possible
                    data = self._opener.read(self.docket.data_filename())
                else:
                    # no uuid in the docket: no data file exists yet
                    data = b''
                self._map = rustmod.DirstateMap.new_v2(
                    data, self.docket.data_size, self.docket.tree_metadata
                )
                parents = self.docket.parents
            else:
                self._map, parents = rustmod.DirstateMap.new_v1(
                    self._readdirstatefile()
                )

            if parents and not self._dirtyparents:
                self.setparents(*parents)

            # Cache the bound methods to avoid repeated attribute lookups
            self.__contains__ = self._map.__contains__
            self.__getitem__ = self._map.__getitem__
            self.get = self._map.get
            return self._map
592 593
        @property
        def copymap(self):
            # View onto the copy information held by the Rust map.
            return self._map.copymap()
596 597
597 598 def debug_iter(self, all):
598 599 """
599 600 Return an iterator of (filename, state, mode, size, mtime) tuples
600 601
601 602 `all`: also include with `state == b' '` dirstate tree nodes that
602 603 don't have an associated `DirstateItem`.
603 604
604 605 """
605 606 return self._map.debug_iter(all)
606 607
607 608 def clear(self):
608 609 self._map.clear()
609 610 self.setparents(
610 611 self._nodeconstants.nullid, self._nodeconstants.nullid
611 612 )
612 613 util.clearcachedproperty(self, b"_dirs")
613 614 util.clearcachedproperty(self, b"_alldirs")
614 615 util.clearcachedproperty(self, b"dirfoldmap")
615 616
616 617 def items(self):
617 618 return self._map.items()
618 619
619 620 # forward for python2,3 compat
620 621 iteritems = items
621 622
622 623 def keys(self):
623 624 return iter(self._map)
624 625
625 626 ### reading/setting parents
626 627
627 628 def setparents(self, p1, p2, fold_p2=False):
628 629 self._parents = (p1, p2)
629 630 self._dirtyparents = True
630 631 copies = {}
631 632 if fold_p2:
632 633 # Collect into an intermediate list to avoid a `RuntimeError`
633 634 # exception due to mutation during iteration.
634 635 # TODO: move this the whole loop to Rust where `iter_mut`
635 636 # enables in-place mutation of elements of a collection while
636 637 # iterating it, without mutating the collection itself.
637 638 files_with_p2_info = [
638 639 f for f, s in self._map.items() if s.p2_info
639 640 ]
640 641 rust_map = self._map
641 642 for f in files_with_p2_info:
642 643 e = rust_map.get(f)
643 644 source = self.copymap.pop(f, None)
644 645 if source:
645 646 copies[f] = source
646 647 e.drop_merge_data()
647 648 rust_map.set_dirstate_item(f, e)
648 649 return copies
649 650
650 651 ### disk interaction
651 652
652 653 @propertycache
653 654 def identity(self):
654 655 self._map
655 656 return self.identity
656 657
657 658 def write(self, tr, st, now):
658 659 if not self._use_dirstate_v2:
659 660 p1, p2 = self.parents()
660 661 packed = self._map.write_v1(p1, p2, now)
661 662 st.write(packed)
662 663 st.close()
663 664 self._dirtyparents = False
664 665 return
665 666
666 667 # We can only append to an existing data file if there is one
667 668 can_append = self.docket.uuid is not None
668 669 packed, meta, append = self._map.write_v2(now, can_append)
669 670 if append:
670 671 docket = self.docket
671 672 data_filename = docket.data_filename()
672 673 if tr:
673 674 tr.add(data_filename, docket.data_size)
674 675 with self._opener(data_filename, b'r+b') as fp:
675 676 fp.seek(docket.data_size)
676 677 assert fp.tell() == docket.data_size
677 678 written = fp.write(packed)
678 679 if written is not None: # py2 may return None
679 680 assert written == len(packed), (written, len(packed))
680 681 docket.data_size += len(packed)
681 682 docket.parents = self.parents()
682 683 docket.tree_metadata = meta
683 684 st.write(docket.serialize())
684 685 st.close()
685 686 else:
686 687 self.write_v2_no_append(tr, st, meta, packed)
687 688 # Reload from the newly-written file
688 689 util.clearcachedproperty(self, b"_map")
689 690 self._dirtyparents = False
690 691
691 692 ### code related to maintaining and accessing "extra" property
692 693 # (e.g. "has_dir")
693 694
694 695 @propertycache
695 696 def filefoldmap(self):
696 697 """Returns a dictionary mapping normalized case paths to their
697 698 non-normalized versions.
698 699 """
699 700 return self._map.filefoldmapasdict()
700 701
701 702 def hastrackeddir(self, d):
702 703 return self._map.hastrackeddir(d)
703 704
704 705 def hasdir(self, d):
705 706 return self._map.hasdir(d)
706 707
707 708 @propertycache
708 709 def dirfoldmap(self):
709 710 f = {}
710 711 normcase = util.normcase
711 712 for name in self._map.tracked_dirs():
712 713 f[normcase(name)] = name
713 714 return f
714 715
715 716 ### code related to manipulation of entries and copy-sources
716 717
717 718 def _refresh_entry(self, f, entry):
718 719 if not entry.any_tracked:
719 720 self._map.drop_item_and_copy_source(f)
720 721 else:
721 722 self._map.addfile(f, entry)
722 723
723 724 def _insert_entry(self, f, entry):
724 725 self._map.addfile(f, entry)
725 726
726 727 def _drop_entry(self, f):
727 728 self._map.drop_item_and_copy_source(f)
728 729
729 730 def __setitem__(self, key, value):
730 731 assert isinstance(value, DirstateItem)
731 732 self._map.set_dirstate_item(key, value)
@@ -1,75 +1,71 b''
1 1 # dirstatedocket.py - docket file for dirstate-v2
2 2 #
3 3 # Copyright Mercurial Contributors
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import struct
11 11
12 12 from ..revlogutils import docket as docket_mod
13
13 from . import v2
14 14
15 15 V2_FORMAT_MARKER = b"dirstate-v2\n"
16 16
17 # Must match the constant of the same name in
18 # `rust/hg-core/src/dirstate_tree/on_disk.rs`
19 TREE_METADATA_SIZE = 44
20
21 17 # * 12 bytes: format marker
22 18 # * 32 bytes: node ID of the working directory's first parent
23 19 # * 32 bytes: node ID of the working directory's second parent
24 20 # * {TREE_METADATA_SIZE} bytes: tree metadata, parsed separately
25 21 # * 4 bytes: big-endian used size of the data file
26 22 # * 1 byte: length of the data file's UUID
27 23 # * variable: data file's UUID
28 24 #
29 25 # Node IDs are null-padded if shorter than 32 bytes.
30 26 # A data file shorter than the specified used size is corrupted (truncated)
31 27 HEADER = struct.Struct(
32 ">{}s32s32s{}sLB".format(len(V2_FORMAT_MARKER), TREE_METADATA_SIZE)
28 ">{}s32s32s{}sLB".format(len(V2_FORMAT_MARKER), v2.TREE_METADATA_SIZE)
33 29 )
34 30
35 31
36 32 class DirstateDocket(object):
37 33 data_filename_pattern = b'dirstate.%s'
38 34
39 35 def __init__(self, parents, data_size, tree_metadata, uuid):
40 36 self.parents = parents
41 37 self.data_size = data_size
42 38 self.tree_metadata = tree_metadata
43 39 self.uuid = uuid
44 40
45 41 @classmethod
46 42 def with_new_uuid(cls, parents, data_size, tree_metadata):
47 43 return cls(parents, data_size, tree_metadata, docket_mod.make_uid())
48 44
49 45 @classmethod
50 46 def parse(cls, data, nodeconstants):
51 47 if not data:
52 48 parents = (nodeconstants.nullid, nodeconstants.nullid)
53 49 return cls(parents, 0, b'', None)
54 50 marker, p1, p2, meta, data_size, uuid_size = HEADER.unpack_from(data)
55 51 if marker != V2_FORMAT_MARKER:
56 52 raise ValueError("expected dirstate-v2 marker")
57 53 uuid = data[HEADER.size : HEADER.size + uuid_size]
58 54 p1 = p1[: nodeconstants.nodelen]
59 55 p2 = p2[: nodeconstants.nodelen]
60 56 return cls((p1, p2), data_size, meta, uuid)
61 57
62 58 def serialize(self):
63 59 p1, p2 = self.parents
64 60 header = HEADER.pack(
65 61 V2_FORMAT_MARKER,
66 62 p1,
67 63 p2,
68 64 self.tree_metadata,
69 65 self.data_size,
70 66 len(self.uuid),
71 67 )
72 68 return header + self.uuid
73 69
74 70 def data_filename(self):
75 71 return self.data_filename_pattern % self.uuid
@@ -1,736 +1,770 b''
1 1 # parsers.py - Python implementation of parsers.c
2 2 #
3 3 # Copyright 2009 Olivia Mackall <olivia@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 import stat
10 11 import struct
11 12 import zlib
12 13
13 14 from ..node import (
14 15 nullrev,
15 16 sha1nodeconstants,
16 17 )
17 18 from ..thirdparty import attr
18 19 from .. import (
19 20 error,
20 21 pycompat,
21 22 revlogutils,
22 23 util,
23 24 )
24 25
25 26 from ..revlogutils import nodemap as nodemaputil
26 27 from ..revlogutils import constants as revlog_constants
27 28
28 29 stringio = pycompat.bytesio
29 30
30 31
31 32 _pack = struct.pack
32 33 _unpack = struct.unpack
33 34 _compress = zlib.compress
34 35 _decompress = zlib.decompress
35 36
36 37
37 38 # a special value used internally for `size` if the file come from the other parent
38 39 FROM_P2 = -2
39 40
40 41 # a special value used internally for `size` if the file is modified/merged/added
41 42 NONNORMAL = -1
42 43
43 44 # a special value used internally for `time` if the time is ambigeous
44 45 AMBIGUOUS_TIME = -1
45 46
47 # Bits of the `flags` byte inside a node in the file format
48 DIRSTATE_V2_WDIR_TRACKED = 1 << 0
49 DIRSTATE_V2_P1_TRACKED = 1 << 1
50 DIRSTATE_V2_P2_INFO = 1 << 2
51 DIRSTATE_V2_HAS_MODE_AND_SIZE = 1 << 3
52 DIRSTATE_V2_HAS_MTIME = 1 << 4
53 DIRSTATE_V2_MODE_EXEC_PERM = 1 << 5
54 DIRSTATE_V2_MODE_IS_SYMLINK = 1 << 6
55
46 56
47 57 @attr.s(slots=True, init=False)
48 58 class DirstateItem(object):
49 59 """represent a dirstate entry
50 60
51 61 It hold multiple attributes
52 62
53 63 # about file tracking
54 64 - wc_tracked: is the file tracked by the working copy
55 65 - p1_tracked: is the file tracked in working copy first parent
56 66 - p2_info: the file has been involved in some merge operation. Either
57 67 because it was actually merged, or because the p2 version was
58 68 ahead, or because some rename moved it there. In either case
59 69 `hg status` will want it displayed as modified.
60 70
61 71 # about the file state expected from p1 manifest:
62 72 - mode: the file mode in p1
63 73 - size: the file size in p1
64 74
65 75 These value can be set to None, which mean we don't have a meaningful value
66 76 to compare with. Either because we don't really care about them as there
67 77 `status` is known without having to look at the disk or because we don't
68 78 know these right now and a full comparison will be needed to find out if
69 79 the file is clean.
70 80
71 81 # about the file state on disk last time we saw it:
72 82 - mtime: the last known clean mtime for the file.
73 83
74 84 This value can be set to None if no cachable state exist. Either because we
75 85 do not care (see previous section) or because we could not cache something
76 86 yet.
77 87 """
78 88
79 89 _wc_tracked = attr.ib()
80 90 _p1_tracked = attr.ib()
81 91 _p2_info = attr.ib()
82 92 _mode = attr.ib()
83 93 _size = attr.ib()
84 94 _mtime = attr.ib()
85 95
86 96 def __init__(
87 97 self,
88 98 wc_tracked=False,
89 99 p1_tracked=False,
90 100 p2_info=False,
91 101 has_meaningful_data=True,
92 102 has_meaningful_mtime=True,
93 103 parentfiledata=None,
94 104 ):
95 105 self._wc_tracked = wc_tracked
96 106 self._p1_tracked = p1_tracked
97 107 self._p2_info = p2_info
98 108
99 109 self._mode = None
100 110 self._size = None
101 111 self._mtime = None
102 112 if parentfiledata is None:
103 113 has_meaningful_mtime = False
104 114 has_meaningful_data = False
105 115 if has_meaningful_data:
106 116 self._mode = parentfiledata[0]
107 117 self._size = parentfiledata[1]
108 118 if has_meaningful_mtime:
109 119 self._mtime = parentfiledata[2]
110 120
111 121 @classmethod
122 def from_v2_data(cls, flags, size, mtime):
123 """Build a new DirstateItem object from V2 data"""
124 has_mode_size = bool(flags & DIRSTATE_V2_HAS_MODE_AND_SIZE)
125 mode = None
126 if has_mode_size:
127 assert stat.S_IXUSR == 0o100
128 if flags & DIRSTATE_V2_MODE_EXEC_PERM:
129 mode = 0o755
130 else:
131 mode = 0o644
132 if flags & DIRSTATE_V2_MODE_IS_SYMLINK:
133 mode |= stat.S_IFLNK
134 else:
135 mode |= stat.S_IFREG
136 return cls(
137 wc_tracked=bool(flags & DIRSTATE_V2_WDIR_TRACKED),
138 p1_tracked=bool(flags & DIRSTATE_V2_P1_TRACKED),
139 p2_info=bool(flags & DIRSTATE_V2_P2_INFO),
140 has_meaningful_data=has_mode_size,
141 has_meaningful_mtime=bool(flags & DIRSTATE_V2_HAS_MTIME),
142 parentfiledata=(mode, size, mtime),
143 )
144
145 @classmethod
112 146 def from_v1_data(cls, state, mode, size, mtime):
113 147 """Build a new DirstateItem object from V1 data
114 148
115 149 Since the dirstate-v1 format is frozen, the signature of this function
116 150 is not expected to change, unlike the __init__ one.
117 151 """
118 152 if state == b'm':
119 153 return cls(wc_tracked=True, p1_tracked=True, p2_info=True)
120 154 elif state == b'a':
121 155 return cls(wc_tracked=True)
122 156 elif state == b'r':
123 157 if size == NONNORMAL:
124 158 p1_tracked = True
125 159 p2_info = True
126 160 elif size == FROM_P2:
127 161 p1_tracked = False
128 162 p2_info = True
129 163 else:
130 164 p1_tracked = True
131 165 p2_info = False
132 166 return cls(p1_tracked=p1_tracked, p2_info=p2_info)
133 167 elif state == b'n':
134 168 if size == FROM_P2:
135 169 return cls(wc_tracked=True, p2_info=True)
136 170 elif size == NONNORMAL:
137 171 return cls(wc_tracked=True, p1_tracked=True)
138 172 elif mtime == AMBIGUOUS_TIME:
139 173 return cls(
140 174 wc_tracked=True,
141 175 p1_tracked=True,
142 176 has_meaningful_mtime=False,
143 177 parentfiledata=(mode, size, 42),
144 178 )
145 179 else:
146 180 return cls(
147 181 wc_tracked=True,
148 182 p1_tracked=True,
149 183 parentfiledata=(mode, size, mtime),
150 184 )
151 185 else:
152 186 raise RuntimeError(b'unknown state: %s' % state)
153 187
154 188 def set_possibly_dirty(self):
155 189 """Mark a file as "possibly dirty"
156 190
157 191 This means the next status call will have to actually check its content
158 192 to make sure it is correct.
159 193 """
160 194 self._mtime = None
161 195
162 196 def set_clean(self, mode, size, mtime):
163 197 """mark a file as "clean" cancelling potential "possibly dirty call"
164 198
165 199 Note: this function is a descendant of `dirstate.normal` and is
166 200 currently expected to be call on "normal" entry only. There are not
167 201 reason for this to not change in the future as long as the ccode is
168 202 updated to preserve the proper state of the non-normal files.
169 203 """
170 204 self._wc_tracked = True
171 205 self._p1_tracked = True
172 206 self._mode = mode
173 207 self._size = size
174 208 self._mtime = mtime
175 209
176 210 def set_tracked(self):
177 211 """mark a file as tracked in the working copy
178 212
179 213 This will ultimately be called by command like `hg add`.
180 214 """
181 215 self._wc_tracked = True
182 216 # `set_tracked` is replacing various `normallookup` call. So we mark
183 217 # the files as needing lookup
184 218 #
185 219 # Consider dropping this in the future in favor of something less broad.
186 220 self._mtime = None
187 221
188 222 def set_untracked(self):
189 223 """mark a file as untracked in the working copy
190 224
191 225 This will ultimately be called by command like `hg remove`.
192 226 """
193 227 self._wc_tracked = False
194 228 self._mode = None
195 229 self._size = None
196 230 self._mtime = None
197 231
198 232 def drop_merge_data(self):
199 233 """remove all "merge-only" from a DirstateItem
200 234
201 235 This is to be call by the dirstatemap code when the second parent is dropped
202 236 """
203 237 if self._p2_info:
204 238 self._p2_info = False
205 239 self._mode = None
206 240 self._size = None
207 241 self._mtime = None
208 242
209 243 @property
210 244 def mode(self):
211 245 return self.v1_mode()
212 246
213 247 @property
214 248 def size(self):
215 249 return self.v1_size()
216 250
217 251 @property
218 252 def mtime(self):
219 253 return self.v1_mtime()
220 254
221 255 @property
222 256 def state(self):
223 257 """
224 258 States are:
225 259 n normal
226 260 m needs merging
227 261 r marked for removal
228 262 a marked for addition
229 263
230 264 XXX This "state" is a bit obscure and mostly a direct expression of the
231 265 dirstatev1 format. It would make sense to ultimately deprecate it in
232 266 favor of the more "semantic" attributes.
233 267 """
234 268 if not self.any_tracked:
235 269 return b'?'
236 270 return self.v1_state()
237 271
238 272 @property
239 273 def tracked(self):
240 274 """True is the file is tracked in the working copy"""
241 275 return self._wc_tracked
242 276
243 277 @property
244 278 def any_tracked(self):
245 279 """True is the file is tracked anywhere (wc or parents)"""
246 280 return self._wc_tracked or self._p1_tracked or self._p2_info
247 281
248 282 @property
249 283 def added(self):
250 284 """True if the file has been added"""
251 285 return self._wc_tracked and not (self._p1_tracked or self._p2_info)
252 286
253 287 @property
254 288 def maybe_clean(self):
255 289 """True if the file has a chance to be in the "clean" state"""
256 290 if not self._wc_tracked:
257 291 return False
258 292 elif not self._p1_tracked:
259 293 return False
260 294 elif self._p2_info:
261 295 return False
262 296 return True
263 297
264 298 @property
265 299 def p1_tracked(self):
266 300 """True if the file is tracked in the first parent manifest"""
267 301 return self._p1_tracked
268 302
269 303 @property
270 304 def p2_info(self):
271 305 """True if the file needed to merge or apply any input from p2
272 306
273 307 See the class documentation for details.
274 308 """
275 309 return self._wc_tracked and self._p2_info
276 310
277 311 @property
278 312 def removed(self):
279 313 """True if the file has been removed"""
280 314 return not self._wc_tracked and (self._p1_tracked or self._p2_info)
281 315
282 316 def v1_state(self):
283 317 """return a "state" suitable for v1 serialization"""
284 318 if not self.any_tracked:
285 319 # the object has no state to record, this is -currently-
286 320 # unsupported
287 321 raise RuntimeError('untracked item')
288 322 elif self.removed:
289 323 return b'r'
290 324 elif self._p1_tracked and self._p2_info:
291 325 return b'm'
292 326 elif self.added:
293 327 return b'a'
294 328 else:
295 329 return b'n'
296 330
297 331 def v1_mode(self):
298 332 """return a "mode" suitable for v1 serialization"""
299 333 return self._mode if self._mode is not None else 0
300 334
301 335 def v1_size(self):
302 336 """return a "size" suitable for v1 serialization"""
303 337 if not self.any_tracked:
304 338 # the object has no state to record, this is -currently-
305 339 # unsupported
306 340 raise RuntimeError('untracked item')
307 341 elif self.removed and self._p1_tracked and self._p2_info:
308 342 return NONNORMAL
309 343 elif self._p2_info:
310 344 return FROM_P2
311 345 elif self.removed:
312 346 return 0
313 347 elif self.added:
314 348 return NONNORMAL
315 349 elif self._size is None:
316 350 return NONNORMAL
317 351 else:
318 352 return self._size
319 353
320 354 def v1_mtime(self):
321 355 """return a "mtime" suitable for v1 serialization"""
322 356 if not self.any_tracked:
323 357 # the object has no state to record, this is -currently-
324 358 # unsupported
325 359 raise RuntimeError('untracked item')
326 360 elif self.removed:
327 361 return 0
328 362 elif self._mtime is None:
329 363 return AMBIGUOUS_TIME
330 364 elif self._p2_info:
331 365 return AMBIGUOUS_TIME
332 366 elif not self._p1_tracked:
333 367 return AMBIGUOUS_TIME
334 368 else:
335 369 return self._mtime
336 370
337 371 def need_delay(self, now):
338 372 """True if the stored mtime would be ambiguous with the current time"""
339 373 return self.v1_state() == b'n' and self.v1_mtime() == now
340 374
341 375
342 376 def gettype(q):
343 377 return int(q & 0xFFFF)
344 378
345 379
346 380 class BaseIndexObject(object):
347 381 # Can I be passed to an algorithme implemented in Rust ?
348 382 rust_ext_compat = 0
349 383 # Format of an index entry according to Python's `struct` language
350 384 index_format = revlog_constants.INDEX_ENTRY_V1
351 385 # Size of a C unsigned long long int, platform independent
352 386 big_int_size = struct.calcsize(b'>Q')
353 387 # Size of a C long int, platform independent
354 388 int_size = struct.calcsize(b'>i')
355 389 # An empty index entry, used as a default value to be overridden, or nullrev
356 390 null_item = (
357 391 0,
358 392 0,
359 393 0,
360 394 -1,
361 395 -1,
362 396 -1,
363 397 -1,
364 398 sha1nodeconstants.nullid,
365 399 0,
366 400 0,
367 401 revlog_constants.COMP_MODE_INLINE,
368 402 revlog_constants.COMP_MODE_INLINE,
369 403 )
370 404
371 405 @util.propertycache
372 406 def entry_size(self):
373 407 return self.index_format.size
374 408
375 409 @property
376 410 def nodemap(self):
377 411 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
378 412 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
379 413 return self._nodemap
380 414
381 415 @util.propertycache
382 416 def _nodemap(self):
383 417 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
384 418 for r in range(0, len(self)):
385 419 n = self[r][7]
386 420 nodemap[n] = r
387 421 return nodemap
388 422
389 423 def has_node(self, node):
390 424 """return True if the node exist in the index"""
391 425 return node in self._nodemap
392 426
393 427 def rev(self, node):
394 428 """return a revision for a node
395 429
396 430 If the node is unknown, raise a RevlogError"""
397 431 return self._nodemap[node]
398 432
399 433 def get_rev(self, node):
400 434 """return a revision for a node
401 435
402 436 If the node is unknown, return None"""
403 437 return self._nodemap.get(node)
404 438
405 439 def _stripnodes(self, start):
406 440 if '_nodemap' in vars(self):
407 441 for r in range(start, len(self)):
408 442 n = self[r][7]
409 443 del self._nodemap[n]
410 444
411 445 def clearcaches(self):
412 446 self.__dict__.pop('_nodemap', None)
413 447
414 448 def __len__(self):
415 449 return self._lgt + len(self._extra)
416 450
417 451 def append(self, tup):
418 452 if '_nodemap' in vars(self):
419 453 self._nodemap[tup[7]] = len(self)
420 454 data = self._pack_entry(len(self), tup)
421 455 self._extra.append(data)
422 456
423 457 def _pack_entry(self, rev, entry):
424 458 assert entry[8] == 0
425 459 assert entry[9] == 0
426 460 return self.index_format.pack(*entry[:8])
427 461
428 462 def _check_index(self, i):
429 463 if not isinstance(i, int):
430 464 raise TypeError(b"expecting int indexes")
431 465 if i < 0 or i >= len(self):
432 466 raise IndexError
433 467
434 468 def __getitem__(self, i):
435 469 if i == -1:
436 470 return self.null_item
437 471 self._check_index(i)
438 472 if i >= self._lgt:
439 473 data = self._extra[i - self._lgt]
440 474 else:
441 475 index = self._calculate_index(i)
442 476 data = self._data[index : index + self.entry_size]
443 477 r = self._unpack_entry(i, data)
444 478 if self._lgt and i == 0:
445 479 offset = revlogutils.offset_type(0, gettype(r[0]))
446 480 r = (offset,) + r[1:]
447 481 return r
448 482
449 483 def _unpack_entry(self, rev, data):
450 484 r = self.index_format.unpack(data)
451 485 r = r + (
452 486 0,
453 487 0,
454 488 revlog_constants.COMP_MODE_INLINE,
455 489 revlog_constants.COMP_MODE_INLINE,
456 490 )
457 491 return r
458 492
459 493 def pack_header(self, header):
460 494 """pack header information as binary"""
461 495 v_fmt = revlog_constants.INDEX_HEADER
462 496 return v_fmt.pack(header)
463 497
464 498 def entry_binary(self, rev):
465 499 """return the raw binary string representing a revision"""
466 500 entry = self[rev]
467 501 p = revlog_constants.INDEX_ENTRY_V1.pack(*entry[:8])
468 502 if rev == 0:
469 503 p = p[revlog_constants.INDEX_HEADER.size :]
470 504 return p
471 505
472 506
473 507 class IndexObject(BaseIndexObject):
474 508 def __init__(self, data):
475 509 assert len(data) % self.entry_size == 0, (
476 510 len(data),
477 511 self.entry_size,
478 512 len(data) % self.entry_size,
479 513 )
480 514 self._data = data
481 515 self._lgt = len(data) // self.entry_size
482 516 self._extra = []
483 517
484 518 def _calculate_index(self, i):
485 519 return i * self.entry_size
486 520
487 521 def __delitem__(self, i):
488 522 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
489 523 raise ValueError(b"deleting slices only supports a:-1 with step 1")
490 524 i = i.start
491 525 self._check_index(i)
492 526 self._stripnodes(i)
493 527 if i < self._lgt:
494 528 self._data = self._data[: i * self.entry_size]
495 529 self._lgt = i
496 530 self._extra = []
497 531 else:
498 532 self._extra = self._extra[: i - self._lgt]
499 533
500 534
501 535 class PersistentNodeMapIndexObject(IndexObject):
502 536 """a Debug oriented class to test persistent nodemap
503 537
504 538 We need a simple python object to test API and higher level behavior. See
505 539 the Rust implementation for more serious usage. This should be used only
506 540 through the dedicated `devel.persistent-nodemap` config.
507 541 """
508 542
509 543 def nodemap_data_all(self):
510 544 """Return bytes containing a full serialization of a nodemap
511 545
512 546 The nodemap should be valid for the full set of revisions in the
513 547 index."""
514 548 return nodemaputil.persistent_data(self)
515 549
516 550 def nodemap_data_incremental(self):
517 551 """Return bytes containing a incremental update to persistent nodemap
518 552
519 553 This containst the data for an append-only update of the data provided
520 554 in the last call to `update_nodemap_data`.
521 555 """
522 556 if self._nm_root is None:
523 557 return None
524 558 docket = self._nm_docket
525 559 changed, data = nodemaputil.update_persistent_data(
526 560 self, self._nm_root, self._nm_max_idx, self._nm_docket.tip_rev
527 561 )
528 562
529 563 self._nm_root = self._nm_max_idx = self._nm_docket = None
530 564 return docket, changed, data
531 565
532 566 def update_nodemap_data(self, docket, nm_data):
533 567 """provide full block of persisted binary data for a nodemap
534 568
535 569 The data are expected to come from disk. See `nodemap_data_all` for a
536 570 produceur of such data."""
537 571 if nm_data is not None:
538 572 self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
539 573 if self._nm_root:
540 574 self._nm_docket = docket
541 575 else:
542 576 self._nm_root = self._nm_max_idx = self._nm_docket = None
543 577
544 578
545 579 class InlinedIndexObject(BaseIndexObject):
546 580 def __init__(self, data, inline=0):
547 581 self._data = data
548 582 self._lgt = self._inline_scan(None)
549 583 self._inline_scan(self._lgt)
550 584 self._extra = []
551 585
552 586 def _inline_scan(self, lgt):
553 587 off = 0
554 588 if lgt is not None:
555 589 self._offsets = [0] * lgt
556 590 count = 0
557 591 while off <= len(self._data) - self.entry_size:
558 592 start = off + self.big_int_size
559 593 (s,) = struct.unpack(
560 594 b'>i',
561 595 self._data[start : start + self.int_size],
562 596 )
563 597 if lgt is not None:
564 598 self._offsets[count] = off
565 599 count += 1
566 600 off += self.entry_size + s
567 601 if off != len(self._data):
568 602 raise ValueError(b"corrupted data")
569 603 return count
570 604
571 605 def __delitem__(self, i):
572 606 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
573 607 raise ValueError(b"deleting slices only supports a:-1 with step 1")
574 608 i = i.start
575 609 self._check_index(i)
576 610 self._stripnodes(i)
577 611 if i < self._lgt:
578 612 self._offsets = self._offsets[:i]
579 613 self._lgt = i
580 614 self._extra = []
581 615 else:
582 616 self._extra = self._extra[: i - self._lgt]
583 617
584 618 def _calculate_index(self, i):
585 619 return self._offsets[i]
586 620
587 621
588 622 def parse_index2(data, inline, revlogv2=False):
589 623 if not inline:
590 624 cls = IndexObject2 if revlogv2 else IndexObject
591 625 return cls(data), None
592 626 cls = InlinedIndexObject
593 627 return cls(data, inline), (0, data)
594 628
595 629
596 630 def parse_index_cl_v2(data):
597 631 return IndexChangelogV2(data), None
598 632
599 633
600 634 class IndexObject2(IndexObject):
601 635 index_format = revlog_constants.INDEX_ENTRY_V2
602 636
603 637 def replace_sidedata_info(
604 638 self,
605 639 rev,
606 640 sidedata_offset,
607 641 sidedata_length,
608 642 offset_flags,
609 643 compression_mode,
610 644 ):
611 645 """
612 646 Replace an existing index entry's sidedata offset and length with new
613 647 ones.
614 648 This cannot be used outside of the context of sidedata rewriting,
615 649 inside the transaction that creates the revision `rev`.
616 650 """
617 651 if rev < 0:
618 652 raise KeyError
619 653 self._check_index(rev)
620 654 if rev < self._lgt:
621 655 msg = b"cannot rewrite entries outside of this transaction"
622 656 raise KeyError(msg)
623 657 else:
624 658 entry = list(self[rev])
625 659 entry[0] = offset_flags
626 660 entry[8] = sidedata_offset
627 661 entry[9] = sidedata_length
628 662 entry[11] = compression_mode
629 663 entry = tuple(entry)
630 664 new = self._pack_entry(rev, entry)
631 665 self._extra[rev - self._lgt] = new
632 666
633 667 def _unpack_entry(self, rev, data):
634 668 data = self.index_format.unpack(data)
635 669 entry = data[:10]
636 670 data_comp = data[10] & 3
637 671 sidedata_comp = (data[10] & (3 << 2)) >> 2
638 672 return entry + (data_comp, sidedata_comp)
639 673
640 674 def _pack_entry(self, rev, entry):
641 675 data = entry[:10]
642 676 data_comp = entry[10] & 3
643 677 sidedata_comp = (entry[11] & 3) << 2
644 678 data += (data_comp | sidedata_comp,)
645 679
646 680 return self.index_format.pack(*data)
647 681
648 682 def entry_binary(self, rev):
649 683 """return the raw binary string representing a revision"""
650 684 entry = self[rev]
651 685 return self._pack_entry(rev, entry)
652 686
653 687 def pack_header(self, header):
654 688 """pack header information as binary"""
655 689 msg = 'version header should go in the docket, not the index: %d'
656 690 msg %= header
657 691 raise error.ProgrammingError(msg)
658 692
659 693
660 694 class IndexChangelogV2(IndexObject2):
661 695 index_format = revlog_constants.INDEX_ENTRY_CL_V2
662 696
663 697 def _unpack_entry(self, rev, data, r=True):
664 698 items = self.index_format.unpack(data)
665 699 entry = items[:3] + (rev, rev) + items[3:8]
666 700 data_comp = items[8] & 3
667 701 sidedata_comp = (items[8] >> 2) & 3
668 702 return entry + (data_comp, sidedata_comp)
669 703
670 704 def _pack_entry(self, rev, entry):
671 705 assert entry[3] == rev, entry[3]
672 706 assert entry[4] == rev, entry[4]
673 707 data = entry[:3] + entry[5:10]
674 708 data_comp = entry[10] & 3
675 709 sidedata_comp = (entry[11] & 3) << 2
676 710 data += (data_comp | sidedata_comp,)
677 711 return self.index_format.pack(*data)
678 712
679 713
680 714 def parse_index_devel_nodemap(data, inline):
681 715 """like parse_index2, but alway return a PersistentNodeMapIndexObject"""
682 716 return PersistentNodeMapIndexObject(data), None
683 717
684 718
685 719 def parse_dirstate(dmap, copymap, st):
686 720 parents = [st[:20], st[20:40]]
687 721 # dereference fields so they will be local in loop
688 722 format = b">cllll"
689 723 e_size = struct.calcsize(format)
690 724 pos1 = 40
691 725 l = len(st)
692 726
693 727 # the inner loop
694 728 while pos1 < l:
695 729 pos2 = pos1 + e_size
696 730 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
697 731 pos1 = pos2 + e[4]
698 732 f = st[pos2:pos1]
699 733 if b'\0' in f:
700 734 f, c = f.split(b'\0')
701 735 copymap[f] = c
702 736 dmap[f] = DirstateItem.from_v1_data(*e[:4])
703 737 return parents
704 738
705 739
706 740 def pack_dirstate(dmap, copymap, pl, now):
707 741 now = int(now)
708 742 cs = stringio()
709 743 write = cs.write
710 744 write(b"".join(pl))
711 745 for f, e in pycompat.iteritems(dmap):
712 746 if e.need_delay(now):
713 747 # The file was last modified "simultaneously" with the current
714 748 # write to dirstate (i.e. within the same second for file-
715 749 # systems with a granularity of 1 sec). This commonly happens
716 750 # for at least a couple of files on 'update'.
717 751 # The user could change the file without changing its size
718 752 # within the same second. Invalidate the file's mtime in
719 753 # dirstate, forcing future 'status' calls to compare the
720 754 # contents of the file if the size is the same. This prevents
721 755 # mistakenly treating such files as clean.
722 756 e.set_possibly_dirty()
723 757
724 758 if f in copymap:
725 759 f = b"%s\0%s" % (f, copymap[f])
726 760 e = _pack(
727 761 b">cllll",
728 762 e.v1_state(),
729 763 e.v1_mode(),
730 764 e.v1_size(),
731 765 e.v1_mtime(),
732 766 len(f),
733 767 )
734 768 write(e)
735 769 write(f)
736 770 return cs.getvalue()
@@ -1,720 +1,718 b''
1 1 //! The "version 2" disk representation of the dirstate
2 2 //!
3 3 //! See `mercurial/helptext/internals/dirstate-v2.txt`
4 4
5 5 use crate::dirstate::TruncatedTimestamp;
6 6 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
7 7 use crate::dirstate_tree::path_with_basename::WithBasename;
8 8 use crate::errors::HgError;
9 9 use crate::utils::hg_path::HgPath;
10 10 use crate::DirstateEntry;
11 11 use crate::DirstateError;
12 12 use crate::DirstateParents;
13 13 use bitflags::bitflags;
14 14 use bytes_cast::unaligned::{U16Be, U32Be};
15 15 use bytes_cast::BytesCast;
16 16 use format_bytes::format_bytes;
17 17 use std::borrow::Cow;
18 18 use std::convert::{TryFrom, TryInto};
19 19
/// Added at the start of `.hg/dirstate` when the "v2" format is used.
/// This a redundant sanity check more than an actual "magic number" since
/// `.hg/requires` already governs which format should be used.
pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";

/// Keep space for 256-bit hashes
const STORED_NODE_ID_BYTES: usize = 32;

/// … even though only 160 bits are used for now, with SHA-1
const USED_NODE_ID_BYTES: usize = 20;

/// Byte length of `TreeMetadata::ignore_patterns_hash` (SHA-1)
pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
33 33
/// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
const TREE_METADATA_SIZE: usize = 44;
const NODE_SIZE: usize = 43;
39 37
/// Make sure that size-affecting changes are made knowingly: each
/// `transmute` below only compiles when the struct has exactly the
/// expected byte size, so resizing a struct forces an edit here.
#[allow(unused)]
fn static_assert_size_of() {
    let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
    let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
    let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
}
47 45
48 46 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
49 47 #[derive(BytesCast)]
50 48 #[repr(C)]
51 49 struct DocketHeader {
52 50 marker: [u8; V2_FORMAT_MARKER.len()],
53 51 parent_1: [u8; STORED_NODE_ID_BYTES],
54 52 parent_2: [u8; STORED_NODE_ID_BYTES],
55 53
56 54 metadata: TreeMetadata,
57 55
58 56 /// Counted in bytes
59 57 data_size: Size,
60 58
61 59 uuid_size: u8,
62 60 }
63 61
/// A parsed `.hg/dirstate` docket, borrowing from the on-disk bytes.
pub struct Docket<'on_disk> {
    header: &'on_disk DocketHeader,
    /// Identifies the `dirstate.{uuid}` data file (see `data_filename`)
    uuid: &'on_disk [u8],
}
68 66
/// Fields are documented in the *Tree metadata in the docket file*
/// section of `mercurial/helptext/internals/dirstate-v2.txt`
///
/// Must match `TREE_METADATA` in `mercurial/dirstateutils/v2.py`.
#[derive(BytesCast)]
#[repr(C)]
struct TreeMetadata {
    /// Location of the root nodes within the data file
    root_nodes: ChildNodes,
    /// Total number of nodes in the tree that have an entry
    nodes_with_entry_count: Size,
    /// Total number of nodes in the tree that have a copy source
    nodes_with_copy_source_count: Size,
    /// Number of bytes in the data file that are not used anymore
    unreachable_bytes: Size,
    unused: [u8; 4],

    /// See *Optional hash of ignore patterns* section of
    /// `mercurial/helptext/internals/dirstate-v2.txt`
    ignore_patterns_hash: IgnorePatternsHash,
}
84 82
/// Fields are documented in the *The data file format*
/// section of `mercurial/helptext/internals/dirstate-v2.txt`
///
/// Must match `NODE` in `mercurial/dirstateutils/v2.py`.
#[derive(BytesCast)]
#[repr(C)]
pub(super) struct Node {
    full_path: PathSlice,

    /// In bytes from `self.full_path.start`
    base_name_start: PathSize,

    /// Zero `start` means no copy source (see `has_copy_source`)
    copy_source: OptPathSlice,
    children: ChildNodes,
    pub(super) descendants_with_entry_count: Size,
    pub(super) tracked_descendants_count: Size,
    flags: Flags,
    /// Meaningful when `HAS_MODE_AND_SIZE` is set (see `assume_entry`)
    size: U32Be,
    /// Meaningful when `HAS_MTIME` is set (see `node_data`)
    mtime: PackedTruncatedTimestamp,
}
103 101
bitflags! {
    /// Per-node flag bits; a node has a dirstate entry when any of
    /// `WDIR_TRACKED`, `P1_TRACKED`, or `P2_INFO` is set (see
    /// `Node::has_entry`).
    #[derive(BytesCast)]
    #[repr(C)]
    struct Flags: u8 {
        const WDIR_TRACKED = 1 << 0;
        const P1_TRACKED = 1 << 1;
        const P2_INFO = 1 << 2;
        const HAS_MODE_AND_SIZE = 1 << 3;
        const HAS_MTIME = 1 << 4;
        const MODE_EXEC_PERM = 1 << 5;
        const MODE_IS_SYMLINK = 1 << 6;
    }
}
117 115
/// Duration since the Unix epoch, in the on-disk big-endian layout.
/// Converted to/from `TruncatedTimestamp` via the `From`/`TryFrom`
/// impls at the bottom of this file.
#[derive(BytesCast, Copy, Clone)]
#[repr(C)]
struct PackedTruncatedTimestamp {
    truncated_seconds: U32Be,
    nanoseconds: U32Be,
}
125 123
/// Counted in bytes from the start of the file
///
/// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
type Offset = U32Be;

/// Counted in number of items
///
/// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
type Size = U32Be;

/// Counted in bytes
///
/// NOTE: we choose not to support file names/paths longer than 64 KiB.
type PathSize = U16Be;

/// A contiguous sequence of `len` times `Node`, representing the child nodes
/// of either some other node or of the repository root.
///
/// Always sorted by ascending `full_path`, to allow binary search.
/// Since nodes with the same parent nodes also have the same parent path,
/// only the `base_name`s need to be compared during binary search.
#[derive(BytesCast, Copy, Clone)]
#[repr(C)]
struct ChildNodes {
    start: Offset,
    len: Size,
}

/// A `HgPath` of `len` bytes, stored at byte offset `start` of the file
#[derive(BytesCast, Copy, Clone)]
#[repr(C)]
struct PathSlice {
    start: Offset,
    len: PathSize,
}

/// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
type OptPathSlice = PathSlice;
164 162
/// Unexpected file format found in `.hg/dirstate` with the "v2" format.
///
/// This should only happen if Mercurial is buggy or a repository is corrupted.
#[derive(Debug)]
pub struct DirstateV2ParseError;
170 168
171 169 impl From<DirstateV2ParseError> for HgError {
172 170 fn from(_: DirstateV2ParseError) -> Self {
173 171 HgError::corrupted("dirstate-v2 parse error")
174 172 }
175 173 }
176 174
177 175 impl From<DirstateV2ParseError> for crate::DirstateError {
178 176 fn from(error: DirstateV2ParseError) -> Self {
179 177 HgError::from(error).into()
180 178 }
181 179 }
182 180
impl<'on_disk> Docket<'on_disk> {
    /// The dirstate parents stored in the docket header.
    ///
    /// Only the first `USED_NODE_ID_BYTES` of each stored 32-byte field
    /// are meaningful with SHA-1 node ids.
    pub fn parents(&self) -> DirstateParents {
        use crate::Node;
        let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
            .unwrap()
            .clone();
        let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
            .unwrap()
            .clone();
        DirstateParents { p1, p2 }
    }

    /// Raw bytes of the `TreeMetadata` embedded in the docket, to be
    /// passed to `read`.
    pub fn tree_metadata(&self) -> &[u8] {
        self.header.metadata.as_bytes()
    }

    /// Size in bytes of the separate data file.
    pub fn data_size(&self) -> usize {
        // This `unwrap` could only panic on a 16-bit CPU
        self.header.data_size.get().try_into().unwrap()
    }

    /// Name of the data file (relative to `.hg`): `dirstate.{uuid}`.
    pub fn data_filename(&self) -> String {
        String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
    }
}
208 206
209 207 pub fn read_docket(
210 208 on_disk: &[u8],
211 209 ) -> Result<Docket<'_>, DirstateV2ParseError> {
212 210 let (header, uuid) =
213 211 DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
214 212 let uuid_size = header.uuid_size as usize;
215 213 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
216 214 Ok(Docket { header, uuid })
217 215 } else {
218 216 Err(DirstateV2ParseError)
219 217 }
220 218 }
221 219
/// Parse a dirstate-v2 data file into an in-memory `DirstateMap` whose
/// nodes borrow from `on_disk`.
///
/// `metadata` is the tree metadata taken from the docket
/// (`Docket::tree_metadata`).
pub(super) fn read<'on_disk>(
    on_disk: &'on_disk [u8],
    metadata: &[u8],
) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
    // An empty data file represents an empty dirstate
    if on_disk.is_empty() {
        return Ok(DirstateMap::empty(on_disk));
    }
    let (meta, _) = TreeMetadata::from_bytes(metadata)
        .map_err(|_| DirstateV2ParseError)?;
    let dirstate_map = DirstateMap {
        on_disk,
        root: dirstate_map::ChildNodes::OnDisk(read_nodes(
            on_disk,
            meta.root_nodes,
        )?),
        nodes_with_entry_count: meta.nodes_with_entry_count.get(),
        nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
        ignore_patterns_hash: meta.ignore_patterns_hash,
        unreachable_bytes: meta.unreachable_bytes.get(),
    };
    Ok(dirstate_map)
}
244 242
impl Node {
    /// Full path of this node from the repository root, resolved from
    /// `on_disk`.
    pub(super) fn full_path<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
        read_hg_path(on_disk, self.full_path)
    }

    /// Byte offset of the "base name" within `full_path`, validated to
    /// lie strictly inside the path.
    pub(super) fn base_name_start<'on_disk>(
        &self,
    ) -> Result<usize, DirstateV2ParseError> {
        let start = self.base_name_start.get();
        if start < self.full_path.len.get() {
            let start = usize::try_from(start)
                // u32 -> usize, could only panic on a 16-bit CPU
                .expect("dirstate-v2 base_name_start out of bounds");
            Ok(start)
        } else {
            Err(DirstateV2ParseError)
        }
    }

    /// The trailing component of `full_path`, after `base_name_start`.
    pub(super) fn base_name<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
        let full_path = self.full_path(on_disk)?;
        let base_name_start = self.base_name_start()?;
        Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
    }

    /// Key for this node in the in-memory children maps: the full path
    /// split at its base name.
    pub(super) fn path<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
        Ok(WithBasename::from_raw_parts(
            Cow::Borrowed(self.full_path(on_disk)?),
            self.base_name_start()?,
        ))
    }

    /// A zero `start` offset encodes the absence of a copy source.
    pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
        self.copy_source.start.get() != 0
    }

    /// The copy source path, if any, resolved from `on_disk`.
    pub(super) fn copy_source<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
        Ok(if self.has_copy_source() {
            Some(read_hg_path(on_disk, self.copy_source)?)
        } else {
            None
        })
    }

    /// Whether this node carries a dirstate entry: any of the
    /// tracked/p2 flag bits is set.
    fn has_entry(&self) -> bool {
        self.flags.intersects(
            Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
        )
    }

    /// Classify this node as an entry, a cached directory mtime, or
    /// neither.
    pub(super) fn node_data(
        &self,
    ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
        if self.has_entry() {
            Ok(dirstate_map::NodeData::Entry(self.assume_entry()))
        } else if let Some(mtime) = self.cached_directory_mtime()? {
            Ok(dirstate_map::NodeData::CachedDirectory { mtime })
        } else {
            Ok(dirstate_map::NodeData::None)
        }
    }

    /// `HAS_MTIME` on a node *without* an entry means the mtime caches
    /// a directory read; on a node with an entry it belongs to the
    /// entry instead (see `assume_entry`).
    pub(super) fn cached_directory_mtime(
        &self,
    ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
        Ok(
            if self.flags.contains(Flags::HAS_MTIME) && !self.has_entry() {
                Some(self.mtime.try_into()?)
            } else {
                None
            },
        )
    }

    /// Reconstruct a Unix mode from the two mode flag bits:
    /// symlink vs. regular file, and 0o755 vs. 0o644 permissions.
    fn synthesize_unix_mode(&self) -> u32 {
        let file_type = if self.flags.contains(Flags::MODE_IS_SYMLINK) {
            libc::S_IFLNK
        } else {
            libc::S_IFREG
        };
        let permisions = if self.flags.contains(Flags::MODE_EXEC_PERM) {
            0o755
        } else {
            0o644
        };
        file_type | permisions
    }

    /// Build a `DirstateEntry` from this node's flags, size and mtime.
    /// Only meaningful when `has_entry()` is true.
    fn assume_entry(&self) -> DirstateEntry {
        // TODO: convert through raw bits instead?
        let wdir_tracked = self.flags.contains(Flags::WDIR_TRACKED);
        let p1_tracked = self.flags.contains(Flags::P1_TRACKED);
        let p2_info = self.flags.contains(Flags::P2_INFO);
        let mode_size = if self.flags.contains(Flags::HAS_MODE_AND_SIZE) {
            Some((self.synthesize_unix_mode(), self.size.into()))
        } else {
            None
        };
        let mtime = if self.flags.contains(Flags::HAS_MTIME) {
            Some(self.mtime.truncated_seconds.into())
        } else {
            None
        };
        DirstateEntry::from_v2_data(
            wdir_tracked,
            p1_tracked,
            p2_info,
            mode_size,
            mtime,
        )
    }

    /// This node's dirstate entry, if it has one.
    pub(super) fn entry(
        &self,
    ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
        if self.has_entry() {
            Ok(Some(self.assume_entry()))
        } else {
            Ok(None)
        }
    }

    /// Child nodes of this node, resolved from `on_disk`.
    pub(super) fn children<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
        read_nodes(on_disk, self.children)
    }

    /// Convert this on-disk node into its in-memory representation
    /// (children stay lazily on-disk).
    pub(super) fn to_in_memory_node<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
        Ok(dirstate_map::Node {
            children: dirstate_map::ChildNodes::OnDisk(
                self.children(on_disk)?,
            ),
            copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
            data: self.node_data()?,
            descendants_with_entry_count: self
                .descendants_with_entry_count
                .get(),
            tracked_descendants_count: self.tracked_descendants_count.get(),
        })
    }

    /// Inverse of `assume_entry`: encode a `DirstateEntry` into the
    /// (flags, size, mtime) triple stored in an on-disk node.
    fn from_dirstate_entry(
        entry: &DirstateEntry,
    ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
        let (wdir_tracked, p1_tracked, p2_info, mode_size_opt, mtime_opt) =
            entry.v2_data();
        // TODO: convert through raw flag bits instead?
        let mut flags = Flags::empty();
        flags.set(Flags::WDIR_TRACKED, wdir_tracked);
        flags.set(Flags::P1_TRACKED, p1_tracked);
        flags.set(Flags::P2_INFO, p2_info);
        let size = if let Some((m, s)) = mode_size_opt {
            let exec_perm = m & libc::S_IXUSR != 0;
            let is_symlink = m & libc::S_IFMT == libc::S_IFLNK;
            flags.set(Flags::MODE_EXEC_PERM, exec_perm);
            flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
            flags.insert(Flags::HAS_MODE_AND_SIZE);
            s.into()
        } else {
            0.into()
        };
        let mtime = if let Some(m) = mtime_opt {
            flags.insert(Flags::HAS_MTIME);
            PackedTruncatedTimestamp {
                truncated_seconds: m.into(),
                nanoseconds: 0.into(),
            }
        } else {
            PackedTruncatedTimestamp::null()
        };
        (flags, size, mtime)
    }
}
435 433
436 434 fn read_hg_path(
437 435 on_disk: &[u8],
438 436 slice: PathSlice,
439 437 ) -> Result<&HgPath, DirstateV2ParseError> {
440 438 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
441 439 }
442 440
443 441 fn read_nodes(
444 442 on_disk: &[u8],
445 443 slice: ChildNodes,
446 444 ) -> Result<&[Node], DirstateV2ParseError> {
447 445 read_slice(on_disk, slice.start, slice.len.get())
448 446 }
449 447
/// Interpret `len` items of type `T` starting at byte offset `start` of
/// `on_disk`, returning `DirstateV2ParseError` if the range is out of
/// bounds or conversion fails.
fn read_slice<T, Len>(
    on_disk: &[u8],
    start: Offset,
    len: Len,
) -> Result<&[T], DirstateV2ParseError>
where
    T: BytesCast,
    Len: TryInto<usize>,
{
    // Either `usize::MAX` would result in "out of bounds" error since a single
    // `&[u8]` cannot occupy the entire address space.
    let start = start.get().try_into().unwrap_or(std::usize::MAX);
    let len = len.try_into().unwrap_or(std::usize::MAX);
    on_disk
        .get(start..)
        .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
        .map(|(slice, _rest)| slice)
        .ok_or_else(|| DirstateV2ParseError)
}
469 467
/// Call `f` with the full path of every tracked file in the dirstate-v2
/// data file, walking the on-disk tree depth-first without building the
/// in-memory map.
pub(crate) fn for_each_tracked_path<'on_disk>(
    on_disk: &'on_disk [u8],
    metadata: &[u8],
    mut f: impl FnMut(&'on_disk HgPath),
) -> Result<(), DirstateV2ParseError> {
    let (meta, _) = TreeMetadata::from_bytes(metadata)
        .map_err(|_| DirstateV2ParseError)?;
    // Recursive depth-first walk over a node's children
    fn recur<'on_disk>(
        on_disk: &'on_disk [u8],
        nodes: ChildNodes,
        f: &mut impl FnMut(&'on_disk HgPath),
    ) -> Result<(), DirstateV2ParseError> {
        for node in read_nodes(on_disk, nodes)? {
            if let Some(entry) = node.entry()? {
                if entry.state().is_tracked() {
                    f(node.full_path(on_disk)?)
                }
            }
            recur(on_disk, node.children, f)?
        }
        Ok(())
    }
    recur(on_disk, meta.root_nodes, &mut f)
}
494 492
/// Serialize `dirstate_map` into dirstate-v2 bytes.
///
/// Returns new data and metadata, together with whether that data should be
/// appended to the existing data file whose content is at
/// `dirstate_map.on_disk` (true), instead of written to a new data file
/// (false).
pub(super) fn write(
    dirstate_map: &mut DirstateMap,
    can_append: bool,
) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
    let append = can_append && dirstate_map.write_should_append();

    // This ignores the space for paths, and for nodes without an entry.
    // TODO: better estimate? Skip the `Vec` and write to a file directly?
    let size_guess = std::mem::size_of::<Node>()
        * dirstate_map.nodes_with_entry_count as usize;

    let mut writer = Writer {
        dirstate_map,
        append,
        out: Vec::with_capacity(size_guess),
    };

    // Recursively serialize the whole tree, returning where the root
    // nodes ended up.
    let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;

    let meta = TreeMetadata {
        root_nodes,
        nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
        nodes_with_copy_source_count: dirstate_map
            .nodes_with_copy_source_count
            .into(),
        unreachable_bytes: dirstate_map.unreachable_bytes.into(),
        unused: [0; 4],
        ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
    };
    Ok((writer.out, meta.as_bytes().to_vec(), append))
}
530 528
/// State for one serialization pass (see `write`).
struct Writer<'dmap, 'on_disk> {
    dirstate_map: &'dmap DirstateMap<'on_disk>,
    /// When true, offsets are computed as if `out` were appended to the
    /// existing data file, and unchanged on-disk slices are reused.
    append: bool,
    /// Bytes to be written out
    out: Vec<u8>,
}
536 534
impl Writer<'_, '_> {
    /// Serialize `nodes` (and, recursively, their descendants) into
    /// `self.out`, returning the offset/length descriptor of the
    /// contiguous run of serialized nodes.
    fn write_nodes(
        &mut self,
        nodes: dirstate_map::ChildNodesRef,
    ) -> Result<ChildNodes, DirstateError> {
        // Reuse already-written nodes if possible
        if self.append {
            if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
                let start = self.on_disk_offset_of(nodes_slice).expect(
                    "dirstate-v2 OnDisk nodes not found within on_disk",
                );
                let len = child_nodes_len_from_usize(nodes_slice.len());
                return Ok(ChildNodes { start, len });
            }
        }

        // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
        // undefined iteration order. Sort to enable binary search in the
        // written file.
        let nodes = nodes.sorted();
        let nodes_len = nodes.len();

        // First accumulate serialized nodes in a `Vec`
        let mut on_disk_nodes = Vec::with_capacity(nodes_len);
        for node in nodes {
            // Children are written before their parent so the parent can
            // record their final offset.
            let children =
                self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
            let full_path = node.full_path(self.dirstate_map.on_disk)?;
            let full_path = self.write_path(full_path.as_bytes());
            let copy_source = if let Some(source) =
                node.copy_source(self.dirstate_map.on_disk)?
            {
                self.write_path(source.as_bytes())
            } else {
                // Zero start offset encodes "no copy source"
                PathSlice {
                    start: 0.into(),
                    len: 0.into(),
                }
            };
            on_disk_nodes.push(match node {
                NodeRef::InMemory(path, node) => {
                    let (flags, size, mtime) = match &node.data {
                        dirstate_map::NodeData::Entry(entry) => {
                            Node::from_dirstate_entry(entry)
                        }
                        dirstate_map::NodeData::CachedDirectory { mtime } => {
                            (Flags::HAS_MTIME, 0.into(), (*mtime).into())
                        }
                        dirstate_map::NodeData::None => (
                            Flags::empty(),
                            0.into(),
                            PackedTruncatedTimestamp::null(),
                        ),
                    };
                    Node {
                        children,
                        copy_source,
                        full_path,
                        base_name_start: u16::try_from(path.base_name_start())
                            // Could only panic for paths over 64 KiB
                            .expect("dirstate-v2 path length overflow")
                            .into(),
                        descendants_with_entry_count: node
                            .descendants_with_entry_count
                            .into(),
                        tracked_descendants_count: node
                            .tracked_descendants_count
                            .into(),
                        flags,
                        size,
                        mtime,
                    }
                }
                // On-disk nodes keep their other fields; only the slice
                // descriptors are rewritten for the new layout.
                NodeRef::OnDisk(node) => Node {
                    children,
                    copy_source,
                    full_path,
                    ..*node
                },
            })
        }
        // … so we can write them contiguously, after writing everything else
        // they refer to.
        let start = self.current_offset();
        let len = child_nodes_len_from_usize(nodes_len);
        self.out.extend(on_disk_nodes.as_bytes());
        Ok(ChildNodes { start, len })
    }

    /// If the given slice of items is within `on_disk`, returns its offset
    /// from the start of `on_disk`.
    fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
    where
        T: BytesCast,
    {
        // Compare raw pointer ranges to detect whether `slice` borrows
        // from the memory-mapped/loaded data file.
        fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
            let start = slice.as_ptr() as usize;
            let end = start + slice.len();
            start..=end
        }
        let slice_addresses = address_range(slice.as_bytes());
        let on_disk_addresses = address_range(self.dirstate_map.on_disk);
        if on_disk_addresses.contains(slice_addresses.start())
            && on_disk_addresses.contains(slice_addresses.end())
        {
            let offset = slice_addresses.start() - on_disk_addresses.start();
            Some(offset_from_usize(offset))
        } else {
            None
        }
    }

    /// Offset where the next write to `self.out` will land in the final
    /// data file (accounting for the existing file when appending).
    fn current_offset(&mut self) -> Offset {
        let mut offset = self.out.len();
        if self.append {
            offset += self.dirstate_map.on_disk.len()
        }
        offset_from_usize(offset)
    }

    /// Write `slice` as a path, returning its on-disk descriptor.
    fn write_path(&mut self, slice: &[u8]) -> PathSlice {
        let len = path_len_from_usize(slice.len());
        // Reuse an already-written path if possible
        if self.append {
            if let Some(start) = self.on_disk_offset_of(slice) {
                return PathSlice { start, len };
            }
        }
        let start = self.current_offset();
        self.out.extend(slice.as_bytes());
        PathSlice { start, len }
    }
}
670 668
671 669 fn offset_from_usize(x: usize) -> Offset {
672 670 u32::try_from(x)
673 671 // Could only panic for a dirstate file larger than 4 GiB
674 672 .expect("dirstate-v2 offset overflow")
675 673 .into()
676 674 }
677 675
678 676 fn child_nodes_len_from_usize(x: usize) -> Size {
679 677 u32::try_from(x)
680 678 // Could only panic with over 4 billion nodes
681 679 .expect("dirstate-v2 slice length overflow")
682 680 .into()
683 681 }
684 682
685 683 fn path_len_from_usize(x: usize) -> PathSize {
686 684 u16::try_from(x)
687 685 // Could only panic for paths over 64 KiB
688 686 .expect("dirstate-v2 path length overflow")
689 687 .into()
690 688 }
691 689
692 690 impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
693 691 fn from(timestamp: TruncatedTimestamp) -> Self {
694 692 Self {
695 693 truncated_seconds: timestamp.truncated_seconds().into(),
696 694 nanoseconds: timestamp.nanoseconds().into(),
697 695 }
698 696 }
699 697 }
700 698
701 699 impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
702 700 type Error = DirstateV2ParseError;
703 701
704 702 fn try_from(
705 703 timestamp: PackedTruncatedTimestamp,
706 704 ) -> Result<Self, Self::Error> {
707 705 Self::from_already_truncated(
708 706 timestamp.truncated_seconds.get(),
709 707 timestamp.nanoseconds.get(),
710 708 )
711 709 }
712 710 }
713 711 impl PackedTruncatedTimestamp {
714 712 fn null() -> Self {
715 713 Self {
716 714 truncated_seconds: 0.into(),
717 715 nanoseconds: 0.into(),
718 716 }
719 717 }
720 718 }
General Comments 0
You need to be logged in to leave comments. Login now