##// END OF EJS Templates
dirstate-v2: initial Python parser...
Simon Sapin -
r49035:a32a9607 default
parent child Browse files
Show More
@@ -0,0 +1,118 b''
1 # v2.py - Pure-Python implementation of the dirstate-v2 file format
2 #
3 # Copyright Mercurial Contributors
4 #
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
7
8 from __future__ import absolute_import
9
10 import struct
11
12 from .. import policy
13
14 parsers = policy.importmod('parsers')
15
16
17 # Must match the constant of the same name in
18 # `rust/hg-core/src/dirstate_tree/on_disk.rs`
19 TREE_METADATA_SIZE = 44
20 NODE_SIZE = 43
21
22
23 # Must match the `TreeMetadata` Rust struct in
24 # `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there.
25 #
26 # * 4 bytes: start offset of root nodes
27 # * 4 bytes: number of root nodes
28 # * 4 bytes: total number of nodes in the tree that have an entry
29 # * 4 bytes: total number of nodes in the tree that have a copy source
30 # * 4 bytes: number of bytes in the data file that are not used anymore
31 # * 4 bytes: unused
32 # * 20 bytes: SHA-1 hash of ignore patterns
33 TREE_METADATA = struct.Struct('>LLLLL4s20s')
34
35
36 # Must match the `Node` Rust struct in
37 # `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there.
38 #
39 # * 4 bytes: start offset of full path
40 # * 2 bytes: length of the full path
41 # * 2 bytes: length within the full path before its "base name"
42 # * 4 bytes: start offset of the copy source if any, or zero for no copy source
43 # * 2 bytes: length of the copy source if any, or unused
44 # * 4 bytes: start offset of child nodes
45 # * 4 bytes: number of child nodes
46 # * 4 bytes: number of descendant nodes that have an entry
47 # * 4 bytes: number of descendant nodes that have a "tracked" state
48 # * 1 byte: flags
49 # * 4 bytes: expected size
50 # * 4 bytes: mtime seconds
51 # * 4 bytes: mtime nanoseconds
52 NODE = struct.Struct('>LHHLHLLLLBlll')
53
54
55 assert TREE_METADATA_SIZE == TREE_METADATA.size
56 assert NODE_SIZE == NODE.size
57
58
def parse_dirstate(map, copy_map, data, tree_metadata):
    """Fill `map` and `copy_map` from a complete dirstate-v2 binary blob

    - map: a {path: entry} dictionary, filled in place
    - copy_map: a {path: copy-source} dictionary, filled in place
    - data: the bytes holding the v2 nodes
    - tree_metadata: the bytes of the top-level tree metadata (from the
      docket), laid out as described by `TREE_METADATA`
    """
    metadata = TREE_METADATA.unpack(tree_metadata)
    # Only the location of the root nodes is needed to walk the tree; the
    # remaining fields (counters, unused bytes, ignore patterns hash) are
    # not used by this parser.
    root_nodes_start, root_nodes_len = metadata[:2]
    parse_nodes(map, copy_map, data, root_nodes_start, root_nodes_len)
77
78
def parse_nodes(map, copy_map, data, start, len):
    """Read <len> consecutive nodes of <data>, the first one at <start>

    Children of every node are visited recursively. A node whose item
    reports `any_tracked` is stored in `map` under its full path, and, when
    its copy source offset is non-zero, its copy source is stored in
    `copy_map` under the same path.

    parse_dirstate calls this for the root nodes.
    """
    offset = start
    for _ in range(len):
        raw_node = data[offset : offset + NODE_SIZE]
        offset += NODE_SIZE
        (
            name_start,
            name_len,
            _base_name_start,
            source_start,
            source_len,
            first_child,
            child_count,
            _entries_below,
            _tracked_below,
            node_flags,
            expected_size,
            mtime_seconds,
            _mtime_nanoseconds,
        ) = NODE.unpack(raw_node)

        # Descend first: children are handled before the node itself
        parse_nodes(map, copy_map, data, first_child, child_count)

        entry = parsers.DirstateItem.from_v2_data(
            node_flags, expected_size, mtime_seconds
        )
        if entry.any_tracked:
            full_path = data[name_start : name_start + name_len]
            map[full_path] = entry
            # A zero offset means "no copy source"
            if source_start:
                source_end = source_start + source_len
                copy_map[full_path] = data[source_start:source_end]
115
116
def slice_with_len(data, start, len):
    """Return the <len> items of <data> that begin at offset <start>"""
    stop = start + len
    return data[start:stop]
@@ -347,6 +347,33 b' static PyObject *dirstate_item_from_v1_m'
347 347 return (PyObject *)dirstate_item_from_v1_data(state, mode, size, mtime);
348 348 };
349 349
350 static PyObject *dirstate_item_from_v2_meth(PyTypeObject *subtype,
351 PyObject *args)
352 {
353 dirstateItemObject *t =
354 PyObject_New(dirstateItemObject, &dirstateItemType);
355 if (!t) {
356 return NULL;
357 }
358 if (!PyArg_ParseTuple(args, "bii", &t->flags, &t->size, &t->mtime)) {
359 return NULL;
360 }
361 t->mode = 0;
362 if (t->flags & dirstate_flag_has_meaningful_data) {
363 if (t->flags & dirstate_flag_mode_exec_perm) {
364 t->mode = 0755;
365 } else {
366 t->mode = 0644;
367 }
368 if (t->flags & dirstate_flag_mode_is_symlink) {
369 t->mode |= S_IFLNK;
370 } else {
371 t->mode |= S_IFREG;
372 }
373 }
374 return (PyObject *)t;
375 };
376
350 377 /* This means the next status call will have to actually check its content
351 378 to make sure it is correct. */
352 379 static PyObject *dirstate_item_set_possibly_dirty(dirstateItemObject *self)
@@ -413,6 +440,8 b' static PyMethodDef dirstate_item_methods'
413 440 "True if the stored mtime would be ambiguous with the current time"},
414 441 {"from_v1_data", (PyCFunction)dirstate_item_from_v1_meth,
415 442 METH_VARARGS | METH_CLASS, "build a new DirstateItem object from V1 data"},
443 {"from_v2_data", (PyCFunction)dirstate_item_from_v2_meth,
444 METH_VARARGS | METH_CLASS, "build a new DirstateItem object from V2 data"},
416 445 {"set_possibly_dirty", (PyCFunction)dirstate_item_set_possibly_dirty,
417 446 METH_NOARGS, "mark a file as \"possibly dirty\""},
418 447 {"set_clean", (PyCFunction)dirstate_item_set_clean, METH_VARARGS,
@@ -36,6 +36,8 b' static const unsigned char dirstate_flag'
36 36 static const unsigned char dirstate_flag_p2_info = 1 << 2;
37 37 static const unsigned char dirstate_flag_has_meaningful_data = 1 << 3;
38 38 static const unsigned char dirstate_flag_has_meaningful_mtime = 1 << 4;
39 static const unsigned char dirstate_flag_mode_exec_perm = 1 << 5;
40 static const unsigned char dirstate_flag_mode_is_symlink = 1 << 6;
39 41
40 42 extern PyTypeObject dirstateItemType;
41 43 #define dirstate_tuple_check(op) (Py_TYPE(op) == &dirstateItemType)
@@ -20,6 +20,7 b' from . import ('
20 20
21 21 from .dirstateutils import (
22 22 docket as docketmod,
23 v2,
23 24 )
24 25
25 26 parsers = policy.importmod('parsers')
@@ -10,14 +10,10 b' from __future__ import absolute_import'
10 10 import struct
11 11
12 12 from ..revlogutils import docket as docket_mod
13
13 from . import v2
14 14
15 15 V2_FORMAT_MARKER = b"dirstate-v2\n"
16 16
17 # Must match the constant of the same name in
18 # `rust/hg-core/src/dirstate_tree/on_disk.rs`
19 TREE_METADATA_SIZE = 44
20
21 17 # * 12 bytes: format marker
22 18 # * 32 bytes: node ID of the working directory's first parent
23 19 # * 32 bytes: node ID of the working directory's second parent
@@ -29,7 +25,7 b' TREE_METADATA_SIZE = 44'
29 25 # Node IDs are null-padded if shorter than 32 bytes.
30 26 # A data file shorter than the specified used size is corrupted (truncated)
31 27 HEADER = struct.Struct(
32 ">{}s32s32s{}sLB".format(len(V2_FORMAT_MARKER), TREE_METADATA_SIZE)
28 ">{}s32s32s{}sLB".format(len(V2_FORMAT_MARKER), v2.TREE_METADATA_SIZE)
33 29 )
34 30
35 31
@@ -7,6 +7,7 b''
7 7
8 8 from __future__ import absolute_import
9 9
10 import stat
10 11 import struct
11 12 import zlib
12 13
@@ -43,6 +44,15 b' NONNORMAL = -1'
43 44 # a special value used internally for `time` if the time is ambiguous
44 45 AMBIGUOUS_TIME = -1
45 46
47 # Bits of the `flags` byte inside a node in the file format
48 DIRSTATE_V2_WDIR_TRACKED = 1 << 0
49 DIRSTATE_V2_P1_TRACKED = 1 << 1
50 DIRSTATE_V2_P2_INFO = 1 << 2
51 DIRSTATE_V2_HAS_MODE_AND_SIZE = 1 << 3
52 DIRSTATE_V2_HAS_MTIME = 1 << 4
53 DIRSTATE_V2_MODE_EXEC_PERM = 1 << 5
54 DIRSTATE_V2_MODE_IS_SYMLINK = 1 << 6
55
46 56
47 57 @attr.s(slots=True, init=False)
48 58 class DirstateItem(object):
@@ -109,6 +119,30 b' class DirstateItem(object):'
109 119 self._mtime = parentfiledata[2]
110 120
@classmethod
def from_v2_data(cls, flags, size, mtime):
    """Build a new DirstateItem object from V2 data

    `flags` is the flags byte of a dirstate-v2 node. The mode is rebuilt
    from its bits when the "has mode and size" bit is set, and is None
    otherwise.
    """
    meaningful_data = bool(flags & DIRSTATE_V2_HAS_MODE_AND_SIZE)
    if meaningful_data:
        assert stat.S_IXUSR == 0o100
        # Permission bits, then file type bits
        perms = 0o755 if flags & DIRSTATE_V2_MODE_EXEC_PERM else 0o644
        if flags & DIRSTATE_V2_MODE_IS_SYMLINK:
            file_type = stat.S_IFLNK
        else:
            file_type = stat.S_IFREG
        mode = perms | file_type
    else:
        mode = None
    wc_tracked = bool(flags & DIRSTATE_V2_WDIR_TRACKED)
    p1_tracked = bool(flags & DIRSTATE_V2_P1_TRACKED)
    p2_info = bool(flags & DIRSTATE_V2_P2_INFO)
    meaningful_mtime = bool(flags & DIRSTATE_V2_HAS_MTIME)
    return cls(
        wc_tracked=wc_tracked,
        p1_tracked=p1_tracked,
        p2_info=p2_info,
        has_meaningful_data=meaningful_data,
        has_meaningful_mtime=meaningful_mtime,
        parentfiledata=(mode, size, mtime),
    )
144
145 @classmethod
112 146 def from_v1_data(cls, state, mode, size, mtime):
113 147 """Build a new DirstateItem object from V1 data
114 148
@@ -31,10 +31,8 b' const USED_NODE_ID_BYTES: usize = 20;'
31 31 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
32 32 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
33 33
34 /// Must match the constant of the same name in
35 /// `mercurial/dirstateutils/docket.py`
34 /// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
36 35 const TREE_METADATA_SIZE: usize = 44;
37
38 36 const NODE_SIZE: usize = 43;
39 37
40 38 /// Make sure that size-affecting changes are made knowingly
General Comments 0
You need to be logged in to leave comments. Login now