##// END OF EJS Templates
dirstate-v2: add a new MTIME_SECOND_AMBIGUOUS flags...
marmoute -
r49080:9205d9be default
parent child Browse files
Show More
@@ -1,1311 +1,1319 b''
1 /*
1 /*
2 parsers.c - efficient content parsing
2 parsers.c - efficient content parsing
3
3
4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8 */
8 */
9
9
10 #define PY_SSIZE_T_CLEAN
10 #define PY_SSIZE_T_CLEAN
11 #include <Python.h>
11 #include <Python.h>
12 #include <ctype.h>
12 #include <ctype.h>
13 #include <stddef.h>
13 #include <stddef.h>
14 #include <string.h>
14 #include <string.h>
15
15
16 #include "bitmanipulation.h"
16 #include "bitmanipulation.h"
17 #include "charencode.h"
17 #include "charencode.h"
18 #include "util.h"
18 #include "util.h"
19
19
20 #ifdef IS_PY3K
20 #ifdef IS_PY3K
21 /* The mapping of Python types is meant to be temporary to get Python
21 /* The mapping of Python types is meant to be temporary to get Python
22 * 3 to compile. We should remove this once Python 3 support is fully
22 * 3 to compile. We should remove this once Python 3 support is fully
23 * supported and proper types are used in the extensions themselves. */
23 * supported and proper types are used in the extensions themselves. */
24 #define PyInt_Check PyLong_Check
24 #define PyInt_Check PyLong_Check
25 #define PyInt_FromLong PyLong_FromLong
25 #define PyInt_FromLong PyLong_FromLong
26 #define PyInt_FromSsize_t PyLong_FromSsize_t
26 #define PyInt_FromSsize_t PyLong_FromSsize_t
27 #define PyInt_AsLong PyLong_AsLong
27 #define PyInt_AsLong PyLong_AsLong
28 #endif
28 #endif
29
29
30 static const char *const versionerrortext = "Python minor version mismatch";
30 static const char *const versionerrortext = "Python minor version mismatch";
31
31
32 static const int dirstate_v1_from_p2 = -2;
32 static const int dirstate_v1_from_p2 = -2;
33 static const int dirstate_v1_nonnormal = -1;
33 static const int dirstate_v1_nonnormal = -1;
34 static const int ambiguous_time = -1;
34 static const int ambiguous_time = -1;
35
35
36 static PyObject *dict_new_presized(PyObject *self, PyObject *args)
36 static PyObject *dict_new_presized(PyObject *self, PyObject *args)
37 {
37 {
38 Py_ssize_t expected_size;
38 Py_ssize_t expected_size;
39
39
40 if (!PyArg_ParseTuple(args, "n:make_presized_dict", &expected_size)) {
40 if (!PyArg_ParseTuple(args, "n:make_presized_dict", &expected_size)) {
41 return NULL;
41 return NULL;
42 }
42 }
43
43
44 return _dict_new_presized(expected_size);
44 return _dict_new_presized(expected_size);
45 }
45 }
46
46
47 static PyObject *dirstate_item_new(PyTypeObject *subtype, PyObject *args,
47 static PyObject *dirstate_item_new(PyTypeObject *subtype, PyObject *args,
48 PyObject *kwds)
48 PyObject *kwds)
49 {
49 {
50 /* We do all the initialization here and not a tp_init function because
50 /* We do all the initialization here and not a tp_init function because
51 * dirstate_item is immutable. */
51 * dirstate_item is immutable. */
52 dirstateItemObject *t;
52 dirstateItemObject *t;
53 int wc_tracked;
53 int wc_tracked;
54 int p1_tracked;
54 int p1_tracked;
55 int p2_info;
55 int p2_info;
56 int has_meaningful_data;
56 int has_meaningful_data;
57 int has_meaningful_mtime;
57 int has_meaningful_mtime;
58 int mode;
58 int mode;
59 int size;
59 int size;
60 int mtime_s;
60 int mtime_s;
61 int mtime_ns;
61 int mtime_ns;
62 PyObject *parentfiledata;
62 PyObject *parentfiledata;
63 PyObject *fallback_exec;
63 PyObject *fallback_exec;
64 PyObject *fallback_symlink;
64 PyObject *fallback_symlink;
65 static char *keywords_name[] = {
65 static char *keywords_name[] = {
66 "wc_tracked", "p1_tracked", "p2_info",
66 "wc_tracked", "p1_tracked", "p2_info",
67 "has_meaningful_data", "has_meaningful_mtime", "parentfiledata",
67 "has_meaningful_data", "has_meaningful_mtime", "parentfiledata",
68 "fallback_exec", "fallback_symlink", NULL,
68 "fallback_exec", "fallback_symlink", NULL,
69 };
69 };
70 wc_tracked = 0;
70 wc_tracked = 0;
71 p1_tracked = 0;
71 p1_tracked = 0;
72 p2_info = 0;
72 p2_info = 0;
73 has_meaningful_mtime = 1;
73 has_meaningful_mtime = 1;
74 has_meaningful_data = 1;
74 has_meaningful_data = 1;
75 parentfiledata = Py_None;
75 parentfiledata = Py_None;
76 fallback_exec = Py_None;
76 fallback_exec = Py_None;
77 fallback_symlink = Py_None;
77 fallback_symlink = Py_None;
78 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|iiiiiOOO", keywords_name,
78 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|iiiiiOOO", keywords_name,
79 &wc_tracked, &p1_tracked, &p2_info,
79 &wc_tracked, &p1_tracked, &p2_info,
80 &has_meaningful_data,
80 &has_meaningful_data,
81 &has_meaningful_mtime, &parentfiledata,
81 &has_meaningful_mtime, &parentfiledata,
82 &fallback_exec, &fallback_symlink)) {
82 &fallback_exec, &fallback_symlink)) {
83 return NULL;
83 return NULL;
84 }
84 }
85 t = (dirstateItemObject *)subtype->tp_alloc(subtype, 1);
85 t = (dirstateItemObject *)subtype->tp_alloc(subtype, 1);
86 if (!t) {
86 if (!t) {
87 return NULL;
87 return NULL;
88 }
88 }
89
89
90 t->flags = 0;
90 t->flags = 0;
91 if (wc_tracked) {
91 if (wc_tracked) {
92 t->flags |= dirstate_flag_wc_tracked;
92 t->flags |= dirstate_flag_wc_tracked;
93 }
93 }
94 if (p1_tracked) {
94 if (p1_tracked) {
95 t->flags |= dirstate_flag_p1_tracked;
95 t->flags |= dirstate_flag_p1_tracked;
96 }
96 }
97 if (p2_info) {
97 if (p2_info) {
98 t->flags |= dirstate_flag_p2_info;
98 t->flags |= dirstate_flag_p2_info;
99 }
99 }
100
100
101 if (fallback_exec != Py_None) {
101 if (fallback_exec != Py_None) {
102 t->flags |= dirstate_flag_has_fallback_exec;
102 t->flags |= dirstate_flag_has_fallback_exec;
103 if (PyObject_IsTrue(fallback_exec)) {
103 if (PyObject_IsTrue(fallback_exec)) {
104 t->flags |= dirstate_flag_fallback_exec;
104 t->flags |= dirstate_flag_fallback_exec;
105 }
105 }
106 }
106 }
107 if (fallback_symlink != Py_None) {
107 if (fallback_symlink != Py_None) {
108 t->flags |= dirstate_flag_has_fallback_symlink;
108 t->flags |= dirstate_flag_has_fallback_symlink;
109 if (PyObject_IsTrue(fallback_symlink)) {
109 if (PyObject_IsTrue(fallback_symlink)) {
110 t->flags |= dirstate_flag_fallback_symlink;
110 t->flags |= dirstate_flag_fallback_symlink;
111 }
111 }
112 }
112 }
113
113
114 if (parentfiledata != Py_None) {
114 if (parentfiledata != Py_None) {
115 if (!PyArg_ParseTuple(parentfiledata, "ii(ii)", &mode, &size,
115 if (!PyArg_ParseTuple(parentfiledata, "ii(ii)", &mode, &size,
116 &mtime_s, &mtime_ns)) {
116 &mtime_s, &mtime_ns)) {
117 return NULL;
117 return NULL;
118 }
118 }
119 } else {
119 } else {
120 has_meaningful_data = 0;
120 has_meaningful_data = 0;
121 has_meaningful_mtime = 0;
121 has_meaningful_mtime = 0;
122 }
122 }
123 if (has_meaningful_data) {
123 if (has_meaningful_data) {
124 t->flags |= dirstate_flag_has_meaningful_data;
124 t->flags |= dirstate_flag_has_meaningful_data;
125 t->mode = mode;
125 t->mode = mode;
126 t->size = size;
126 t->size = size;
127 } else {
127 } else {
128 t->mode = 0;
128 t->mode = 0;
129 t->size = 0;
129 t->size = 0;
130 }
130 }
131 if (has_meaningful_mtime) {
131 if (has_meaningful_mtime) {
132 t->flags |= dirstate_flag_has_file_mtime;
132 t->flags |= dirstate_flag_has_file_mtime;
133 t->mtime_s = mtime_s;
133 t->mtime_s = mtime_s;
134 t->mtime_ns = mtime_ns;
134 t->mtime_ns = mtime_ns;
135 } else {
135 } else {
136 t->mtime_s = 0;
136 t->mtime_s = 0;
137 t->mtime_ns = 0;
137 t->mtime_ns = 0;
138 }
138 }
139 return (PyObject *)t;
139 return (PyObject *)t;
140 }
140 }
141
141
142 static void dirstate_item_dealloc(PyObject *o)
142 static void dirstate_item_dealloc(PyObject *o)
143 {
143 {
144 PyObject_Del(o);
144 PyObject_Del(o);
145 }
145 }
146
146
147 static inline bool dirstate_item_c_tracked(dirstateItemObject *self)
147 static inline bool dirstate_item_c_tracked(dirstateItemObject *self)
148 {
148 {
149 return (self->flags & dirstate_flag_wc_tracked);
149 return (self->flags & dirstate_flag_wc_tracked);
150 }
150 }
151
151
152 static inline bool dirstate_item_c_any_tracked(dirstateItemObject *self)
152 static inline bool dirstate_item_c_any_tracked(dirstateItemObject *self)
153 {
153 {
154 const int mask = dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
154 const int mask = dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
155 dirstate_flag_p2_info;
155 dirstate_flag_p2_info;
156 return (self->flags & mask);
156 return (self->flags & mask);
157 }
157 }
158
158
159 static inline bool dirstate_item_c_added(dirstateItemObject *self)
159 static inline bool dirstate_item_c_added(dirstateItemObject *self)
160 {
160 {
161 const int mask = (dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
161 const int mask = (dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
162 dirstate_flag_p2_info);
162 dirstate_flag_p2_info);
163 const int target = dirstate_flag_wc_tracked;
163 const int target = dirstate_flag_wc_tracked;
164 return (self->flags & mask) == target;
164 return (self->flags & mask) == target;
165 }
165 }
166
166
167 static inline bool dirstate_item_c_removed(dirstateItemObject *self)
167 static inline bool dirstate_item_c_removed(dirstateItemObject *self)
168 {
168 {
169 if (self->flags & dirstate_flag_wc_tracked) {
169 if (self->flags & dirstate_flag_wc_tracked) {
170 return false;
170 return false;
171 }
171 }
172 return (self->flags &
172 return (self->flags &
173 (dirstate_flag_p1_tracked | dirstate_flag_p2_info));
173 (dirstate_flag_p1_tracked | dirstate_flag_p2_info));
174 }
174 }
175
175
176 static inline bool dirstate_item_c_merged(dirstateItemObject *self)
176 static inline bool dirstate_item_c_merged(dirstateItemObject *self)
177 {
177 {
178 return ((self->flags & dirstate_flag_wc_tracked) &&
178 return ((self->flags & dirstate_flag_wc_tracked) &&
179 (self->flags & dirstate_flag_p1_tracked) &&
179 (self->flags & dirstate_flag_p1_tracked) &&
180 (self->flags & dirstate_flag_p2_info));
180 (self->flags & dirstate_flag_p2_info));
181 }
181 }
182
182
183 static inline bool dirstate_item_c_from_p2(dirstateItemObject *self)
183 static inline bool dirstate_item_c_from_p2(dirstateItemObject *self)
184 {
184 {
185 return ((self->flags & dirstate_flag_wc_tracked) &&
185 return ((self->flags & dirstate_flag_wc_tracked) &&
186 !(self->flags & dirstate_flag_p1_tracked) &&
186 !(self->flags & dirstate_flag_p1_tracked) &&
187 (self->flags & dirstate_flag_p2_info));
187 (self->flags & dirstate_flag_p2_info));
188 }
188 }
189
189
190 static inline char dirstate_item_c_v1_state(dirstateItemObject *self)
190 static inline char dirstate_item_c_v1_state(dirstateItemObject *self)
191 {
191 {
192 if (dirstate_item_c_removed(self)) {
192 if (dirstate_item_c_removed(self)) {
193 return 'r';
193 return 'r';
194 } else if (dirstate_item_c_merged(self)) {
194 } else if (dirstate_item_c_merged(self)) {
195 return 'm';
195 return 'm';
196 } else if (dirstate_item_c_added(self)) {
196 } else if (dirstate_item_c_added(self)) {
197 return 'a';
197 return 'a';
198 } else {
198 } else {
199 return 'n';
199 return 'n';
200 }
200 }
201 }
201 }
202
202
203 static inline bool dirstate_item_c_has_fallback_exec(dirstateItemObject *self)
203 static inline bool dirstate_item_c_has_fallback_exec(dirstateItemObject *self)
204 {
204 {
205 return (bool)self->flags & dirstate_flag_has_fallback_exec;
205 return (bool)self->flags & dirstate_flag_has_fallback_exec;
206 }
206 }
207
207
208 static inline bool
208 static inline bool
209 dirstate_item_c_has_fallback_symlink(dirstateItemObject *self)
209 dirstate_item_c_has_fallback_symlink(dirstateItemObject *self)
210 {
210 {
211 return (bool)self->flags & dirstate_flag_has_fallback_symlink;
211 return (bool)self->flags & dirstate_flag_has_fallback_symlink;
212 }
212 }
213
213
214 static inline int dirstate_item_c_v1_mode(dirstateItemObject *self)
214 static inline int dirstate_item_c_v1_mode(dirstateItemObject *self)
215 {
215 {
216 if (self->flags & dirstate_flag_has_meaningful_data) {
216 if (self->flags & dirstate_flag_has_meaningful_data) {
217 return self->mode;
217 return self->mode;
218 } else {
218 } else {
219 return 0;
219 return 0;
220 }
220 }
221 }
221 }
222
222
223 static inline int dirstate_item_c_v1_size(dirstateItemObject *self)
223 static inline int dirstate_item_c_v1_size(dirstateItemObject *self)
224 {
224 {
225 if (!(self->flags & dirstate_flag_wc_tracked) &&
225 if (!(self->flags & dirstate_flag_wc_tracked) &&
226 (self->flags & dirstate_flag_p2_info)) {
226 (self->flags & dirstate_flag_p2_info)) {
227 if (self->flags & dirstate_flag_p1_tracked) {
227 if (self->flags & dirstate_flag_p1_tracked) {
228 return dirstate_v1_nonnormal;
228 return dirstate_v1_nonnormal;
229 } else {
229 } else {
230 return dirstate_v1_from_p2;
230 return dirstate_v1_from_p2;
231 }
231 }
232 } else if (dirstate_item_c_removed(self)) {
232 } else if (dirstate_item_c_removed(self)) {
233 return 0;
233 return 0;
234 } else if (self->flags & dirstate_flag_p2_info) {
234 } else if (self->flags & dirstate_flag_p2_info) {
235 return dirstate_v1_from_p2;
235 return dirstate_v1_from_p2;
236 } else if (dirstate_item_c_added(self)) {
236 } else if (dirstate_item_c_added(self)) {
237 return dirstate_v1_nonnormal;
237 return dirstate_v1_nonnormal;
238 } else if (self->flags & dirstate_flag_has_meaningful_data) {
238 } else if (self->flags & dirstate_flag_has_meaningful_data) {
239 return self->size;
239 return self->size;
240 } else {
240 } else {
241 return dirstate_v1_nonnormal;
241 return dirstate_v1_nonnormal;
242 }
242 }
243 }
243 }
244
244
245 static inline int dirstate_item_c_v1_mtime(dirstateItemObject *self)
245 static inline int dirstate_item_c_v1_mtime(dirstateItemObject *self)
246 {
246 {
247 if (dirstate_item_c_removed(self)) {
247 if (dirstate_item_c_removed(self)) {
248 return 0;
248 return 0;
249 } else if (!(self->flags & dirstate_flag_has_file_mtime) ||
249 } else if (!(self->flags & dirstate_flag_has_file_mtime) ||
250 !(self->flags & dirstate_flag_p1_tracked) ||
250 !(self->flags & dirstate_flag_p1_tracked) ||
251 !(self->flags & dirstate_flag_wc_tracked) ||
251 !(self->flags & dirstate_flag_wc_tracked) ||
252 (self->flags & dirstate_flag_p2_info)) {
252 (self->flags & dirstate_flag_p2_info)) {
253 return ambiguous_time;
253 return ambiguous_time;
254 } else {
254 } else {
255 return self->mtime_s;
255 return self->mtime_s;
256 }
256 }
257 }
257 }
258
258
259 static PyObject *dirstate_item_v2_data(dirstateItemObject *self)
259 static PyObject *dirstate_item_v2_data(dirstateItemObject *self)
260 {
260 {
261 int flags = self->flags;
261 int flags = self->flags;
262 int mode = dirstate_item_c_v1_mode(self);
262 int mode = dirstate_item_c_v1_mode(self);
263 if ((mode & S_IXUSR) != 0) {
263 if ((mode & S_IXUSR) != 0) {
264 flags |= dirstate_flag_mode_exec_perm;
264 flags |= dirstate_flag_mode_exec_perm;
265 } else {
265 } else {
266 flags &= ~dirstate_flag_mode_exec_perm;
266 flags &= ~dirstate_flag_mode_exec_perm;
267 }
267 }
268 if (S_ISLNK(mode)) {
268 if (S_ISLNK(mode)) {
269 flags |= dirstate_flag_mode_is_symlink;
269 flags |= dirstate_flag_mode_is_symlink;
270 } else {
270 } else {
271 flags &= ~dirstate_flag_mode_is_symlink;
271 flags &= ~dirstate_flag_mode_is_symlink;
272 }
272 }
273 return Py_BuildValue("iiii", flags, self->size, self->mtime_s,
273 return Py_BuildValue("iiii", flags, self->size, self->mtime_s,
274 self->mtime_ns);
274 self->mtime_ns);
275 };
275 };
276
276
277 static PyObject *dirstate_item_v1_state(dirstateItemObject *self)
277 static PyObject *dirstate_item_v1_state(dirstateItemObject *self)
278 {
278 {
279 char state = dirstate_item_c_v1_state(self);
279 char state = dirstate_item_c_v1_state(self);
280 return PyBytes_FromStringAndSize(&state, 1);
280 return PyBytes_FromStringAndSize(&state, 1);
281 };
281 };
282
282
283 static PyObject *dirstate_item_v1_mode(dirstateItemObject *self)
283 static PyObject *dirstate_item_v1_mode(dirstateItemObject *self)
284 {
284 {
285 return PyInt_FromLong(dirstate_item_c_v1_mode(self));
285 return PyInt_FromLong(dirstate_item_c_v1_mode(self));
286 };
286 };
287
287
288 static PyObject *dirstate_item_v1_size(dirstateItemObject *self)
288 static PyObject *dirstate_item_v1_size(dirstateItemObject *self)
289 {
289 {
290 return PyInt_FromLong(dirstate_item_c_v1_size(self));
290 return PyInt_FromLong(dirstate_item_c_v1_size(self));
291 };
291 };
292
292
293 static PyObject *dirstate_item_v1_mtime(dirstateItemObject *self)
293 static PyObject *dirstate_item_v1_mtime(dirstateItemObject *self)
294 {
294 {
295 return PyInt_FromLong(dirstate_item_c_v1_mtime(self));
295 return PyInt_FromLong(dirstate_item_c_v1_mtime(self));
296 };
296 };
297
297
298 static PyObject *dirstate_item_need_delay(dirstateItemObject *self,
298 static PyObject *dirstate_item_need_delay(dirstateItemObject *self,
299 PyObject *now)
299 PyObject *now)
300 {
300 {
301 int now_s;
301 int now_s;
302 int now_ns;
302 int now_ns;
303 if (!PyArg_ParseTuple(now, "ii", &now_s, &now_ns)) {
303 if (!PyArg_ParseTuple(now, "ii", &now_s, &now_ns)) {
304 return NULL;
304 return NULL;
305 }
305 }
306 if (dirstate_item_c_v1_state(self) == 'n' && self->mtime_s == now_s) {
306 if (dirstate_item_c_v1_state(self) == 'n' && self->mtime_s == now_s) {
307 Py_RETURN_TRUE;
307 Py_RETURN_TRUE;
308 } else {
308 } else {
309 Py_RETURN_FALSE;
309 Py_RETURN_FALSE;
310 }
310 }
311 };
311 };
312
312
313 static PyObject *dirstate_item_mtime_likely_equal_to(dirstateItemObject *self,
313 static PyObject *dirstate_item_mtime_likely_equal_to(dirstateItemObject *self,
314 PyObject *other)
314 PyObject *other)
315 {
315 {
316 int other_s;
316 int other_s;
317 int other_ns;
317 int other_ns;
318 if (!PyArg_ParseTuple(other, "ii", &other_s, &other_ns)) {
318 if (!PyArg_ParseTuple(other, "ii", &other_s, &other_ns)) {
319 return NULL;
319 return NULL;
320 }
320 }
321 if ((self->flags & dirstate_flag_has_file_mtime) &&
321 if ((self->flags & dirstate_flag_has_file_mtime) &&
322 self->mtime_s == other_s && self->mtime_ns == other_ns) {
322 self->mtime_s == other_s && self->mtime_ns == other_ns) {
323 Py_RETURN_TRUE;
323 Py_RETURN_TRUE;
324 } else {
324 } else {
325 Py_RETURN_FALSE;
325 Py_RETURN_FALSE;
326 }
326 }
327 };
327 };
328
328
329 /* This will never change since it's bound to V1
329 /* This will never change since it's bound to V1
330 */
330 */
331 static inline dirstateItemObject *
331 static inline dirstateItemObject *
332 dirstate_item_from_v1_data(char state, int mode, int size, int mtime)
332 dirstate_item_from_v1_data(char state, int mode, int size, int mtime)
333 {
333 {
334 dirstateItemObject *t =
334 dirstateItemObject *t =
335 PyObject_New(dirstateItemObject, &dirstateItemType);
335 PyObject_New(dirstateItemObject, &dirstateItemType);
336 if (!t) {
336 if (!t) {
337 return NULL;
337 return NULL;
338 }
338 }
339 t->flags = 0;
339 t->flags = 0;
340 t->mode = 0;
340 t->mode = 0;
341 t->size = 0;
341 t->size = 0;
342 t->mtime_s = 0;
342 t->mtime_s = 0;
343 t->mtime_ns = 0;
343 t->mtime_ns = 0;
344
344
345 if (state == 'm') {
345 if (state == 'm') {
346 t->flags = (dirstate_flag_wc_tracked |
346 t->flags = (dirstate_flag_wc_tracked |
347 dirstate_flag_p1_tracked | dirstate_flag_p2_info);
347 dirstate_flag_p1_tracked | dirstate_flag_p2_info);
348 } else if (state == 'a') {
348 } else if (state == 'a') {
349 t->flags = dirstate_flag_wc_tracked;
349 t->flags = dirstate_flag_wc_tracked;
350 } else if (state == 'r') {
350 } else if (state == 'r') {
351 if (size == dirstate_v1_nonnormal) {
351 if (size == dirstate_v1_nonnormal) {
352 t->flags =
352 t->flags =
353 dirstate_flag_p1_tracked | dirstate_flag_p2_info;
353 dirstate_flag_p1_tracked | dirstate_flag_p2_info;
354 } else if (size == dirstate_v1_from_p2) {
354 } else if (size == dirstate_v1_from_p2) {
355 t->flags = dirstate_flag_p2_info;
355 t->flags = dirstate_flag_p2_info;
356 } else {
356 } else {
357 t->flags = dirstate_flag_p1_tracked;
357 t->flags = dirstate_flag_p1_tracked;
358 }
358 }
359 } else if (state == 'n') {
359 } else if (state == 'n') {
360 if (size == dirstate_v1_from_p2) {
360 if (size == dirstate_v1_from_p2) {
361 t->flags =
361 t->flags =
362 dirstate_flag_wc_tracked | dirstate_flag_p2_info;
362 dirstate_flag_wc_tracked | dirstate_flag_p2_info;
363 } else if (size == dirstate_v1_nonnormal) {
363 } else if (size == dirstate_v1_nonnormal) {
364 t->flags =
364 t->flags =
365 dirstate_flag_wc_tracked | dirstate_flag_p1_tracked;
365 dirstate_flag_wc_tracked | dirstate_flag_p1_tracked;
366 } else if (mtime == ambiguous_time) {
366 } else if (mtime == ambiguous_time) {
367 t->flags = (dirstate_flag_wc_tracked |
367 t->flags = (dirstate_flag_wc_tracked |
368 dirstate_flag_p1_tracked |
368 dirstate_flag_p1_tracked |
369 dirstate_flag_has_meaningful_data);
369 dirstate_flag_has_meaningful_data);
370 t->mode = mode;
370 t->mode = mode;
371 t->size = size;
371 t->size = size;
372 } else {
372 } else {
373 t->flags = (dirstate_flag_wc_tracked |
373 t->flags = (dirstate_flag_wc_tracked |
374 dirstate_flag_p1_tracked |
374 dirstate_flag_p1_tracked |
375 dirstate_flag_has_meaningful_data |
375 dirstate_flag_has_meaningful_data |
376 dirstate_flag_has_file_mtime);
376 dirstate_flag_has_file_mtime);
377 t->mode = mode;
377 t->mode = mode;
378 t->size = size;
378 t->size = size;
379 t->mtime_s = mtime;
379 t->mtime_s = mtime;
380 }
380 }
381 } else {
381 } else {
382 PyErr_Format(PyExc_RuntimeError,
382 PyErr_Format(PyExc_RuntimeError,
383 "unknown state: `%c` (%d, %d, %d)", state, mode,
383 "unknown state: `%c` (%d, %d, %d)", state, mode,
384 size, mtime, NULL);
384 size, mtime, NULL);
385 Py_DECREF(t);
385 Py_DECREF(t);
386 return NULL;
386 return NULL;
387 }
387 }
388
388
389 return t;
389 return t;
390 }
390 }
391
391
392 /* This will never change since it's bound to V1, unlike `dirstate_item_new` */
392 /* This will never change since it's bound to V1, unlike `dirstate_item_new` */
393 static PyObject *dirstate_item_from_v1_meth(PyTypeObject *subtype,
393 static PyObject *dirstate_item_from_v1_meth(PyTypeObject *subtype,
394 PyObject *args)
394 PyObject *args)
395 {
395 {
396 /* We do all the initialization here and not a tp_init function because
396 /* We do all the initialization here and not a tp_init function because
397 * dirstate_item is immutable. */
397 * dirstate_item is immutable. */
398 char state;
398 char state;
399 int size, mode, mtime;
399 int size, mode, mtime;
400 if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime)) {
400 if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime)) {
401 return NULL;
401 return NULL;
402 }
402 }
403 return (PyObject *)dirstate_item_from_v1_data(state, mode, size, mtime);
403 return (PyObject *)dirstate_item_from_v1_data(state, mode, size, mtime);
404 };
404 };
405
405
406 static PyObject *dirstate_item_from_v2_meth(PyTypeObject *subtype,
406 static PyObject *dirstate_item_from_v2_meth(PyTypeObject *subtype,
407 PyObject *args)
407 PyObject *args)
408 {
408 {
409 dirstateItemObject *t =
409 dirstateItemObject *t =
410 PyObject_New(dirstateItemObject, &dirstateItemType);
410 PyObject_New(dirstateItemObject, &dirstateItemType);
411 if (!t) {
411 if (!t) {
412 return NULL;
412 return NULL;
413 }
413 }
414 if (!PyArg_ParseTuple(args, "iiii", &t->flags, &t->size, &t->mtime_s,
414 if (!PyArg_ParseTuple(args, "iiii", &t->flags, &t->size, &t->mtime_s,
415 &t->mtime_ns)) {
415 &t->mtime_ns)) {
416 return NULL;
416 return NULL;
417 }
417 }
418 if (t->flags & dirstate_flag_expected_state_is_modified) {
418 if (t->flags & dirstate_flag_expected_state_is_modified) {
419 t->flags &= ~(dirstate_flag_expected_state_is_modified |
419 t->flags &= ~(dirstate_flag_expected_state_is_modified |
420 dirstate_flag_has_meaningful_data |
420 dirstate_flag_has_meaningful_data |
421 dirstate_flag_has_file_mtime);
421 dirstate_flag_has_file_mtime);
422 }
422 }
423 if (t->flags & dirstate_flag_mtime_second_ambiguous) {
424 /* The current code is not able to do the more subtle comparison
425 * that the MTIME_SECOND_AMBIGUOUS requires. So we ignore the
426 * mtime */
427 t->flags &= ~(dirstate_flag_mtime_second_ambiguous |
428 dirstate_flag_has_meaningful_data |
429 dirstate_flag_has_file_mtime);
430 }
423 t->mode = 0;
431 t->mode = 0;
424 if (t->flags & dirstate_flag_has_meaningful_data) {
432 if (t->flags & dirstate_flag_has_meaningful_data) {
425 if (t->flags & dirstate_flag_mode_exec_perm) {
433 if (t->flags & dirstate_flag_mode_exec_perm) {
426 t->mode = 0755;
434 t->mode = 0755;
427 } else {
435 } else {
428 t->mode = 0644;
436 t->mode = 0644;
429 }
437 }
430 if (t->flags & dirstate_flag_mode_is_symlink) {
438 if (t->flags & dirstate_flag_mode_is_symlink) {
431 t->mode |= S_IFLNK;
439 t->mode |= S_IFLNK;
432 } else {
440 } else {
433 t->mode |= S_IFREG;
441 t->mode |= S_IFREG;
434 }
442 }
435 }
443 }
436 return (PyObject *)t;
444 return (PyObject *)t;
437 };
445 };
438
446
439 /* This means the next status call will have to actually check its content
447 /* This means the next status call will have to actually check its content
440 to make sure it is correct. */
448 to make sure it is correct. */
441 static PyObject *dirstate_item_set_possibly_dirty(dirstateItemObject *self)
449 static PyObject *dirstate_item_set_possibly_dirty(dirstateItemObject *self)
442 {
450 {
443 self->flags &= ~dirstate_flag_has_file_mtime;
451 self->flags &= ~dirstate_flag_has_file_mtime;
444 Py_RETURN_NONE;
452 Py_RETURN_NONE;
445 }
453 }
446
454
447 /* See docstring of the python implementation for details */
455 /* See docstring of the python implementation for details */
448 static PyObject *dirstate_item_set_clean(dirstateItemObject *self,
456 static PyObject *dirstate_item_set_clean(dirstateItemObject *self,
449 PyObject *args)
457 PyObject *args)
450 {
458 {
451 int size, mode, mtime_s, mtime_ns;
459 int size, mode, mtime_s, mtime_ns;
452 if (!PyArg_ParseTuple(args, "ii(ii)", &mode, &size, &mtime_s,
460 if (!PyArg_ParseTuple(args, "ii(ii)", &mode, &size, &mtime_s,
453 &mtime_ns)) {
461 &mtime_ns)) {
454 return NULL;
462 return NULL;
455 }
463 }
456 self->flags = dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
464 self->flags = dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
457 dirstate_flag_has_meaningful_data |
465 dirstate_flag_has_meaningful_data |
458 dirstate_flag_has_file_mtime;
466 dirstate_flag_has_file_mtime;
459 self->mode = mode;
467 self->mode = mode;
460 self->size = size;
468 self->size = size;
461 self->mtime_s = mtime_s;
469 self->mtime_s = mtime_s;
462 self->mtime_ns = mtime_ns;
470 self->mtime_ns = mtime_ns;
463 Py_RETURN_NONE;
471 Py_RETURN_NONE;
464 }
472 }
465
473
466 static PyObject *dirstate_item_set_tracked(dirstateItemObject *self)
474 static PyObject *dirstate_item_set_tracked(dirstateItemObject *self)
467 {
475 {
468 self->flags |= dirstate_flag_wc_tracked;
476 self->flags |= dirstate_flag_wc_tracked;
469 self->flags &= ~dirstate_flag_has_file_mtime;
477 self->flags &= ~dirstate_flag_has_file_mtime;
470 Py_RETURN_NONE;
478 Py_RETURN_NONE;
471 }
479 }
472
480
473 static PyObject *dirstate_item_set_untracked(dirstateItemObject *self)
481 static PyObject *dirstate_item_set_untracked(dirstateItemObject *self)
474 {
482 {
475 self->flags &= ~dirstate_flag_wc_tracked;
483 self->flags &= ~dirstate_flag_wc_tracked;
476 self->mode = 0;
484 self->mode = 0;
477 self->size = 0;
485 self->size = 0;
478 self->mtime_s = 0;
486 self->mtime_s = 0;
479 self->mtime_ns = 0;
487 self->mtime_ns = 0;
480 Py_RETURN_NONE;
488 Py_RETURN_NONE;
481 }
489 }
482
490
483 static PyObject *dirstate_item_drop_merge_data(dirstateItemObject *self)
491 static PyObject *dirstate_item_drop_merge_data(dirstateItemObject *self)
484 {
492 {
485 if (self->flags & dirstate_flag_p2_info) {
493 if (self->flags & dirstate_flag_p2_info) {
486 self->flags &= ~(dirstate_flag_p2_info |
494 self->flags &= ~(dirstate_flag_p2_info |
487 dirstate_flag_has_meaningful_data |
495 dirstate_flag_has_meaningful_data |
488 dirstate_flag_has_file_mtime);
496 dirstate_flag_has_file_mtime);
489 self->mode = 0;
497 self->mode = 0;
490 self->size = 0;
498 self->size = 0;
491 self->mtime_s = 0;
499 self->mtime_s = 0;
492 self->mtime_ns = 0;
500 self->mtime_ns = 0;
493 }
501 }
494 Py_RETURN_NONE;
502 Py_RETURN_NONE;
495 }
503 }
496 static PyMethodDef dirstate_item_methods[] = {
504 static PyMethodDef dirstate_item_methods[] = {
497 {"v2_data", (PyCFunction)dirstate_item_v2_data, METH_NOARGS,
505 {"v2_data", (PyCFunction)dirstate_item_v2_data, METH_NOARGS,
498 "return data suitable for v2 serialization"},
506 "return data suitable for v2 serialization"},
499 {"v1_state", (PyCFunction)dirstate_item_v1_state, METH_NOARGS,
507 {"v1_state", (PyCFunction)dirstate_item_v1_state, METH_NOARGS,
500 "return a \"state\" suitable for v1 serialization"},
508 "return a \"state\" suitable for v1 serialization"},
501 {"v1_mode", (PyCFunction)dirstate_item_v1_mode, METH_NOARGS,
509 {"v1_mode", (PyCFunction)dirstate_item_v1_mode, METH_NOARGS,
502 "return a \"mode\" suitable for v1 serialization"},
510 "return a \"mode\" suitable for v1 serialization"},
503 {"v1_size", (PyCFunction)dirstate_item_v1_size, METH_NOARGS,
511 {"v1_size", (PyCFunction)dirstate_item_v1_size, METH_NOARGS,
504 "return a \"size\" suitable for v1 serialization"},
512 "return a \"size\" suitable for v1 serialization"},
505 {"v1_mtime", (PyCFunction)dirstate_item_v1_mtime, METH_NOARGS,
513 {"v1_mtime", (PyCFunction)dirstate_item_v1_mtime, METH_NOARGS,
506 "return a \"mtime\" suitable for v1 serialization"},
514 "return a \"mtime\" suitable for v1 serialization"},
507 {"need_delay", (PyCFunction)dirstate_item_need_delay, METH_O,
515 {"need_delay", (PyCFunction)dirstate_item_need_delay, METH_O,
508 "True if the stored mtime would be ambiguous with the current time"},
516 "True if the stored mtime would be ambiguous with the current time"},
509 {"mtime_likely_equal_to", (PyCFunction)dirstate_item_mtime_likely_equal_to,
517 {"mtime_likely_equal_to", (PyCFunction)dirstate_item_mtime_likely_equal_to,
510 METH_O, "True if the stored mtime is likely equal to the given mtime"},
518 METH_O, "True if the stored mtime is likely equal to the given mtime"},
511 {"from_v1_data", (PyCFunction)dirstate_item_from_v1_meth,
519 {"from_v1_data", (PyCFunction)dirstate_item_from_v1_meth,
512 METH_VARARGS | METH_CLASS, "build a new DirstateItem object from V1 data"},
520 METH_VARARGS | METH_CLASS, "build a new DirstateItem object from V1 data"},
513 {"from_v2_data", (PyCFunction)dirstate_item_from_v2_meth,
521 {"from_v2_data", (PyCFunction)dirstate_item_from_v2_meth,
514 METH_VARARGS | METH_CLASS, "build a new DirstateItem object from V2 data"},
522 METH_VARARGS | METH_CLASS, "build a new DirstateItem object from V2 data"},
515 {"set_possibly_dirty", (PyCFunction)dirstate_item_set_possibly_dirty,
523 {"set_possibly_dirty", (PyCFunction)dirstate_item_set_possibly_dirty,
516 METH_NOARGS, "mark a file as \"possibly dirty\""},
524 METH_NOARGS, "mark a file as \"possibly dirty\""},
517 {"set_clean", (PyCFunction)dirstate_item_set_clean, METH_VARARGS,
525 {"set_clean", (PyCFunction)dirstate_item_set_clean, METH_VARARGS,
518 "mark a file as \"clean\""},
526 "mark a file as \"clean\""},
519 {"set_tracked", (PyCFunction)dirstate_item_set_tracked, METH_NOARGS,
527 {"set_tracked", (PyCFunction)dirstate_item_set_tracked, METH_NOARGS,
520 "mark a file as \"tracked\""},
528 "mark a file as \"tracked\""},
521 {"set_untracked", (PyCFunction)dirstate_item_set_untracked, METH_NOARGS,
529 {"set_untracked", (PyCFunction)dirstate_item_set_untracked, METH_NOARGS,
522 "mark a file as \"untracked\""},
530 "mark a file as \"untracked\""},
523 {"drop_merge_data", (PyCFunction)dirstate_item_drop_merge_data, METH_NOARGS,
531 {"drop_merge_data", (PyCFunction)dirstate_item_drop_merge_data, METH_NOARGS,
524 "remove all \"merge-only\" from a DirstateItem"},
532 "remove all \"merge-only\" from a DirstateItem"},
525 {NULL} /* Sentinel */
533 {NULL} /* Sentinel */
526 };
534 };
527
535
528 static PyObject *dirstate_item_get_mode(dirstateItemObject *self)
536 static PyObject *dirstate_item_get_mode(dirstateItemObject *self)
529 {
537 {
530 return PyInt_FromLong(dirstate_item_c_v1_mode(self));
538 return PyInt_FromLong(dirstate_item_c_v1_mode(self));
531 };
539 };
532
540
533 static PyObject *dirstate_item_get_size(dirstateItemObject *self)
541 static PyObject *dirstate_item_get_size(dirstateItemObject *self)
534 {
542 {
535 return PyInt_FromLong(dirstate_item_c_v1_size(self));
543 return PyInt_FromLong(dirstate_item_c_v1_size(self));
536 };
544 };
537
545
538 static PyObject *dirstate_item_get_mtime(dirstateItemObject *self)
546 static PyObject *dirstate_item_get_mtime(dirstateItemObject *self)
539 {
547 {
540 return PyInt_FromLong(dirstate_item_c_v1_mtime(self));
548 return PyInt_FromLong(dirstate_item_c_v1_mtime(self));
541 };
549 };
542
550
543 static PyObject *dirstate_item_get_state(dirstateItemObject *self)
551 static PyObject *dirstate_item_get_state(dirstateItemObject *self)
544 {
552 {
545 char state = dirstate_item_c_v1_state(self);
553 char state = dirstate_item_c_v1_state(self);
546 return PyBytes_FromStringAndSize(&state, 1);
554 return PyBytes_FromStringAndSize(&state, 1);
547 };
555 };
548
556
549 static PyObject *dirstate_item_get_has_fallback_exec(dirstateItemObject *self)
557 static PyObject *dirstate_item_get_has_fallback_exec(dirstateItemObject *self)
550 {
558 {
551 if (dirstate_item_c_has_fallback_exec(self)) {
559 if (dirstate_item_c_has_fallback_exec(self)) {
552 Py_RETURN_TRUE;
560 Py_RETURN_TRUE;
553 } else {
561 } else {
554 Py_RETURN_FALSE;
562 Py_RETURN_FALSE;
555 }
563 }
556 };
564 };
557
565
558 static PyObject *dirstate_item_get_fallback_exec(dirstateItemObject *self)
566 static PyObject *dirstate_item_get_fallback_exec(dirstateItemObject *self)
559 {
567 {
560 if (dirstate_item_c_has_fallback_exec(self)) {
568 if (dirstate_item_c_has_fallback_exec(self)) {
561 if (self->flags & dirstate_flag_fallback_exec) {
569 if (self->flags & dirstate_flag_fallback_exec) {
562 Py_RETURN_TRUE;
570 Py_RETURN_TRUE;
563 } else {
571 } else {
564 Py_RETURN_FALSE;
572 Py_RETURN_FALSE;
565 }
573 }
566 } else {
574 } else {
567 Py_RETURN_NONE;
575 Py_RETURN_NONE;
568 }
576 }
569 };
577 };
570
578
571 static int dirstate_item_set_fallback_exec(dirstateItemObject *self,
579 static int dirstate_item_set_fallback_exec(dirstateItemObject *self,
572 PyObject *value)
580 PyObject *value)
573 {
581 {
574 if ((value == Py_None) || (value == NULL)) {
582 if ((value == Py_None) || (value == NULL)) {
575 self->flags &= ~dirstate_flag_has_fallback_exec;
583 self->flags &= ~dirstate_flag_has_fallback_exec;
576 } else {
584 } else {
577 self->flags |= dirstate_flag_has_fallback_exec;
585 self->flags |= dirstate_flag_has_fallback_exec;
578 if (PyObject_IsTrue(value)) {
586 if (PyObject_IsTrue(value)) {
579 self->flags |= dirstate_flag_fallback_exec;
587 self->flags |= dirstate_flag_fallback_exec;
580 } else {
588 } else {
581 self->flags &= ~dirstate_flag_fallback_exec;
589 self->flags &= ~dirstate_flag_fallback_exec;
582 }
590 }
583 }
591 }
584 return 0;
592 return 0;
585 };
593 };
586
594
587 static PyObject *
595 static PyObject *
588 dirstate_item_get_has_fallback_symlink(dirstateItemObject *self)
596 dirstate_item_get_has_fallback_symlink(dirstateItemObject *self)
589 {
597 {
590 if (dirstate_item_c_has_fallback_symlink(self)) {
598 if (dirstate_item_c_has_fallback_symlink(self)) {
591 Py_RETURN_TRUE;
599 Py_RETURN_TRUE;
592 } else {
600 } else {
593 Py_RETURN_FALSE;
601 Py_RETURN_FALSE;
594 }
602 }
595 };
603 };
596
604
597 static PyObject *dirstate_item_get_fallback_symlink(dirstateItemObject *self)
605 static PyObject *dirstate_item_get_fallback_symlink(dirstateItemObject *self)
598 {
606 {
599 if (dirstate_item_c_has_fallback_symlink(self)) {
607 if (dirstate_item_c_has_fallback_symlink(self)) {
600 if (self->flags & dirstate_flag_fallback_symlink) {
608 if (self->flags & dirstate_flag_fallback_symlink) {
601 Py_RETURN_TRUE;
609 Py_RETURN_TRUE;
602 } else {
610 } else {
603 Py_RETURN_FALSE;
611 Py_RETURN_FALSE;
604 }
612 }
605 } else {
613 } else {
606 Py_RETURN_NONE;
614 Py_RETURN_NONE;
607 }
615 }
608 };
616 };
609
617
610 static int dirstate_item_set_fallback_symlink(dirstateItemObject *self,
618 static int dirstate_item_set_fallback_symlink(dirstateItemObject *self,
611 PyObject *value)
619 PyObject *value)
612 {
620 {
613 if ((value == Py_None) || (value == NULL)) {
621 if ((value == Py_None) || (value == NULL)) {
614 self->flags &= ~dirstate_flag_has_fallback_symlink;
622 self->flags &= ~dirstate_flag_has_fallback_symlink;
615 } else {
623 } else {
616 self->flags |= dirstate_flag_has_fallback_symlink;
624 self->flags |= dirstate_flag_has_fallback_symlink;
617 if (PyObject_IsTrue(value)) {
625 if (PyObject_IsTrue(value)) {
618 self->flags |= dirstate_flag_fallback_symlink;
626 self->flags |= dirstate_flag_fallback_symlink;
619 } else {
627 } else {
620 self->flags &= ~dirstate_flag_fallback_symlink;
628 self->flags &= ~dirstate_flag_fallback_symlink;
621 }
629 }
622 }
630 }
623 return 0;
631 return 0;
624 };
632 };
625
633
626 static PyObject *dirstate_item_get_tracked(dirstateItemObject *self)
634 static PyObject *dirstate_item_get_tracked(dirstateItemObject *self)
627 {
635 {
628 if (dirstate_item_c_tracked(self)) {
636 if (dirstate_item_c_tracked(self)) {
629 Py_RETURN_TRUE;
637 Py_RETURN_TRUE;
630 } else {
638 } else {
631 Py_RETURN_FALSE;
639 Py_RETURN_FALSE;
632 }
640 }
633 };
641 };
634 static PyObject *dirstate_item_get_p1_tracked(dirstateItemObject *self)
642 static PyObject *dirstate_item_get_p1_tracked(dirstateItemObject *self)
635 {
643 {
636 if (self->flags & dirstate_flag_p1_tracked) {
644 if (self->flags & dirstate_flag_p1_tracked) {
637 Py_RETURN_TRUE;
645 Py_RETURN_TRUE;
638 } else {
646 } else {
639 Py_RETURN_FALSE;
647 Py_RETURN_FALSE;
640 }
648 }
641 };
649 };
642
650
643 static PyObject *dirstate_item_get_added(dirstateItemObject *self)
651 static PyObject *dirstate_item_get_added(dirstateItemObject *self)
644 {
652 {
645 if (dirstate_item_c_added(self)) {
653 if (dirstate_item_c_added(self)) {
646 Py_RETURN_TRUE;
654 Py_RETURN_TRUE;
647 } else {
655 } else {
648 Py_RETURN_FALSE;
656 Py_RETURN_FALSE;
649 }
657 }
650 };
658 };
651
659
652 static PyObject *dirstate_item_get_p2_info(dirstateItemObject *self)
660 static PyObject *dirstate_item_get_p2_info(dirstateItemObject *self)
653 {
661 {
654 if (self->flags & dirstate_flag_wc_tracked &&
662 if (self->flags & dirstate_flag_wc_tracked &&
655 self->flags & dirstate_flag_p2_info) {
663 self->flags & dirstate_flag_p2_info) {
656 Py_RETURN_TRUE;
664 Py_RETURN_TRUE;
657 } else {
665 } else {
658 Py_RETURN_FALSE;
666 Py_RETURN_FALSE;
659 }
667 }
660 };
668 };
661
669
662 static PyObject *dirstate_item_get_merged(dirstateItemObject *self)
670 static PyObject *dirstate_item_get_merged(dirstateItemObject *self)
663 {
671 {
664 if (dirstate_item_c_merged(self)) {
672 if (dirstate_item_c_merged(self)) {
665 Py_RETURN_TRUE;
673 Py_RETURN_TRUE;
666 } else {
674 } else {
667 Py_RETURN_FALSE;
675 Py_RETURN_FALSE;
668 }
676 }
669 };
677 };
670
678
671 static PyObject *dirstate_item_get_from_p2(dirstateItemObject *self)
679 static PyObject *dirstate_item_get_from_p2(dirstateItemObject *self)
672 {
680 {
673 if (dirstate_item_c_from_p2(self)) {
681 if (dirstate_item_c_from_p2(self)) {
674 Py_RETURN_TRUE;
682 Py_RETURN_TRUE;
675 } else {
683 } else {
676 Py_RETURN_FALSE;
684 Py_RETURN_FALSE;
677 }
685 }
678 };
686 };
679
687
680 static PyObject *dirstate_item_get_maybe_clean(dirstateItemObject *self)
688 static PyObject *dirstate_item_get_maybe_clean(dirstateItemObject *self)
681 {
689 {
682 if (!(self->flags & dirstate_flag_wc_tracked)) {
690 if (!(self->flags & dirstate_flag_wc_tracked)) {
683 Py_RETURN_FALSE;
691 Py_RETURN_FALSE;
684 } else if (!(self->flags & dirstate_flag_p1_tracked)) {
692 } else if (!(self->flags & dirstate_flag_p1_tracked)) {
685 Py_RETURN_FALSE;
693 Py_RETURN_FALSE;
686 } else if (self->flags & dirstate_flag_p2_info) {
694 } else if (self->flags & dirstate_flag_p2_info) {
687 Py_RETURN_FALSE;
695 Py_RETURN_FALSE;
688 } else {
696 } else {
689 Py_RETURN_TRUE;
697 Py_RETURN_TRUE;
690 }
698 }
691 };
699 };
692
700
693 static PyObject *dirstate_item_get_any_tracked(dirstateItemObject *self)
701 static PyObject *dirstate_item_get_any_tracked(dirstateItemObject *self)
694 {
702 {
695 if (dirstate_item_c_any_tracked(self)) {
703 if (dirstate_item_c_any_tracked(self)) {
696 Py_RETURN_TRUE;
704 Py_RETURN_TRUE;
697 } else {
705 } else {
698 Py_RETURN_FALSE;
706 Py_RETURN_FALSE;
699 }
707 }
700 };
708 };
701
709
702 static PyObject *dirstate_item_get_removed(dirstateItemObject *self)
710 static PyObject *dirstate_item_get_removed(dirstateItemObject *self)
703 {
711 {
704 if (dirstate_item_c_removed(self)) {
712 if (dirstate_item_c_removed(self)) {
705 Py_RETURN_TRUE;
713 Py_RETURN_TRUE;
706 } else {
714 } else {
707 Py_RETURN_FALSE;
715 Py_RETURN_FALSE;
708 }
716 }
709 };
717 };
710
718
711 static PyGetSetDef dirstate_item_getset[] = {
719 static PyGetSetDef dirstate_item_getset[] = {
712 {"mode", (getter)dirstate_item_get_mode, NULL, "mode", NULL},
720 {"mode", (getter)dirstate_item_get_mode, NULL, "mode", NULL},
713 {"size", (getter)dirstate_item_get_size, NULL, "size", NULL},
721 {"size", (getter)dirstate_item_get_size, NULL, "size", NULL},
714 {"mtime", (getter)dirstate_item_get_mtime, NULL, "mtime", NULL},
722 {"mtime", (getter)dirstate_item_get_mtime, NULL, "mtime", NULL},
715 {"state", (getter)dirstate_item_get_state, NULL, "state", NULL},
723 {"state", (getter)dirstate_item_get_state, NULL, "state", NULL},
716 {"has_fallback_exec", (getter)dirstate_item_get_has_fallback_exec, NULL,
724 {"has_fallback_exec", (getter)dirstate_item_get_has_fallback_exec, NULL,
717 "has_fallback_exec", NULL},
725 "has_fallback_exec", NULL},
718 {"fallback_exec", (getter)dirstate_item_get_fallback_exec,
726 {"fallback_exec", (getter)dirstate_item_get_fallback_exec,
719 (setter)dirstate_item_set_fallback_exec, "fallback_exec", NULL},
727 (setter)dirstate_item_set_fallback_exec, "fallback_exec", NULL},
720 {"has_fallback_symlink", (getter)dirstate_item_get_has_fallback_symlink,
728 {"has_fallback_symlink", (getter)dirstate_item_get_has_fallback_symlink,
721 NULL, "has_fallback_symlink", NULL},
729 NULL, "has_fallback_symlink", NULL},
722 {"fallback_symlink", (getter)dirstate_item_get_fallback_symlink,
730 {"fallback_symlink", (getter)dirstate_item_get_fallback_symlink,
723 (setter)dirstate_item_set_fallback_symlink, "fallback_symlink", NULL},
731 (setter)dirstate_item_set_fallback_symlink, "fallback_symlink", NULL},
724 {"tracked", (getter)dirstate_item_get_tracked, NULL, "tracked", NULL},
732 {"tracked", (getter)dirstate_item_get_tracked, NULL, "tracked", NULL},
725 {"p1_tracked", (getter)dirstate_item_get_p1_tracked, NULL, "p1_tracked",
733 {"p1_tracked", (getter)dirstate_item_get_p1_tracked, NULL, "p1_tracked",
726 NULL},
734 NULL},
727 {"added", (getter)dirstate_item_get_added, NULL, "added", NULL},
735 {"added", (getter)dirstate_item_get_added, NULL, "added", NULL},
728 {"p2_info", (getter)dirstate_item_get_p2_info, NULL, "p2_info", NULL},
736 {"p2_info", (getter)dirstate_item_get_p2_info, NULL, "p2_info", NULL},
729 {"merged", (getter)dirstate_item_get_merged, NULL, "merged", NULL},
737 {"merged", (getter)dirstate_item_get_merged, NULL, "merged", NULL},
730 {"from_p2", (getter)dirstate_item_get_from_p2, NULL, "from_p2", NULL},
738 {"from_p2", (getter)dirstate_item_get_from_p2, NULL, "from_p2", NULL},
731 {"maybe_clean", (getter)dirstate_item_get_maybe_clean, NULL, "maybe_clean",
739 {"maybe_clean", (getter)dirstate_item_get_maybe_clean, NULL, "maybe_clean",
732 NULL},
740 NULL},
733 {"any_tracked", (getter)dirstate_item_get_any_tracked, NULL, "any_tracked",
741 {"any_tracked", (getter)dirstate_item_get_any_tracked, NULL, "any_tracked",
734 NULL},
742 NULL},
735 {"removed", (getter)dirstate_item_get_removed, NULL, "removed", NULL},
743 {"removed", (getter)dirstate_item_get_removed, NULL, "removed", NULL},
736 {NULL} /* Sentinel */
744 {NULL} /* Sentinel */
737 };
745 };
738
746
739 PyTypeObject dirstateItemType = {
747 PyTypeObject dirstateItemType = {
740 PyVarObject_HEAD_INIT(NULL, 0) /* header */
748 PyVarObject_HEAD_INIT(NULL, 0) /* header */
741 "dirstate_tuple", /* tp_name */
749 "dirstate_tuple", /* tp_name */
742 sizeof(dirstateItemObject), /* tp_basicsize */
750 sizeof(dirstateItemObject), /* tp_basicsize */
743 0, /* tp_itemsize */
751 0, /* tp_itemsize */
744 (destructor)dirstate_item_dealloc, /* tp_dealloc */
752 (destructor)dirstate_item_dealloc, /* tp_dealloc */
745 0, /* tp_print */
753 0, /* tp_print */
746 0, /* tp_getattr */
754 0, /* tp_getattr */
747 0, /* tp_setattr */
755 0, /* tp_setattr */
748 0, /* tp_compare */
756 0, /* tp_compare */
749 0, /* tp_repr */
757 0, /* tp_repr */
750 0, /* tp_as_number */
758 0, /* tp_as_number */
751 0, /* tp_as_sequence */
759 0, /* tp_as_sequence */
752 0, /* tp_as_mapping */
760 0, /* tp_as_mapping */
753 0, /* tp_hash */
761 0, /* tp_hash */
754 0, /* tp_call */
762 0, /* tp_call */
755 0, /* tp_str */
763 0, /* tp_str */
756 0, /* tp_getattro */
764 0, /* tp_getattro */
757 0, /* tp_setattro */
765 0, /* tp_setattro */
758 0, /* tp_as_buffer */
766 0, /* tp_as_buffer */
759 Py_TPFLAGS_DEFAULT, /* tp_flags */
767 Py_TPFLAGS_DEFAULT, /* tp_flags */
760 "dirstate tuple", /* tp_doc */
768 "dirstate tuple", /* tp_doc */
761 0, /* tp_traverse */
769 0, /* tp_traverse */
762 0, /* tp_clear */
770 0, /* tp_clear */
763 0, /* tp_richcompare */
771 0, /* tp_richcompare */
764 0, /* tp_weaklistoffset */
772 0, /* tp_weaklistoffset */
765 0, /* tp_iter */
773 0, /* tp_iter */
766 0, /* tp_iternext */
774 0, /* tp_iternext */
767 dirstate_item_methods, /* tp_methods */
775 dirstate_item_methods, /* tp_methods */
768 0, /* tp_members */
776 0, /* tp_members */
769 dirstate_item_getset, /* tp_getset */
777 dirstate_item_getset, /* tp_getset */
770 0, /* tp_base */
778 0, /* tp_base */
771 0, /* tp_dict */
779 0, /* tp_dict */
772 0, /* tp_descr_get */
780 0, /* tp_descr_get */
773 0, /* tp_descr_set */
781 0, /* tp_descr_set */
774 0, /* tp_dictoffset */
782 0, /* tp_dictoffset */
775 0, /* tp_init */
783 0, /* tp_init */
776 0, /* tp_alloc */
784 0, /* tp_alloc */
777 dirstate_item_new, /* tp_new */
785 dirstate_item_new, /* tp_new */
778 };
786 };
779
787
780 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
788 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
781 {
789 {
782 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
790 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
783 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
791 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
784 char state, *cur, *str, *cpos;
792 char state, *cur, *str, *cpos;
785 int mode, size, mtime;
793 int mode, size, mtime;
786 unsigned int flen, pos = 40;
794 unsigned int flen, pos = 40;
787 Py_ssize_t len = 40;
795 Py_ssize_t len = 40;
788 Py_ssize_t readlen;
796 Py_ssize_t readlen;
789
797
790 if (!PyArg_ParseTuple(
798 if (!PyArg_ParseTuple(
791 args, PY23("O!O!s#:parse_dirstate", "O!O!y#:parse_dirstate"),
799 args, PY23("O!O!s#:parse_dirstate", "O!O!y#:parse_dirstate"),
792 &PyDict_Type, &dmap, &PyDict_Type, &cmap, &str, &readlen)) {
800 &PyDict_Type, &dmap, &PyDict_Type, &cmap, &str, &readlen)) {
793 goto quit;
801 goto quit;
794 }
802 }
795
803
796 len = readlen;
804 len = readlen;
797
805
798 /* read parents */
806 /* read parents */
799 if (len < 40) {
807 if (len < 40) {
800 PyErr_SetString(PyExc_ValueError,
808 PyErr_SetString(PyExc_ValueError,
801 "too little data for parents");
809 "too little data for parents");
802 goto quit;
810 goto quit;
803 }
811 }
804
812
805 parents = Py_BuildValue(PY23("s#s#", "y#y#"), str, (Py_ssize_t)20,
813 parents = Py_BuildValue(PY23("s#s#", "y#y#"), str, (Py_ssize_t)20,
806 str + 20, (Py_ssize_t)20);
814 str + 20, (Py_ssize_t)20);
807 if (!parents) {
815 if (!parents) {
808 goto quit;
816 goto quit;
809 }
817 }
810
818
811 /* read filenames */
819 /* read filenames */
812 while (pos >= 40 && pos < len) {
820 while (pos >= 40 && pos < len) {
813 if (pos + 17 > len) {
821 if (pos + 17 > len) {
814 PyErr_SetString(PyExc_ValueError,
822 PyErr_SetString(PyExc_ValueError,
815 "overflow in dirstate");
823 "overflow in dirstate");
816 goto quit;
824 goto quit;
817 }
825 }
818 cur = str + pos;
826 cur = str + pos;
819 /* unpack header */
827 /* unpack header */
820 state = *cur;
828 state = *cur;
821 mode = getbe32(cur + 1);
829 mode = getbe32(cur + 1);
822 size = getbe32(cur + 5);
830 size = getbe32(cur + 5);
823 mtime = getbe32(cur + 9);
831 mtime = getbe32(cur + 9);
824 flen = getbe32(cur + 13);
832 flen = getbe32(cur + 13);
825 pos += 17;
833 pos += 17;
826 cur += 17;
834 cur += 17;
827 if (flen > len - pos) {
835 if (flen > len - pos) {
828 PyErr_SetString(PyExc_ValueError,
836 PyErr_SetString(PyExc_ValueError,
829 "overflow in dirstate");
837 "overflow in dirstate");
830 goto quit;
838 goto quit;
831 }
839 }
832
840
833 entry = (PyObject *)dirstate_item_from_v1_data(state, mode,
841 entry = (PyObject *)dirstate_item_from_v1_data(state, mode,
834 size, mtime);
842 size, mtime);
835 if (!entry)
843 if (!entry)
836 goto quit;
844 goto quit;
837 cpos = memchr(cur, 0, flen);
845 cpos = memchr(cur, 0, flen);
838 if (cpos) {
846 if (cpos) {
839 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
847 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
840 cname = PyBytes_FromStringAndSize(
848 cname = PyBytes_FromStringAndSize(
841 cpos + 1, flen - (cpos - cur) - 1);
849 cpos + 1, flen - (cpos - cur) - 1);
842 if (!fname || !cname ||
850 if (!fname || !cname ||
843 PyDict_SetItem(cmap, fname, cname) == -1 ||
851 PyDict_SetItem(cmap, fname, cname) == -1 ||
844 PyDict_SetItem(dmap, fname, entry) == -1) {
852 PyDict_SetItem(dmap, fname, entry) == -1) {
845 goto quit;
853 goto quit;
846 }
854 }
847 Py_DECREF(cname);
855 Py_DECREF(cname);
848 } else {
856 } else {
849 fname = PyBytes_FromStringAndSize(cur, flen);
857 fname = PyBytes_FromStringAndSize(cur, flen);
850 if (!fname ||
858 if (!fname ||
851 PyDict_SetItem(dmap, fname, entry) == -1) {
859 PyDict_SetItem(dmap, fname, entry) == -1) {
852 goto quit;
860 goto quit;
853 }
861 }
854 }
862 }
855 Py_DECREF(fname);
863 Py_DECREF(fname);
856 Py_DECREF(entry);
864 Py_DECREF(entry);
857 fname = cname = entry = NULL;
865 fname = cname = entry = NULL;
858 pos += flen;
866 pos += flen;
859 }
867 }
860
868
861 ret = parents;
869 ret = parents;
862 Py_INCREF(ret);
870 Py_INCREF(ret);
863 quit:
871 quit:
864 Py_XDECREF(fname);
872 Py_XDECREF(fname);
865 Py_XDECREF(cname);
873 Py_XDECREF(cname);
866 Py_XDECREF(entry);
874 Py_XDECREF(entry);
867 Py_XDECREF(parents);
875 Py_XDECREF(parents);
868 return ret;
876 return ret;
869 }
877 }
870
878
871 /*
879 /*
872 * Efficiently pack a dirstate object into its on-disk format.
880 * Efficiently pack a dirstate object into its on-disk format.
873 */
881 */
874 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
882 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
875 {
883 {
876 PyObject *packobj = NULL;
884 PyObject *packobj = NULL;
877 PyObject *map, *copymap, *pl, *mtime_unset = NULL;
885 PyObject *map, *copymap, *pl, *mtime_unset = NULL;
878 Py_ssize_t nbytes, pos, l;
886 Py_ssize_t nbytes, pos, l;
879 PyObject *k, *v = NULL, *pn;
887 PyObject *k, *v = NULL, *pn;
880 char *p, *s;
888 char *p, *s;
881 int now_s;
889 int now_s;
882 int now_ns;
890 int now_ns;
883
891
884 if (!PyArg_ParseTuple(args, "O!O!O!(ii):pack_dirstate", &PyDict_Type,
892 if (!PyArg_ParseTuple(args, "O!O!O!(ii):pack_dirstate", &PyDict_Type,
885 &map, &PyDict_Type, &copymap, &PyTuple_Type, &pl,
893 &map, &PyDict_Type, &copymap, &PyTuple_Type, &pl,
886 &now_s, &now_ns)) {
894 &now_s, &now_ns)) {
887 return NULL;
895 return NULL;
888 }
896 }
889
897
890 if (PyTuple_Size(pl) != 2) {
898 if (PyTuple_Size(pl) != 2) {
891 PyErr_SetString(PyExc_TypeError, "expected 2-element tuple");
899 PyErr_SetString(PyExc_TypeError, "expected 2-element tuple");
892 return NULL;
900 return NULL;
893 }
901 }
894
902
895 /* Figure out how much we need to allocate. */
903 /* Figure out how much we need to allocate. */
896 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
904 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
897 PyObject *c;
905 PyObject *c;
898 if (!PyBytes_Check(k)) {
906 if (!PyBytes_Check(k)) {
899 PyErr_SetString(PyExc_TypeError, "expected string key");
907 PyErr_SetString(PyExc_TypeError, "expected string key");
900 goto bail;
908 goto bail;
901 }
909 }
902 nbytes += PyBytes_GET_SIZE(k) + 17;
910 nbytes += PyBytes_GET_SIZE(k) + 17;
903 c = PyDict_GetItem(copymap, k);
911 c = PyDict_GetItem(copymap, k);
904 if (c) {
912 if (c) {
905 if (!PyBytes_Check(c)) {
913 if (!PyBytes_Check(c)) {
906 PyErr_SetString(PyExc_TypeError,
914 PyErr_SetString(PyExc_TypeError,
907 "expected string key");
915 "expected string key");
908 goto bail;
916 goto bail;
909 }
917 }
910 nbytes += PyBytes_GET_SIZE(c) + 1;
918 nbytes += PyBytes_GET_SIZE(c) + 1;
911 }
919 }
912 }
920 }
913
921
914 packobj = PyBytes_FromStringAndSize(NULL, nbytes);
922 packobj = PyBytes_FromStringAndSize(NULL, nbytes);
915 if (packobj == NULL) {
923 if (packobj == NULL) {
916 goto bail;
924 goto bail;
917 }
925 }
918
926
919 p = PyBytes_AS_STRING(packobj);
927 p = PyBytes_AS_STRING(packobj);
920
928
921 pn = PyTuple_GET_ITEM(pl, 0);
929 pn = PyTuple_GET_ITEM(pl, 0);
922 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
930 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
923 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
931 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
924 goto bail;
932 goto bail;
925 }
933 }
926 memcpy(p, s, l);
934 memcpy(p, s, l);
927 p += 20;
935 p += 20;
928 pn = PyTuple_GET_ITEM(pl, 1);
936 pn = PyTuple_GET_ITEM(pl, 1);
929 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
937 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
930 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
938 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
931 goto bail;
939 goto bail;
932 }
940 }
933 memcpy(p, s, l);
941 memcpy(p, s, l);
934 p += 20;
942 p += 20;
935
943
936 for (pos = 0; PyDict_Next(map, &pos, &k, &v);) {
944 for (pos = 0; PyDict_Next(map, &pos, &k, &v);) {
937 dirstateItemObject *tuple;
945 dirstateItemObject *tuple;
938 char state;
946 char state;
939 int mode, size, mtime;
947 int mode, size, mtime;
940 Py_ssize_t len, l;
948 Py_ssize_t len, l;
941 PyObject *o;
949 PyObject *o;
942 char *t;
950 char *t;
943
951
944 if (!dirstate_tuple_check(v)) {
952 if (!dirstate_tuple_check(v)) {
945 PyErr_SetString(PyExc_TypeError,
953 PyErr_SetString(PyExc_TypeError,
946 "expected a dirstate tuple");
954 "expected a dirstate tuple");
947 goto bail;
955 goto bail;
948 }
956 }
949 tuple = (dirstateItemObject *)v;
957 tuple = (dirstateItemObject *)v;
950
958
951 state = dirstate_item_c_v1_state(tuple);
959 state = dirstate_item_c_v1_state(tuple);
952 mode = dirstate_item_c_v1_mode(tuple);
960 mode = dirstate_item_c_v1_mode(tuple);
953 size = dirstate_item_c_v1_size(tuple);
961 size = dirstate_item_c_v1_size(tuple);
954 mtime = dirstate_item_c_v1_mtime(tuple);
962 mtime = dirstate_item_c_v1_mtime(tuple);
955 if (state == 'n' && tuple->mtime_s == now_s) {
963 if (state == 'n' && tuple->mtime_s == now_s) {
956 /* See pure/parsers.py:pack_dirstate for why we do
964 /* See pure/parsers.py:pack_dirstate for why we do
957 * this. */
965 * this. */
958 mtime = -1;
966 mtime = -1;
959 mtime_unset = (PyObject *)dirstate_item_from_v1_data(
967 mtime_unset = (PyObject *)dirstate_item_from_v1_data(
960 state, mode, size, mtime);
968 state, mode, size, mtime);
961 if (!mtime_unset) {
969 if (!mtime_unset) {
962 goto bail;
970 goto bail;
963 }
971 }
964 if (PyDict_SetItem(map, k, mtime_unset) == -1) {
972 if (PyDict_SetItem(map, k, mtime_unset) == -1) {
965 goto bail;
973 goto bail;
966 }
974 }
967 Py_DECREF(mtime_unset);
975 Py_DECREF(mtime_unset);
968 mtime_unset = NULL;
976 mtime_unset = NULL;
969 }
977 }
970 *p++ = state;
978 *p++ = state;
971 putbe32((uint32_t)mode, p);
979 putbe32((uint32_t)mode, p);
972 putbe32((uint32_t)size, p + 4);
980 putbe32((uint32_t)size, p + 4);
973 putbe32((uint32_t)mtime, p + 8);
981 putbe32((uint32_t)mtime, p + 8);
974 t = p + 12;
982 t = p + 12;
975 p += 16;
983 p += 16;
976 len = PyBytes_GET_SIZE(k);
984 len = PyBytes_GET_SIZE(k);
977 memcpy(p, PyBytes_AS_STRING(k), len);
985 memcpy(p, PyBytes_AS_STRING(k), len);
978 p += len;
986 p += len;
979 o = PyDict_GetItem(copymap, k);
987 o = PyDict_GetItem(copymap, k);
980 if (o) {
988 if (o) {
981 *p++ = '\0';
989 *p++ = '\0';
982 l = PyBytes_GET_SIZE(o);
990 l = PyBytes_GET_SIZE(o);
983 memcpy(p, PyBytes_AS_STRING(o), l);
991 memcpy(p, PyBytes_AS_STRING(o), l);
984 p += l;
992 p += l;
985 len += l + 1;
993 len += l + 1;
986 }
994 }
987 putbe32((uint32_t)len, t);
995 putbe32((uint32_t)len, t);
988 }
996 }
989
997
990 pos = p - PyBytes_AS_STRING(packobj);
998 pos = p - PyBytes_AS_STRING(packobj);
991 if (pos != nbytes) {
999 if (pos != nbytes) {
992 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
1000 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
993 (long)pos, (long)nbytes);
1001 (long)pos, (long)nbytes);
994 goto bail;
1002 goto bail;
995 }
1003 }
996
1004
997 return packobj;
1005 return packobj;
998 bail:
1006 bail:
999 Py_XDECREF(mtime_unset);
1007 Py_XDECREF(mtime_unset);
1000 Py_XDECREF(packobj);
1008 Py_XDECREF(packobj);
1001 Py_XDECREF(v);
1009 Py_XDECREF(v);
1002 return NULL;
1010 return NULL;
1003 }
1011 }
1004
1012
/* Bits of the obsolescence marker `flags` field. */
#define BUMPED_FIX 1 /* NOTE(review): not referenced nearby; meaning unverified */
#define USING_SHA_256 2 /* hashes in the marker are 32 bytes wide, not 20 */
/*
 * Fixed-size part of a marker: 4 bytes size, 8 bytes date, 2 bytes timezone,
 * 2 bytes flags, then one byte each for the successor, parent and metadata
 * counts.
 */
#define FM1_HEADER_SIZE (4 + 8 + 2 + 2 + 1 + 1 + 1)
1008
1016
1009 static PyObject *readshas(const char *source, unsigned char num,
1017 static PyObject *readshas(const char *source, unsigned char num,
1010 Py_ssize_t hashwidth)
1018 Py_ssize_t hashwidth)
1011 {
1019 {
1012 int i;
1020 int i;
1013 PyObject *list = PyTuple_New(num);
1021 PyObject *list = PyTuple_New(num);
1014 if (list == NULL) {
1022 if (list == NULL) {
1015 return NULL;
1023 return NULL;
1016 }
1024 }
1017 for (i = 0; i < num; i++) {
1025 for (i = 0; i < num; i++) {
1018 PyObject *hash = PyBytes_FromStringAndSize(source, hashwidth);
1026 PyObject *hash = PyBytes_FromStringAndSize(source, hashwidth);
1019 if (hash == NULL) {
1027 if (hash == NULL) {
1020 Py_DECREF(list);
1028 Py_DECREF(list);
1021 return NULL;
1029 return NULL;
1022 }
1030 }
1023 PyTuple_SET_ITEM(list, i, hash);
1031 PyTuple_SET_ITEM(list, i, hash);
1024 source += hashwidth;
1032 source += hashwidth;
1025 }
1033 }
1026 return list;
1034 return list;
1027 }
1035 }
1028
1036
1029 static PyObject *fm1readmarker(const char *databegin, const char *dataend,
1037 static PyObject *fm1readmarker(const char *databegin, const char *dataend,
1030 uint32_t *msize)
1038 uint32_t *msize)
1031 {
1039 {
1032 const char *data = databegin;
1040 const char *data = databegin;
1033 const char *meta;
1041 const char *meta;
1034
1042
1035 double mtime;
1043 double mtime;
1036 int16_t tz;
1044 int16_t tz;
1037 uint16_t flags;
1045 uint16_t flags;
1038 unsigned char nsuccs, nparents, nmetadata;
1046 unsigned char nsuccs, nparents, nmetadata;
1039 Py_ssize_t hashwidth = 20;
1047 Py_ssize_t hashwidth = 20;
1040
1048
1041 PyObject *prec = NULL, *parents = NULL, *succs = NULL;
1049 PyObject *prec = NULL, *parents = NULL, *succs = NULL;
1042 PyObject *metadata = NULL, *ret = NULL;
1050 PyObject *metadata = NULL, *ret = NULL;
1043 int i;
1051 int i;
1044
1052
1045 if (data + FM1_HEADER_SIZE > dataend) {
1053 if (data + FM1_HEADER_SIZE > dataend) {
1046 goto overflow;
1054 goto overflow;
1047 }
1055 }
1048
1056
1049 *msize = getbe32(data);
1057 *msize = getbe32(data);
1050 data += 4;
1058 data += 4;
1051 mtime = getbefloat64(data);
1059 mtime = getbefloat64(data);
1052 data += 8;
1060 data += 8;
1053 tz = getbeint16(data);
1061 tz = getbeint16(data);
1054 data += 2;
1062 data += 2;
1055 flags = getbeuint16(data);
1063 flags = getbeuint16(data);
1056 data += 2;
1064 data += 2;
1057
1065
1058 if (flags & USING_SHA_256) {
1066 if (flags & USING_SHA_256) {
1059 hashwidth = 32;
1067 hashwidth = 32;
1060 }
1068 }
1061
1069
1062 nsuccs = (unsigned char)(*data++);
1070 nsuccs = (unsigned char)(*data++);
1063 nparents = (unsigned char)(*data++);
1071 nparents = (unsigned char)(*data++);
1064 nmetadata = (unsigned char)(*data++);
1072 nmetadata = (unsigned char)(*data++);
1065
1073
1066 if (databegin + *msize > dataend) {
1074 if (databegin + *msize > dataend) {
1067 goto overflow;
1075 goto overflow;
1068 }
1076 }
1069 dataend = databegin + *msize; /* narrow down to marker size */
1077 dataend = databegin + *msize; /* narrow down to marker size */
1070
1078
1071 if (data + hashwidth > dataend) {
1079 if (data + hashwidth > dataend) {
1072 goto overflow;
1080 goto overflow;
1073 }
1081 }
1074 prec = PyBytes_FromStringAndSize(data, hashwidth);
1082 prec = PyBytes_FromStringAndSize(data, hashwidth);
1075 data += hashwidth;
1083 data += hashwidth;
1076 if (prec == NULL) {
1084 if (prec == NULL) {
1077 goto bail;
1085 goto bail;
1078 }
1086 }
1079
1087
1080 if (data + nsuccs * hashwidth > dataend) {
1088 if (data + nsuccs * hashwidth > dataend) {
1081 goto overflow;
1089 goto overflow;
1082 }
1090 }
1083 succs = readshas(data, nsuccs, hashwidth);
1091 succs = readshas(data, nsuccs, hashwidth);
1084 if (succs == NULL) {
1092 if (succs == NULL) {
1085 goto bail;
1093 goto bail;
1086 }
1094 }
1087 data += nsuccs * hashwidth;
1095 data += nsuccs * hashwidth;
1088
1096
1089 if (nparents == 1 || nparents == 2) {
1097 if (nparents == 1 || nparents == 2) {
1090 if (data + nparents * hashwidth > dataend) {
1098 if (data + nparents * hashwidth > dataend) {
1091 goto overflow;
1099 goto overflow;
1092 }
1100 }
1093 parents = readshas(data, nparents, hashwidth);
1101 parents = readshas(data, nparents, hashwidth);
1094 if (parents == NULL) {
1102 if (parents == NULL) {
1095 goto bail;
1103 goto bail;
1096 }
1104 }
1097 data += nparents * hashwidth;
1105 data += nparents * hashwidth;
1098 } else {
1106 } else {
1099 parents = Py_None;
1107 parents = Py_None;
1100 Py_INCREF(parents);
1108 Py_INCREF(parents);
1101 }
1109 }
1102
1110
1103 if (data + 2 * nmetadata > dataend) {
1111 if (data + 2 * nmetadata > dataend) {
1104 goto overflow;
1112 goto overflow;
1105 }
1113 }
1106 meta = data + (2 * nmetadata);
1114 meta = data + (2 * nmetadata);
1107 metadata = PyTuple_New(nmetadata);
1115 metadata = PyTuple_New(nmetadata);
1108 if (metadata == NULL) {
1116 if (metadata == NULL) {
1109 goto bail;
1117 goto bail;
1110 }
1118 }
1111 for (i = 0; i < nmetadata; i++) {
1119 for (i = 0; i < nmetadata; i++) {
1112 PyObject *tmp, *left = NULL, *right = NULL;
1120 PyObject *tmp, *left = NULL, *right = NULL;
1113 Py_ssize_t leftsize = (unsigned char)(*data++);
1121 Py_ssize_t leftsize = (unsigned char)(*data++);
1114 Py_ssize_t rightsize = (unsigned char)(*data++);
1122 Py_ssize_t rightsize = (unsigned char)(*data++);
1115 if (meta + leftsize + rightsize > dataend) {
1123 if (meta + leftsize + rightsize > dataend) {
1116 goto overflow;
1124 goto overflow;
1117 }
1125 }
1118 left = PyBytes_FromStringAndSize(meta, leftsize);
1126 left = PyBytes_FromStringAndSize(meta, leftsize);
1119 meta += leftsize;
1127 meta += leftsize;
1120 right = PyBytes_FromStringAndSize(meta, rightsize);
1128 right = PyBytes_FromStringAndSize(meta, rightsize);
1121 meta += rightsize;
1129 meta += rightsize;
1122 tmp = PyTuple_New(2);
1130 tmp = PyTuple_New(2);
1123 if (!left || !right || !tmp) {
1131 if (!left || !right || !tmp) {
1124 Py_XDECREF(left);
1132 Py_XDECREF(left);
1125 Py_XDECREF(right);
1133 Py_XDECREF(right);
1126 Py_XDECREF(tmp);
1134 Py_XDECREF(tmp);
1127 goto bail;
1135 goto bail;
1128 }
1136 }
1129 PyTuple_SET_ITEM(tmp, 0, left);
1137 PyTuple_SET_ITEM(tmp, 0, left);
1130 PyTuple_SET_ITEM(tmp, 1, right);
1138 PyTuple_SET_ITEM(tmp, 1, right);
1131 PyTuple_SET_ITEM(metadata, i, tmp);
1139 PyTuple_SET_ITEM(metadata, i, tmp);
1132 }
1140 }
1133 ret = Py_BuildValue("(OOHO(di)O)", prec, succs, flags, metadata, mtime,
1141 ret = Py_BuildValue("(OOHO(di)O)", prec, succs, flags, metadata, mtime,
1134 (int)tz * 60, parents);
1142 (int)tz * 60, parents);
1135 goto bail; /* return successfully */
1143 goto bail; /* return successfully */
1136
1144
1137 overflow:
1145 overflow:
1138 PyErr_SetString(PyExc_ValueError, "overflow in obsstore");
1146 PyErr_SetString(PyExc_ValueError, "overflow in obsstore");
1139 bail:
1147 bail:
1140 Py_XDECREF(prec);
1148 Py_XDECREF(prec);
1141 Py_XDECREF(succs);
1149 Py_XDECREF(succs);
1142 Py_XDECREF(metadata);
1150 Py_XDECREF(metadata);
1143 Py_XDECREF(parents);
1151 Py_XDECREF(parents);
1144 return ret;
1152 return ret;
1145 }
1153 }
1146
1154
1147 static PyObject *fm1readmarkers(PyObject *self, PyObject *args)
1155 static PyObject *fm1readmarkers(PyObject *self, PyObject *args)
1148 {
1156 {
1149 const char *data, *dataend;
1157 const char *data, *dataend;
1150 Py_ssize_t datalen, offset, stop;
1158 Py_ssize_t datalen, offset, stop;
1151 PyObject *markers = NULL;
1159 PyObject *markers = NULL;
1152
1160
1153 if (!PyArg_ParseTuple(args, PY23("s#nn", "y#nn"), &data, &datalen,
1161 if (!PyArg_ParseTuple(args, PY23("s#nn", "y#nn"), &data, &datalen,
1154 &offset, &stop)) {
1162 &offset, &stop)) {
1155 return NULL;
1163 return NULL;
1156 }
1164 }
1157 if (offset < 0) {
1165 if (offset < 0) {
1158 PyErr_SetString(PyExc_ValueError,
1166 PyErr_SetString(PyExc_ValueError,
1159 "invalid negative offset in fm1readmarkers");
1167 "invalid negative offset in fm1readmarkers");
1160 return NULL;
1168 return NULL;
1161 }
1169 }
1162 if (stop > datalen) {
1170 if (stop > datalen) {
1163 PyErr_SetString(
1171 PyErr_SetString(
1164 PyExc_ValueError,
1172 PyExc_ValueError,
1165 "stop longer than data length in fm1readmarkers");
1173 "stop longer than data length in fm1readmarkers");
1166 return NULL;
1174 return NULL;
1167 }
1175 }
1168 dataend = data + datalen;
1176 dataend = data + datalen;
1169 data += offset;
1177 data += offset;
1170 markers = PyList_New(0);
1178 markers = PyList_New(0);
1171 if (!markers) {
1179 if (!markers) {
1172 return NULL;
1180 return NULL;
1173 }
1181 }
1174 while (offset < stop) {
1182 while (offset < stop) {
1175 uint32_t msize;
1183 uint32_t msize;
1176 int error;
1184 int error;
1177 PyObject *record = fm1readmarker(data, dataend, &msize);
1185 PyObject *record = fm1readmarker(data, dataend, &msize);
1178 if (!record) {
1186 if (!record) {
1179 goto bail;
1187 goto bail;
1180 }
1188 }
1181 error = PyList_Append(markers, record);
1189 error = PyList_Append(markers, record);
1182 Py_DECREF(record);
1190 Py_DECREF(record);
1183 if (error) {
1191 if (error) {
1184 goto bail;
1192 goto bail;
1185 }
1193 }
1186 data += msize;
1194 data += msize;
1187 offset += msize;
1195 offset += msize;
1188 }
1196 }
1189 return markers;
1197 return markers;
1190 bail:
1198 bail:
1191 Py_DECREF(markers);
1199 Py_DECREF(markers);
1192 return NULL;
1200 return NULL;
1193 }
1201 }
1194
1202
1195 static char parsers_doc[] = "Efficient content parsing.";
1203 static char parsers_doc[] = "Efficient content parsing.";
1196
1204
1197 PyObject *encodedir(PyObject *self, PyObject *args);
1205 PyObject *encodedir(PyObject *self, PyObject *args);
1198 PyObject *pathencode(PyObject *self, PyObject *args);
1206 PyObject *pathencode(PyObject *self, PyObject *args);
1199 PyObject *lowerencode(PyObject *self, PyObject *args);
1207 PyObject *lowerencode(PyObject *self, PyObject *args);
1200 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs);
1208 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs);
1201
1209
1202 static PyMethodDef methods[] = {
1210 static PyMethodDef methods[] = {
1203 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
1211 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
1204 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1212 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1205 {"parse_index2", (PyCFunction)parse_index2, METH_VARARGS | METH_KEYWORDS,
1213 {"parse_index2", (PyCFunction)parse_index2, METH_VARARGS | METH_KEYWORDS,
1206 "parse a revlog index\n"},
1214 "parse a revlog index\n"},
1207 {"isasciistr", isasciistr, METH_VARARGS, "check if an ASCII string\n"},
1215 {"isasciistr", isasciistr, METH_VARARGS, "check if an ASCII string\n"},
1208 {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
1216 {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
1209 {"asciiupper", asciiupper, METH_VARARGS, "uppercase an ASCII string\n"},
1217 {"asciiupper", asciiupper, METH_VARARGS, "uppercase an ASCII string\n"},
1210 {"dict_new_presized", dict_new_presized, METH_VARARGS,
1218 {"dict_new_presized", dict_new_presized, METH_VARARGS,
1211 "construct a dict with an expected size\n"},
1219 "construct a dict with an expected size\n"},
1212 {"make_file_foldmap", make_file_foldmap, METH_VARARGS,
1220 {"make_file_foldmap", make_file_foldmap, METH_VARARGS,
1213 "make file foldmap\n"},
1221 "make file foldmap\n"},
1214 {"jsonescapeu8fast", jsonescapeu8fast, METH_VARARGS,
1222 {"jsonescapeu8fast", jsonescapeu8fast, METH_VARARGS,
1215 "escape a UTF-8 byte string to JSON (fast path)\n"},
1223 "escape a UTF-8 byte string to JSON (fast path)\n"},
1216 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
1224 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
1217 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
1225 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
1218 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
1226 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
1219 {"fm1readmarkers", fm1readmarkers, METH_VARARGS,
1227 {"fm1readmarkers", fm1readmarkers, METH_VARARGS,
1220 "parse v1 obsolete markers\n"},
1228 "parse v1 obsolete markers\n"},
1221 {NULL, NULL}};
1229 {NULL, NULL}};
1222
1230
1223 void dirs_module_init(PyObject *mod);
1231 void dirs_module_init(PyObject *mod);
1224 void manifest_module_init(PyObject *mod);
1232 void manifest_module_init(PyObject *mod);
1225 void revlog_module_init(PyObject *mod);
1233 void revlog_module_init(PyObject *mod);
1226
1234
1227 static const int version = 20;
1235 static const int version = 20;
1228
1236
1229 static void module_init(PyObject *mod)
1237 static void module_init(PyObject *mod)
1230 {
1238 {
1231 PyModule_AddIntConstant(mod, "version", version);
1239 PyModule_AddIntConstant(mod, "version", version);
1232
1240
1233 /* This module constant has two purposes. First, it lets us unit test
1241 /* This module constant has two purposes. First, it lets us unit test
1234 * the ImportError raised without hard-coding any error text. This
1242 * the ImportError raised without hard-coding any error text. This
1235 * means we can change the text in the future without breaking tests,
1243 * means we can change the text in the future without breaking tests,
1236 * even across changesets without a recompile. Second, its presence
1244 * even across changesets without a recompile. Second, its presence
1237 * can be used to determine whether the version-checking logic is
1245 * can be used to determine whether the version-checking logic is
1238 * present, which also helps in testing across changesets without a
1246 * present, which also helps in testing across changesets without a
1239 * recompile. Note that this means the pure-Python version of parsers
1247 * recompile. Note that this means the pure-Python version of parsers
1240 * should not have this module constant. */
1248 * should not have this module constant. */
1241 PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
1249 PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
1242
1250
1243 dirs_module_init(mod);
1251 dirs_module_init(mod);
1244 manifest_module_init(mod);
1252 manifest_module_init(mod);
1245 revlog_module_init(mod);
1253 revlog_module_init(mod);
1246
1254
1247 if (PyType_Ready(&dirstateItemType) < 0) {
1255 if (PyType_Ready(&dirstateItemType) < 0) {
1248 return;
1256 return;
1249 }
1257 }
1250 Py_INCREF(&dirstateItemType);
1258 Py_INCREF(&dirstateItemType);
1251 PyModule_AddObject(mod, "DirstateItem", (PyObject *)&dirstateItemType);
1259 PyModule_AddObject(mod, "DirstateItem", (PyObject *)&dirstateItemType);
1252 }
1260 }
1253
1261
1254 static int check_python_version(void)
1262 static int check_python_version(void)
1255 {
1263 {
1256 PyObject *sys = PyImport_ImportModule("sys"), *ver;
1264 PyObject *sys = PyImport_ImportModule("sys"), *ver;
1257 long hexversion;
1265 long hexversion;
1258 if (!sys) {
1266 if (!sys) {
1259 return -1;
1267 return -1;
1260 }
1268 }
1261 ver = PyObject_GetAttrString(sys, "hexversion");
1269 ver = PyObject_GetAttrString(sys, "hexversion");
1262 Py_DECREF(sys);
1270 Py_DECREF(sys);
1263 if (!ver) {
1271 if (!ver) {
1264 return -1;
1272 return -1;
1265 }
1273 }
1266 hexversion = PyInt_AsLong(ver);
1274 hexversion = PyInt_AsLong(ver);
1267 Py_DECREF(ver);
1275 Py_DECREF(ver);
1268 /* sys.hexversion is a 32-bit number by default, so the -1 case
1276 /* sys.hexversion is a 32-bit number by default, so the -1 case
1269 * should only occur in unusual circumstances (e.g. if sys.hexversion
1277 * should only occur in unusual circumstances (e.g. if sys.hexversion
1270 * is manually set to an invalid value). */
1278 * is manually set to an invalid value). */
1271 if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
1279 if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
1272 PyErr_Format(PyExc_ImportError,
1280 PyErr_Format(PyExc_ImportError,
1273 "%s: The Mercurial extension "
1281 "%s: The Mercurial extension "
1274 "modules were compiled with Python " PY_VERSION
1282 "modules were compiled with Python " PY_VERSION
1275 ", but "
1283 ", but "
1276 "Mercurial is currently using Python with "
1284 "Mercurial is currently using Python with "
1277 "sys.hexversion=%ld: "
1285 "sys.hexversion=%ld: "
1278 "Python %s\n at: %s",
1286 "Python %s\n at: %s",
1279 versionerrortext, hexversion, Py_GetVersion(),
1287 versionerrortext, hexversion, Py_GetVersion(),
1280 Py_GetProgramFullPath());
1288 Py_GetProgramFullPath());
1281 return -1;
1289 return -1;
1282 }
1290 }
1283 return 0;
1291 return 0;
1284 }
1292 }
1285
1293
1286 #ifdef IS_PY3K
1294 #ifdef IS_PY3K
1287 static struct PyModuleDef parsers_module = {PyModuleDef_HEAD_INIT, "parsers",
1295 static struct PyModuleDef parsers_module = {PyModuleDef_HEAD_INIT, "parsers",
1288 parsers_doc, -1, methods};
1296 parsers_doc, -1, methods};
1289
1297
1290 PyMODINIT_FUNC PyInit_parsers(void)
1298 PyMODINIT_FUNC PyInit_parsers(void)
1291 {
1299 {
1292 PyObject *mod;
1300 PyObject *mod;
1293
1301
1294 if (check_python_version() == -1)
1302 if (check_python_version() == -1)
1295 return NULL;
1303 return NULL;
1296 mod = PyModule_Create(&parsers_module);
1304 mod = PyModule_Create(&parsers_module);
1297 module_init(mod);
1305 module_init(mod);
1298 return mod;
1306 return mod;
1299 }
1307 }
1300 #else
1308 #else
1301 PyMODINIT_FUNC initparsers(void)
1309 PyMODINIT_FUNC initparsers(void)
1302 {
1310 {
1303 PyObject *mod;
1311 PyObject *mod;
1304
1312
1305 if (check_python_version() == -1) {
1313 if (check_python_version() == -1) {
1306 return;
1314 return;
1307 }
1315 }
1308 mod = Py_InitModule3("parsers", methods, parsers_doc);
1316 mod = Py_InitModule3("parsers", methods, parsers_doc);
1309 module_init(mod);
1317 module_init(mod);
1310 }
1318 }
1311 #endif
1319 #endif
@@ -1,91 +1,92 b''
1 /*
1 /*
2 util.h - utility functions for interfacing with the various python APIs.
2 util.h - utility functions for interfacing with the various python APIs.
3
3
4 This software may be used and distributed according to the terms of
4 This software may be used and distributed according to the terms of
5 the GNU General Public License, incorporated herein by reference.
5 the GNU General Public License, incorporated herein by reference.
6 */
6 */
7
7
8 #ifndef _HG_UTIL_H_
8 #ifndef _HG_UTIL_H_
9 #define _HG_UTIL_H_
9 #define _HG_UTIL_H_
10
10
11 #include "compat.h"
11 #include "compat.h"
12
12
13 #if PY_MAJOR_VERSION >= 3
13 #if PY_MAJOR_VERSION >= 3
14 #define IS_PY3K
14 #define IS_PY3K
15 #endif
15 #endif
16
16
17 /* helper to switch things like string literal depending on Python version */
17 /* helper to switch things like string literal depending on Python version */
18 #ifdef IS_PY3K
18 #ifdef IS_PY3K
19 #define PY23(py2, py3) py3
19 #define PY23(py2, py3) py3
20 #else
20 #else
21 #define PY23(py2, py3) py2
21 #define PY23(py2, py3) py2
22 #endif
22 #endif
23
23
24 /* clang-format off */
24 /* clang-format off */
25 typedef struct {
25 typedef struct {
26 PyObject_HEAD
26 PyObject_HEAD
27 int flags;
27 int flags;
28 int mode;
28 int mode;
29 int size;
29 int size;
30 int mtime_s;
30 int mtime_s;
31 int mtime_ns;
31 int mtime_ns;
32 } dirstateItemObject;
32 } dirstateItemObject;
33 /* clang-format on */
33 /* clang-format on */
34
34
35 static const int dirstate_flag_wc_tracked = 1;
35 static const int dirstate_flag_wc_tracked = 1;
36 static const int dirstate_flag_p1_tracked = 1 << 1;
36 static const int dirstate_flag_p1_tracked = 1 << 1;
37 static const int dirstate_flag_p2_info = 1 << 2;
37 static const int dirstate_flag_p2_info = 1 << 2;
38 static const int dirstate_flag_has_meaningful_data = 1 << 3;
38 static const int dirstate_flag_has_meaningful_data = 1 << 3;
39 static const int dirstate_flag_has_file_mtime = 1 << 4;
39 static const int dirstate_flag_has_file_mtime = 1 << 4;
40 static const int dirstate_flag_has_directory_mtime = 1 << 5;
40 static const int dirstate_flag_has_directory_mtime = 1 << 5;
41 static const int dirstate_flag_mode_exec_perm = 1 << 6;
41 static const int dirstate_flag_mode_exec_perm = 1 << 6;
42 static const int dirstate_flag_mode_is_symlink = 1 << 7;
42 static const int dirstate_flag_mode_is_symlink = 1 << 7;
43 static const int dirstate_flag_expected_state_is_modified = 1 << 8;
43 static const int dirstate_flag_expected_state_is_modified = 1 << 8;
44 static const int dirstate_flag_all_unknown_recorded = 1 << 9;
44 static const int dirstate_flag_all_unknown_recorded = 1 << 9;
45 static const int dirstate_flag_all_ignored_recorded = 1 << 10;
45 static const int dirstate_flag_all_ignored_recorded = 1 << 10;
46 static const int dirstate_flag_fallback_exec = 1 << 11;
46 static const int dirstate_flag_fallback_exec = 1 << 11;
47 static const int dirstate_flag_has_fallback_exec = 1 << 12;
47 static const int dirstate_flag_has_fallback_exec = 1 << 12;
48 static const int dirstate_flag_fallback_symlink = 1 << 13;
48 static const int dirstate_flag_fallback_symlink = 1 << 13;
49 static const int dirstate_flag_has_fallback_symlink = 1 << 14;
49 static const int dirstate_flag_has_fallback_symlink = 1 << 14;
50 static const int dirstate_flag_mtime_second_ambiguous = 1 << 15;
50
51
51 extern PyTypeObject dirstateItemType;
52 extern PyTypeObject dirstateItemType;
52 #define dirstate_tuple_check(op) (Py_TYPE(op) == &dirstateItemType)
53 #define dirstate_tuple_check(op) (Py_TYPE(op) == &dirstateItemType)
53
54
54 #ifndef MIN
55 #ifndef MIN
55 #define MIN(a, b) (((a) < (b)) ? (a) : (b))
56 #define MIN(a, b) (((a) < (b)) ? (a) : (b))
56 #endif
57 #endif
57 /* VC9 doesn't include bool and lacks stdbool.h based on my searching */
58 /* VC9 doesn't include bool and lacks stdbool.h based on my searching */
58 #if defined(_MSC_VER) || __STDC_VERSION__ < 199901L
59 #if defined(_MSC_VER) || __STDC_VERSION__ < 199901L
59 #define true 1
60 #define true 1
60 #define false 0
61 #define false 0
61 typedef unsigned char bool;
62 typedef unsigned char bool;
62 #else
63 #else
63 #include <stdbool.h>
64 #include <stdbool.h>
64 #endif
65 #endif
65
66
66 static inline PyObject *_dict_new_presized(Py_ssize_t expected_size)
67 static inline PyObject *_dict_new_presized(Py_ssize_t expected_size)
67 {
68 {
68 /* _PyDict_NewPresized expects a minused parameter, but it actually
69 /* _PyDict_NewPresized expects a minused parameter, but it actually
69 creates a dictionary that's the nearest power of two bigger than the
70 creates a dictionary that's the nearest power of two bigger than the
70 parameter. For example, with the initial minused = 1000, the
71 parameter. For example, with the initial minused = 1000, the
71 dictionary created has size 1024. Of course in a lot of cases that
72 dictionary created has size 1024. Of course in a lot of cases that
72 can be greater than the maximum load factor Python's dict object
73 can be greater than the maximum load factor Python's dict object
73 expects (= 2/3), so as soon as we cross the threshold we'll resize
74 expects (= 2/3), so as soon as we cross the threshold we'll resize
74 anyway. So create a dictionary that's at least 3/2 the size. */
75 anyway. So create a dictionary that's at least 3/2 the size. */
75 return _PyDict_NewPresized(((1 + expected_size) / 2) * 3);
76 return _PyDict_NewPresized(((1 + expected_size) / 2) * 3);
76 }
77 }
77
78
78 /* Convert a PyInt or PyLong to a long. Returns false if there is an
79 /* Convert a PyInt or PyLong to a long. Returns false if there is an
79 error, in which case an exception will already have been set. */
80 error, in which case an exception will already have been set. */
80 static inline bool pylong_to_long(PyObject *pylong, long *out)
81 static inline bool pylong_to_long(PyObject *pylong, long *out)
81 {
82 {
82 *out = PyLong_AsLong(pylong);
83 *out = PyLong_AsLong(pylong);
83 /* Fast path to avoid hitting PyErr_Occurred if the value was obviously
84 /* Fast path to avoid hitting PyErr_Occurred if the value was obviously
84 * not an error. */
85 * not an error. */
85 if (*out != -1) {
86 if (*out != -1) {
86 return true;
87 return true;
87 }
88 }
88 return PyErr_Occurred() == NULL;
89 return PyErr_Occurred() == NULL;
89 }
90 }
90
91
91 #endif /* _HG_UTIL_H_ */
92 #endif /* _HG_UTIL_H_ */
@@ -1,594 +1,602 b''
1 The *dirstate* is what Mercurial uses internally to track
1 The *dirstate* is what Mercurial uses internally to track
2 the state of files in the working directory,
2 the state of files in the working directory,
3 such as set by commands like `hg add` and `hg rm`.
3 such as set by commands like `hg add` and `hg rm`.
4 It also contains some cached data that help make `hg status` faster.
4 It also contains some cached data that help make `hg status` faster.
5 The name refers both to `.hg/dirstate` on the filesystem
5 The name refers both to `.hg/dirstate` on the filesystem
6 and the corresponding data structure in memory while a Mercurial process
6 and the corresponding data structure in memory while a Mercurial process
7 is running.
7 is running.
8
8
9 The original file format, retroactively dubbed `dirstate-v1`,
9 The original file format, retroactively dubbed `dirstate-v1`,
10 is described at https://www.mercurial-scm.org/wiki/DirState.
10 is described at https://www.mercurial-scm.org/wiki/DirState.
11 It is made of a flat sequence of unordered variable-size entries,
11 It is made of a flat sequence of unordered variable-size entries,
12 so accessing any information in it requires parsing all of it.
12 so accessing any information in it requires parsing all of it.
13 Similarly, saving changes requires rewriting the entire file.
13 Similarly, saving changes requires rewriting the entire file.
14
14
15 The newer `dirstate-v2` file format is designed to fix these limitations
15 The newer `dirstate-v2` file format is designed to fix these limitations
16 and make `hg status` faster.
16 and make `hg status` faster.
17
17
18 User guide
18 User guide
19 ==========
19 ==========
20
20
21 Compatibility
21 Compatibility
22 -------------
22 -------------
23
23
24 The file format is experimental and may still change.
24 The file format is experimental and may still change.
25 Different versions of Mercurial may not be compatible with each other
25 Different versions of Mercurial may not be compatible with each other
26 when working on a local repository that uses this format.
26 when working on a local repository that uses this format.
27 When using an incompatible version with the experimental format,
27 When using an incompatible version with the experimental format,
28 anything can happen including data corruption.
28 anything can happen including data corruption.
29
29
30 Since the dirstate is entirely local and not relevant to the wire protocol,
30 Since the dirstate is entirely local and not relevant to the wire protocol,
31 `dirstate-v2` does not affect compatibility with remote Mercurial versions.
31 `dirstate-v2` does not affect compatibility with remote Mercurial versions.
32
32
33 When `share-safe` is enabled, different repositories sharing the same store
33 When `share-safe` is enabled, different repositories sharing the same store
34 can use different dirstate formats.
34 can use different dirstate formats.
35
35
36 Enabling `dirstate-v2` for new local repositories
36 Enabling `dirstate-v2` for new local repositories
37 -------------------------------------------------
37 -------------------------------------------------
38
38
39 When creating a new local repository such as with `hg init` or `hg clone`,
39 When creating a new local repository such as with `hg init` or `hg clone`,
40 the `exp-dirstate-v2` boolean in the `format` configuration section
40 the `exp-dirstate-v2` boolean in the `format` configuration section
41 controls whether to use this file format.
41 controls whether to use this file format.
42 This is disabled by default as of this writing.
42 This is disabled by default as of this writing.
43 To enable it for a single repository, run for example::
43 To enable it for a single repository, run for example::
44
44
45 $ hg init my-project --config format.exp-dirstate-v2=1
45 $ hg init my-project --config format.exp-dirstate-v2=1
46
46
47 Checking the format of an existing local repository
47 Checking the format of an existing local repository
48 ---------------------------------------------------
48 ---------------------------------------------------
49
49
50 The `debugformat` command prints information about
50 The `debugformat` command prints information about
51 which of multiple optional formats are used in the current repository,
51 which of multiple optional formats are used in the current repository,
52 including `dirstate-v2`::
52 including `dirstate-v2`::
53
53
54 $ hg debugformat
54 $ hg debugformat
55 format-variant repo
55 format-variant repo
56 fncache: yes
56 fncache: yes
57 dirstate-v2: yes
57 dirstate-v2: yes
58 […]
58 […]
59
59
60 Upgrading or downgrading an existing local repository
60 Upgrading or downgrading an existing local repository
61 -----------------------------------------------------
61 -----------------------------------------------------
62
62
63 The `debugupgrade` command does various upgrades or downgrades
63 The `debugupgrade` command does various upgrades or downgrades
64 on a local repository
64 on a local repository
65 based on the current Mercurial version and on configuration.
65 based on the current Mercurial version and on configuration.
66 The same `format.exp-dirstate-v2` configuration is used again.
66 The same `format.exp-dirstate-v2` configuration is used again.
67
67
68 Example to upgrade::
68 Example to upgrade::
69
69
70 $ hg debugupgrade --config format.exp-dirstate-v2=1
70 $ hg debugupgrade --config format.exp-dirstate-v2=1
71
71
72 Example to downgrade to `dirstate-v1`::
72 Example to downgrade to `dirstate-v1`::
73
73
74 $ hg debugupgrade --config format.exp-dirstate-v2=0
74 $ hg debugupgrade --config format.exp-dirstate-v2=0
75
75
76 Both of these commands do nothing but print a list of proposed changes,
76 Both of these commands do nothing but print a list of proposed changes,
77 which may include changes unrelated to the dirstate.
77 which may include changes unrelated to the dirstate.
78 Those other changes are controlled by their own configuration keys.
78 Those other changes are controlled by their own configuration keys.
79 Add `--run` to a command to actually apply the proposed changes.
79 Add `--run` to a command to actually apply the proposed changes.
80
80
81 Backups of `.hg/requires` and `.hg/dirstate` are created
81 Backups of `.hg/requires` and `.hg/dirstate` are created
82 in a `.hg/upgradebackup.*` directory.
82 in a `.hg/upgradebackup.*` directory.
83 If something goes wrong, restoring those files should undo the change.
83 If something goes wrong, restoring those files should undo the change.
84
84
85 Note that upgrading affects compatibility with older versions of Mercurial
85 Note that upgrading affects compatibility with older versions of Mercurial
86 as noted above.
86 as noted above.
87 This can be relevant when a repository’s files are on a USB drive
87 This can be relevant when a repository’s files are on a USB drive
88 or some other removable media, or shared over the network, etc.
88 or some other removable media, or shared over the network, etc.
89
89
90 Internal filesystem representation
90 Internal filesystem representation
91 ==================================
91 ==================================
92
92
93 Requirements file
93 Requirements file
94 -----------------
94 -----------------
95
95
96 The `.hg/requires` file indicates which of various optional file formats
96 The `.hg/requires` file indicates which of various optional file formats
97 are used by a given repository.
97 are used by a given repository.
98 Mercurial aborts when seeing a requirement it does not know about,
98 Mercurial aborts when seeing a requirement it does not know about,
99 which avoids older versions accidentally messing up a repository
99 which avoids older versions accidentally messing up a repository
100 that uses a format that was introduced later.
100 that uses a format that was introduced later.
101 For versions that do support a format, the presence or absence of
101 For versions that do support a format, the presence or absence of
102 the corresponding requirement indicates whether to use that format.
102 the corresponding requirement indicates whether to use that format.
103
103
104 When the file contains an `exp-dirstate-v2` line,
104 When the file contains an `exp-dirstate-v2` line,
105 the `dirstate-v2` format is used.
105 the `dirstate-v2` format is used.
106 With no such line `dirstate-v1` is used.
106 With no such line `dirstate-v1` is used.
107
107
108 High level description
108 High level description
109 ----------------------
109 ----------------------
110
110
111 Whereas `dirstate-v1` uses a single `.hg/dirstate` file,
111 Whereas `dirstate-v1` uses a single `.hg/dirstate` file,
112 in `dirstate-v2` that file is a "docket" file
112 in `dirstate-v2` that file is a "docket" file
113 that only contains some metadata
113 that only contains some metadata
114 and points to a separate data file named `.hg/dirstate.{ID}`,
114 and points to a separate data file named `.hg/dirstate.{ID}`,
115 where `{ID}` is a random identifier.
115 where `{ID}` is a random identifier.
116
116
117 This separation allows making data files append-only
117 This separation allows making data files append-only
118 and therefore safer to memory-map.
118 and therefore safer to memory-map.
119 Creating a new data file (occasionally to clean up unused data)
119 Creating a new data file (occasionally to clean up unused data)
120 can be done with a different ID
120 can be done with a different ID
121 without disrupting another Mercurial process
121 without disrupting another Mercurial process
122 that could still be using the previous data file.
122 that could still be using the previous data file.
123
123
124 Both files have a format designed to reduce the need for parsing,
124 Both files have a format designed to reduce the need for parsing,
125 by using fixed-size binary components as much as possible.
125 by using fixed-size binary components as much as possible.
126 For data that is not fixed-size,
126 For data that is not fixed-size,
127 references to other parts of a file can be made by storing "pseudo-pointers":
127 references to other parts of a file can be made by storing "pseudo-pointers":
128 integers counted in bytes from the start of a file.
128 integers counted in bytes from the start of a file.
129 For read-only access no data structure is needed,
129 For read-only access no data structure is needed,
130 only a bytes buffer (possibly memory-mapped directly from the filesystem)
130 only a bytes buffer (possibly memory-mapped directly from the filesystem)
131 with specific parts read on demand.
131 with specific parts read on demand.
132
132
133 The data file contains "nodes" organized in a tree.
133 The data file contains "nodes" organized in a tree.
134 Each node represents a file or directory inside the working directory
134 Each node represents a file or directory inside the working directory
135 or its parent changeset.
135 or its parent changeset.
136 This tree has the same structure as the filesystem,
136 This tree has the same structure as the filesystem,
137 so a node representing a directory has child nodes representing
137 so a node representing a directory has child nodes representing
138 the files and subdirectories contained directly in that directory.
138 the files and subdirectories contained directly in that directory.
139
139
140 The docket file format
140 The docket file format
141 ----------------------
141 ----------------------
142
142
143 This is implemented in `rust/hg-core/src/dirstate_tree/on_disk.rs`
143 This is implemented in `rust/hg-core/src/dirstate_tree/on_disk.rs`
144 and `mercurial/dirstateutils/docket.py`.
144 and `mercurial/dirstateutils/docket.py`.
145
145
146 Components of the docket file are found at fixed offsets,
146 Components of the docket file are found at fixed offsets,
147 counted in bytes from the start of the file:
147 counted in bytes from the start of the file:
148
148
149 * Offset 0:
149 * Offset 0:
150 The 12-byte marker string "dirstate-v2\n" ending with a newline character.
150 The 12-byte marker string "dirstate-v2\n" ending with a newline character.
151 This makes it easier to tell a dirstate-v2 file from a dirstate-v1 file,
151 This makes it easier to tell a dirstate-v2 file from a dirstate-v1 file,
152 although it is not strictly necessary
152 although it is not strictly necessary
153 since `.hg/requires` determines which format to use.
153 since `.hg/requires` determines which format to use.
154
154
155 * Offset 12:
155 * Offset 12:
156 The changeset node ID of the first parent of the working directory,
156 The changeset node ID of the first parent of the working directory,
157 as up to 32 binary bytes.
157 as up to 32 binary bytes.
158 If a node ID is shorter (20 bytes for SHA-1),
158 If a node ID is shorter (20 bytes for SHA-1),
159 it is start-aligned and the rest of the bytes are set to zero.
159 it is start-aligned and the rest of the bytes are set to zero.
160
160
161 * Offset 44:
161 * Offset 44:
162 The changeset node ID of the second parent of the working directory,
162 The changeset node ID of the second parent of the working directory,
163 or all zeros if there isn’t one.
163 or all zeros if there isn’t one.
164 Also 32 binary bytes.
164 Also 32 binary bytes.
165
165
166 * Offset 76:
166 * Offset 76:
167 Tree metadata on 44 bytes, described below.
167 Tree metadata on 44 bytes, described below.
168 Its separation in this documentation from the rest of the docket
168 Its separation in this documentation from the rest of the docket
169 reflects a detail of the current implementation.
169 reflects a detail of the current implementation.
170 Since tree metadata is also made of fields at fixed offsets, those could
170 Since tree metadata is also made of fields at fixed offsets, those could
171 be inlined here by adding 76 bytes to each offset.
171 be inlined here by adding 76 bytes to each offset.
172
172
173 * Offset 120:
173 * Offset 120:
174 The used size of the data file, as a 32-bit big-endian integer.
174 The used size of the data file, as a 32-bit big-endian integer.
175 The actual size of the data file may be larger
175 The actual size of the data file may be larger
176 (if another Mercurial process is appending to it
176 (if another Mercurial process is appending to it
177 but has not updated the docket yet).
177 but has not updated the docket yet).
178 That extra data must be ignored.
178 That extra data must be ignored.
179
179
180 * Offset 124:
180 * Offset 124:
181 The length of the data file identifier, as an 8-bit integer.
181 The length of the data file identifier, as an 8-bit integer.
182
182
183 * Offset 125:
183 * Offset 125:
184 The data file identifier.
184 The data file identifier.
185
185
186 * Any additional data is currently ignored, and dropped when updating the file.
186 * Any additional data is currently ignored, and dropped when updating the file.
187
187
188 Tree metadata in the docket file
188 Tree metadata in the docket file
189 --------------------------------
189 --------------------------------
190
190
191 Tree metadata is similarly made of components at fixed offsets.
191 Tree metadata is similarly made of components at fixed offsets.
192 These offsets are counted in bytes from the start of tree metadata,
192 These offsets are counted in bytes from the start of tree metadata,
193 which is 76 bytes after the start of the docket file.
193 which is 76 bytes after the start of the docket file.
194
194
195 This metadata can be thought of as the singular root of the tree
195 This metadata can be thought of as the singular root of the tree
196 formed by nodes in the data file.
196 formed by nodes in the data file.
197
197
198 * Offset 0:
198 * Offset 0:
199 Pseudo-pointer to the start of root nodes,
199 Pseudo-pointer to the start of root nodes,
200 counted in bytes from the start of the data file,
200 counted in bytes from the start of the data file,
201 as a 32-bit big-endian integer.
201 as a 32-bit big-endian integer.
202 These nodes describe files and directories found directly
202 These nodes describe files and directories found directly
203 at the root of the working directory.
203 at the root of the working directory.
204
204
205 * Offset 4:
205 * Offset 4:
206 Number of root nodes, as a 32-bit big-endian integer.
206 Number of root nodes, as a 32-bit big-endian integer.
207
207
208 * Offset 8:
208 * Offset 8:
209 Total number of nodes in the entire tree that "have a dirstate entry",
209 Total number of nodes in the entire tree that "have a dirstate entry",
210 as a 32-bit big-endian integer.
210 as a 32-bit big-endian integer.
211 Those nodes represent files that would be present at all in `dirstate-v1`.
211 Those nodes represent files that would be present at all in `dirstate-v1`.
212 This is typically less than the total number of nodes.
212 This is typically less than the total number of nodes.
213 This counter is used to implement `len(dirstatemap)`.
213 This counter is used to implement `len(dirstatemap)`.
214
214
215 * Offset 12:
215 * Offset 12:
216 Number of nodes in the entire tree that have a copy source,
216 Number of nodes in the entire tree that have a copy source,
217 as a 32-bit big-endian integer.
217 as a 32-bit big-endian integer.
218 At the next commit, these files are recorded
218 At the next commit, these files are recorded
219 as having been copied or moved/renamed from that source.
219 as having been copied or moved/renamed from that source.
220 (A move is recorded as a copy and separate removal of the source.)
220 (A move is recorded as a copy and separate removal of the source.)
221 This counter is used to implement `len(dirstatemap.copymap)`.
221 This counter is used to implement `len(dirstatemap.copymap)`.
222
222
223 * Offset 16:
223 * Offset 16:
224 An estimation of how many bytes of the data file
224 An estimation of how many bytes of the data file
225 (within its used size) are unused, as a 32-bit big-endian integer.
225 (within its used size) are unused, as a 32-bit big-endian integer.
226 When appending to an existing data file,
226 When appending to an existing data file,
227 some existing nodes or paths can be unreachable from the new root
227 some existing nodes or paths can be unreachable from the new root
228 but they still take up space.
228 but they still take up space.
229 This counter is used to decide when to write a new data file from scratch
229 This counter is used to decide when to write a new data file from scratch
230 instead of appending to an existing one,
230 instead of appending to an existing one,
231 in order to get rid of that unreachable data
231 in order to get rid of that unreachable data
232 and avoid unbounded file size growth.
232 and avoid unbounded file size growth.
233
233
234 * Offset 20:
234 * Offset 20:
235 These four bytes are currently ignored
235 These four bytes are currently ignored
236 and reset to zero when updating a docket file.
236 and reset to zero when updating a docket file.
237 This is an attempt at forward compatibility:
237 This is an attempt at forward compatibility:
238 future Mercurial versions could use this as a bit field
238 future Mercurial versions could use this as a bit field
239 to indicate that a dirstate has additional data or constraints.
239 to indicate that a dirstate has additional data or constraints.
240 Finding a dirstate file with the relevant bit unset indicates that
240 Finding a dirstate file with the relevant bit unset indicates that
241 it was written by a then-older version
241 it was written by a then-older version
242 which is not aware of that future change.
242 which is not aware of that future change.
243
243
244 * Offset 24:
244 * Offset 24:
245 Either 20 zero bytes, or a SHA-1 hash as 20 binary bytes.
245 Either 20 zero bytes, or a SHA-1 hash as 20 binary bytes.
246 When present, the hash is of ignore patterns
246 When present, the hash is of ignore patterns
247 that were used for some previous run of the `status` algorithm.
247 that were used for some previous run of the `status` algorithm.
248
248
249 * (Offset 44: end of tree metadata)
249 * (Offset 44: end of tree metadata)
250
250
251 Optional hash of ignore patterns
251 Optional hash of ignore patterns
252 --------------------------------
252 --------------------------------
253
253
254 The implementation of `status` at `rust/hg-core/src/dirstate_tree/status.rs`
254 The implementation of `status` at `rust/hg-core/src/dirstate_tree/status.rs`
255 has been optimized such that its run time is dominated by calls
255 has been optimized such that its run time is dominated by calls
256 to `stat` for reading the filesystem metadata of a file or directory,
256 to `stat` for reading the filesystem metadata of a file or directory,
257 and to `readdir` for listing the contents of a directory.
257 and to `readdir` for listing the contents of a directory.
258 In some cases the algorithm can skip calls to `readdir`
258 In some cases the algorithm can skip calls to `readdir`
259 (saving significant time)
259 (saving significant time)
260 because the dirstate already contains enough of the relevant information
260 because the dirstate already contains enough of the relevant information
261 to build the correct `status` results.
261 to build the correct `status` results.
262
262
263 The default configuration of `hg status` is to list unknown files
263 The default configuration of `hg status` is to list unknown files
264 but not ignored files.
264 but not ignored files.
265 In this case, it matters for the `readdir`-skipping optimization
265 In this case, it matters for the `readdir`-skipping optimization
266 if a given file used to be ignored but became unknown
266 if a given file used to be ignored but became unknown
267 because `.hgignore` changed.
267 because `.hgignore` changed.
268 To detect the possibility of such a change,
268 To detect the possibility of such a change,
269 the tree metadata contains an optional hash of all ignore patterns.
269 the tree metadata contains an optional hash of all ignore patterns.
270
270
271 We define:
271 We define:
272
272
273 * "Root" ignore files as:
273 * "Root" ignore files as:
274
274
275 - `.hgignore` at the root of the repository if it exists
275 - `.hgignore` at the root of the repository if it exists
276 - And all files from `ui.ignore.*` config.
276 - And all files from `ui.ignore.*` config.
277
277
278 This set of files is sorted by the string representation of their path.
278 This set of files is sorted by the string representation of their path.
279
279
280 * The "expanded contents" of an ignore file is the byte string made
280 * The "expanded contents" of an ignore file is the byte string made
281 by the concatenation of its contents followed by the "expanded contents"
281 by the concatenation of its contents followed by the "expanded contents"
282 of other files included with `include:` or `subinclude:` directives,
282 of other files included with `include:` or `subinclude:` directives,
283 in inclusion order. This definition is recursive, as included files can
283 in inclusion order. This definition is recursive, as included files can
284 themselves include more files.
284 themselves include more files.
285
285
286 This hash is defined as the SHA-1 of the concatenation (in sorted
286 This hash is defined as the SHA-1 of the concatenation (in sorted
287 order) of the "expanded contents" of each "root" ignore file.
287 order) of the "expanded contents" of each "root" ignore file.
288 (Note that computing this does not require actually concatenating
288 (Note that computing this does not require actually concatenating
289 into a single contiguous byte sequence.
289 into a single contiguous byte sequence.
290 Instead a SHA-1 hasher object can be created
290 Instead a SHA-1 hasher object can be created
291 and fed separate chunks one by one.)
291 and fed separate chunks one by one.)
292
292
293 The data file format
293 The data file format
294 --------------------
294 --------------------
295
295
296 This is implemented in `rust/hg-core/src/dirstate_tree/on_disk.rs`
296 This is implemented in `rust/hg-core/src/dirstate_tree/on_disk.rs`
297 and `mercurial/dirstateutils/v2.py`.
297 and `mercurial/dirstateutils/v2.py`.
298
298
299 The data file contains two types of data: paths and nodes.
299 The data file contains two types of data: paths and nodes.
300
300
301 Paths and nodes can be organized in any order in the file, except that sibling
301 Paths and nodes can be organized in any order in the file, except that sibling
302 nodes must be next to each other and sorted by their path.
302 nodes must be next to each other and sorted by their path.
303 Contiguity lets the parent refer to them all
303 Contiguity lets the parent refer to them all
304 by their count and a single pseudo-pointer,
304 by their count and a single pseudo-pointer,
305 instead of storing one pseudo-pointer per child node.
305 instead of storing one pseudo-pointer per child node.
306 Sorting allows using binary search to find a child node with a given name
306 Sorting allows using binary search to find a child node with a given name
307 in `O(log(n))` byte sequence comparisons.
307 in `O(log(n))` byte sequence comparisons.
308
308
309 The current implementation writes paths and child nodes before a given node
309 The current implementation writes paths and child nodes before a given node
310 for ease of figuring out the value of pseudo-pointers by the time they are to be
310 for ease of figuring out the value of pseudo-pointers by the time they are to be
311 written, but this is not an obligation and readers must not rely on it.
311 written, but this is not an obligation and readers must not rely on it.
312
312
313 A path is stored as a byte string anywhere in the file, without delimiter.
313 A path is stored as a byte string anywhere in the file, without delimiter.
314 It is referred to by one or more nodes by a pseudo-pointer to its start, and its
314 It is referred to by one or more nodes by a pseudo-pointer to its start, and its
315 length in bytes. Since there is no delimiter,
315 length in bytes. Since there is no delimiter,
316 when a path is a substring of another the same bytes could be reused,
316 when a path is a substring of another the same bytes could be reused,
317 although the implementation does not exploit this as of this writing.
317 although the implementation does not exploit this as of this writing.
318
318
319 A node is stored on 44 bytes with components at fixed offsets. Paths and
319 A node is stored on 44 bytes with components at fixed offsets. Paths and
320 child nodes relevant to a node are stored externally and referenced through
320 child nodes relevant to a node are stored externally and referenced through
321 pseudo-pointers.
321 pseudo-pointers.
322
322
323 All integers are stored in big-endian. All pseudo-pointers are 32-bit integers
323 All integers are stored in big-endian. All pseudo-pointers are 32-bit integers
324 counting bytes from the start of the data file. Path lengths and positions
324 counting bytes from the start of the data file. Path lengths and positions
325 are 16-bit integers, also counted in bytes.
325 are 16-bit integers, also counted in bytes.
326
326
327 Node components are:
327 Node components are:
328
328
329 * Offset 0:
329 * Offset 0:
330 Pseudo-pointer to the full path of this node,
330 Pseudo-pointer to the full path of this node,
331 from the working directory root.
331 from the working directory root.
332
332
333 * Offset 4:
333 * Offset 4:
334 Length of the full path.
334 Length of the full path.
335
335
336 * Offset 6:
336 * Offset 6:
337 Position of the last `/` path separator within the full path,
337 Position of the last `/` path separator within the full path,
338 in bytes from the start of the full path,
338 in bytes from the start of the full path,
339 or zero if there isn’t one.
339 or zero if there isn’t one.
340 The part of the full path after this position is the "base name".
340 The part of the full path after this position is the "base name".
341 Since sibling nodes have the same parent, only their base names vary
341 Since sibling nodes have the same parent, only their base names vary
342 and need to be considered when doing binary search to find a given path.
342 and need to be considered when doing binary search to find a given path.
343
343
344 * Offset 8:
344 * Offset 8:
345 Pseudo-pointer to the "copy source" path for this node,
345 Pseudo-pointer to the "copy source" path for this node,
346 or zero if there is no copy source.
346 or zero if there is no copy source.
347
347
348 * Offset 12:
348 * Offset 12:
349 Length of the copy source path, or zero if there isn’t one.
349 Length of the copy source path, or zero if there isn’t one.
350
350
351 * Offset 14:
351 * Offset 14:
352 Pseudo-pointer to the start of child nodes.
352 Pseudo-pointer to the start of child nodes.
353
353
354 * Offset 18:
354 * Offset 18:
355 Number of child nodes, as a 32-bit integer.
355 Number of child nodes, as a 32-bit integer.
356 They occupy 44 times this number of bytes
356 They occupy 44 times this number of bytes
357 (not counting space for paths, and further descendants).
357 (not counting space for paths, and further descendants).
358
358
359 * Offset 22:
359 * Offset 22:
360 Number as a 32-bit integer of descendant nodes in this subtree,
360 Number as a 32-bit integer of descendant nodes in this subtree,
361 not including this node itself,
361 not including this node itself,
362 that "have a dirstate entry".
362 that "have a dirstate entry".
363 Those nodes represent files that would be present at all in `dirstate-v1`.
363 Those nodes represent files that would be present at all in `dirstate-v1`.
364 This is typically less than the total number of descendants.
364 This is typically less than the total number of descendants.
365 This counter is used to implement `has_dir`.
365 This counter is used to implement `has_dir`.
366
366
367 * Offset 26:
367 * Offset 26:
368 Number as a 32-bit integer of descendant nodes in this subtree,
368 Number as a 32-bit integer of descendant nodes in this subtree,
369 not including this node itself,
369 not including this node itself,
370 that represent files tracked in the working directory.
370 that represent files tracked in the working directory.
371 (For example, `hg rm` makes a file untracked.)
371 (For example, `hg rm` makes a file untracked.)
372 This counter is used to implement `has_tracked_dir`.
372 This counter is used to implement `has_tracked_dir`.
373
373
374 * Offset 30:
374 * Offset 30:
375 A `flags` field that packs some boolean values as bits of a 16-bit integer.
375 A `flags` field that packs some boolean values as bits of a 16-bit integer.
376 Starting from least-significant, bit masks are::
376 Starting from least-significant, bit masks are::
377
377
378 WDIR_TRACKED = 1 << 0
378 WDIR_TRACKED = 1 << 0
379 P1_TRACKED = 1 << 1
379 P1_TRACKED = 1 << 1
380 P2_INFO = 1 << 2
380 P2_INFO = 1 << 2
381 HAS_MODE_AND_SIZE = 1 << 3
381 HAS_MODE_AND_SIZE = 1 << 3
382 HAS_FILE_MTIME = 1 << 4
382 HAS_FILE_MTIME = 1 << 4
383 HAS_DIRECTORY_MTIME = 1 << 5
383 HAS_DIRECTORY_MTIME = 1 << 5
384 MODE_EXEC_PERM = 1 << 6
384 MODE_EXEC_PERM = 1 << 6
385 MODE_IS_SYMLINK = 1 << 7
385 MODE_IS_SYMLINK = 1 << 7
386 EXPECTED_STATE_IS_MODIFIED = 1 << 8
386 EXPECTED_STATE_IS_MODIFIED = 1 << 8
387 ALL_UNKNOWN_RECORDED = 1 << 9
387 ALL_UNKNOWN_RECORDED = 1 << 9
388 ALL_IGNORED_RECORDED = 1 << 10
388 ALL_IGNORED_RECORDED = 1 << 10
389 HAS_FALLBACK_EXEC = 1 << 11
389 HAS_FALLBACK_EXEC = 1 << 11
390 FALLBACK_EXEC = 1 << 12
390 FALLBACK_EXEC = 1 << 12
391 HAS_FALLBACK_SYMLINK = 1 << 13
391 HAS_FALLBACK_SYMLINK = 1 << 13
392 FALLBACK_SYMLINK = 1 << 14
392 FALLBACK_SYMLINK = 1 << 14
393 MTIME_SECOND_AMBIGUOUS = 1 << 15
393
394
394 The meaning of each bit is described below.
395 The meaning of each bit is described below.
395
396
396 Other bits are unset.
397 Other bits are unset.
397 They may be assigned meaning in the future,
398 They may be assigned meaning in the future,
398 with the limitation that Mercurial versions that pre-date such meaning
399 with the limitation that Mercurial versions that pre-date such meaning
399 will always reset those bits to unset when writing nodes.
400 will always reset those bits to unset when writing nodes.
400 (A new node is written for any mutation in its subtree,
401 (A new node is written for any mutation in its subtree,
401 leaving the bytes of the old node unreachable
402 leaving the bytes of the old node unreachable
402 until the data file is rewritten entirely.)
403 until the data file is rewritten entirely.)
403
404
404 * Offset 32:
405 * Offset 32:
405 A `size` field described below, as a 32-bit integer.
406 A `size` field described below, as a 32-bit integer.
406 Unlike in dirstate-v1, negative values are not used.
407 Unlike in dirstate-v1, negative values are not used.
407
408
408 * Offset 36:
409 * Offset 36:
409 The seconds component of an `mtime` field described below,
410 The seconds component of an `mtime` field described below,
410 as a 32-bit integer.
411 as a 32-bit integer.
411 Unlike in dirstate-v1, negative values are not used.
412 Unlike in dirstate-v1, negative values are not used.
412 When `mtime` is used, this is number of seconds since the Unix epoch
413 When `mtime` is used, this is number of seconds since the Unix epoch
413 truncated to its lower 31 bits.
414 truncated to its lower 31 bits.
414
415
415 * Offset 40:
416 * Offset 40:
416 The nanoseconds component of an `mtime` field described below,
417 The nanoseconds component of an `mtime` field described below,
417 as a 32-bit integer.
418 as a 32-bit integer.
418 When `mtime` is used,
419 When `mtime` is used,
419 this is the number of nanoseconds since `mtime.seconds`,
420 this is the number of nanoseconds since `mtime.seconds`,
420 always strictly less than one billion.
421 always strictly less than one billion.
421
422
422 This may be zero if more precision is not available.
423 This may be zero if more precision is not available.
423 (This can happen because of limitations in any of Mercurial, Python,
424 (This can happen because of limitations in any of Mercurial, Python,
424 libc, the operating system, …)
425 libc, the operating system, …)
425
426
426 When comparing two mtimes and either has this component set to zero,
427 When comparing two mtimes and either has this component set to zero,
427 the sub-second precision of both should be ignored.
428 the sub-second precision of both should be ignored.
428 False positives when checking mtime equality due to clock resolution
429 False positives when checking mtime equality due to clock resolution
429 are always possible and the status algorithm needs to deal with them,
430 are always possible and the status algorithm needs to deal with them,
430 but having too many false negatives could be harmful too.
431 but having too many false negatives could be harmful too.
431
432
432 * (Offset 44: end of this node)
433 * (Offset 44: end of this node)
433
434
434 The meaning of the boolean values packed in `flags` is:
435 The meaning of the boolean values packed in `flags` is:
435
436
436 `WDIR_TRACKED`
437 `WDIR_TRACKED`
437 Set if the working directory contains a tracked file at this node’s path.
438 Set if the working directory contains a tracked file at this node’s path.
438 This is typically set and unset by `hg add` and `hg rm`.
439 This is typically set and unset by `hg add` and `hg rm`.
439
440
440 `P1_TRACKED`
441 `P1_TRACKED`
441 Set if the working directory’s first parent changeset
442 Set if the working directory’s first parent changeset
442 (whose node identifier is found in tree metadata)
443 (whose node identifier is found in tree metadata)
443 contains a tracked file at this node’s path.
444 contains a tracked file at this node’s path.
444 This is a cache to reduce manifest lookups.
445 This is a cache to reduce manifest lookups.
445
446
446 `P2_INFO`
447 `P2_INFO`
447 Set if the file has been involved in some merge operation.
448 Set if the file has been involved in some merge operation.
448 Either because it was actually merged,
449 Either because it was actually merged,
449 or because the version in the second parent p2 was ahead,
450 or because the version in the second parent p2 was ahead,
450 or because some rename moved it there.
451 or because some rename moved it there.
451 In either case `hg status` will want it displayed as modified.
452 In either case `hg status` will want it displayed as modified.
452
453
453 Files that would be mentioned at all in the `dirstate-v1` file format
454 Files that would be mentioned at all in the `dirstate-v1` file format
454 have a node with at least one of the above three bits set in `dirstate-v2`.
455 have a node with at least one of the above three bits set in `dirstate-v2`.
455 Let’s call these files "tracked anywhere",
456 Let’s call these files "tracked anywhere",
456 and "untracked" the nodes with all three of these bits unset.
457 and "untracked" the nodes with all three of these bits unset.
457 Untracked nodes are typically for directories:
458 Untracked nodes are typically for directories:
458 they hold child nodes and form the tree structure.
459 they hold child nodes and form the tree structure.
459 Additional untracked nodes may also exist.
460 Additional untracked nodes may also exist.
460 Although implementations should strive to clean up nodes
461 Although implementations should strive to clean up nodes
461 that are entirely unused, other untracked nodes may also exist.
462 that are entirely unused, other untracked nodes may also exist.
462 For example, a future version of Mercurial might in some cases
463 For example, a future version of Mercurial might in some cases
463 add nodes for untracked files or/and ignored files in the working directory
464 add nodes for untracked files or/and ignored files in the working directory
464 in order to optimize `hg status`
465 in order to optimize `hg status`
465 by enabling it to skip `readdir` in more cases.
466 by enabling it to skip `readdir` in more cases.
466
467
467 `HAS_MODE_AND_SIZE`
468 `HAS_MODE_AND_SIZE`
468 Must be unset for untracked nodes.
469 Must be unset for untracked nodes.
469 For files tracked anywhere, if this is set:
470 For files tracked anywhere, if this is set:
470 - The `size` field is the expected file size,
471 - The `size` field is the expected file size,
471 in bytes truncated to its lower 31 bits.
472 in bytes truncated to its lower 31 bits.
472 - The expected execute permission for the file’s owner
473 - The expected execute permission for the file’s owner
473 is given by `MODE_EXEC_PERM`
474 is given by `MODE_EXEC_PERM`
474 - The expected file type is given by `MODE_IS_SYMLINK`:
475 - The expected file type is given by `MODE_IS_SYMLINK`:
475 a symbolic link if set, or a normal file if unset.
476 a symbolic link if set, or a normal file if unset.
476 If this is unset the expected size, permission, and file type are unknown.
477 If this is unset the expected size, permission, and file type are unknown.
477 The `size` field is unused (set to zero).
478 The `size` field is unused (set to zero).
478
479
479 `HAS_FILE_MTIME`
480 `HAS_FILE_MTIME`
480 Must be unset for untracked nodes.
481 Must be unset for untracked nodes.
481 If this and `HAS_DIRECTORY_MTIME` are both unset,
482 If this and `HAS_DIRECTORY_MTIME` are both unset,
482 the `mtime` field is unused (set to zero).
483 the `mtime` field is unused (set to zero).
483 If this is set, `mtime` is the expected modification time.
484 If this is set, `mtime` is the expected modification time.
484
485
485 `HAS_DIRECTORY_MTIME`
486 `HAS_DIRECTORY_MTIME`
486 Must be unset for files tracked anywhere.
487 Must be unset for files tracked anywhere.
487 If this and `HAS_FILE_MTIME` are both unset,
488 If this and `HAS_FILE_MTIME` are both unset,
488 the `mtime` field is unused (set to zero).
489 the `mtime` field is unused (set to zero).
489 If this is set, at some point,
490 If this is set, at some point,
490 this path in the working directory was observed:
491 this path in the working directory was observed:
491
492
492 - To be a directory
493 - To be a directory
493 - With the modification time given in `mtime`
494 - With the modification time given in `mtime`
494 - That time was already strictly in the past when observed,
495 - That time was already strictly in the past when observed,
495 meaning that later changes cannot happen in the same clock tick
496 meaning that later changes cannot happen in the same clock tick
496 and must cause a different modification time
497 and must cause a different modification time
497 (unless the system clock jumps back and we get unlucky,
498 (unless the system clock jumps back and we get unlucky,
498 which is not impossible but deemed unlikely enough).
499 which is not impossible but deemed unlikely enough).
499 - All direct children of this directory
500 - All direct children of this directory
500 (as returned by `std::fs::read_dir`)
501 (as returned by `std::fs::read_dir`)
501 either have a corresponding dirstate node,
502 either have a corresponding dirstate node,
502 or are ignored by ignore patterns whose hash is in tree metadata.
503 or are ignored by ignore patterns whose hash is in tree metadata.
503
504
504 This means that if `std::fs::symlink_metadata` later reports
505 This means that if `std::fs::symlink_metadata` later reports
505 the same modification time
506 the same modification time
506 and ignored patterns haven’t changed,
507 and ignored patterns haven’t changed,
507 a run of status that is not listing ignored files
508 a run of status that is not listing ignored files
508 can skip calling `std::fs::read_dir` again for this directory,
509 can skip calling `std::fs::read_dir` again for this directory,
509 and iterate child dirstate nodes instead.
510 and iterate child dirstate nodes instead.
510
511
511 `MODE_EXEC_PERM`
512 `MODE_EXEC_PERM`
512 Must be unset if `HAS_MODE_AND_SIZE` is unset.
513 Must be unset if `HAS_MODE_AND_SIZE` is unset.
513 If `HAS_MODE_AND_SIZE` is set,
514 If `HAS_MODE_AND_SIZE` is set,
514 this indicates whether the file’s owner is expected
515 this indicates whether the file’s owner is expected
515 to have execute permission.
516 to have execute permission.
516
517
517 `MODE_IS_SYMLINK`
518 `MODE_IS_SYMLINK`
518 Must be unset if `HAS_MODE_AND_SIZE` is unset.
519 Must be unset if `HAS_MODE_AND_SIZE` is unset.
519 If `HAS_MODE_AND_SIZE` is set,
520 If `HAS_MODE_AND_SIZE` is set,
520 this indicates whether the file is expected to be a symlink
521 this indicates whether the file is expected to be a symlink
521 as opposed to a normal file.
522 as opposed to a normal file.
522
523
523 `EXPECTED_STATE_IS_MODIFIED`
524 `EXPECTED_STATE_IS_MODIFIED`
524 Must be unset for untracked nodes.
525 Must be unset for untracked nodes.
525 For:
526 For:
526 - a file tracked anywhere
527 - a file tracked anywhere
527 - that has expected metadata (`HAS_MODE_AND_SIZE` and `HAS_FILE_MTIME`)
528 - that has expected metadata (`HAS_MODE_AND_SIZE` and `HAS_FILE_MTIME`)
528 - if that metadata matches
529 - if that metadata matches
529 metadata found in the working directory with `stat`
530 metadata found in the working directory with `stat`
530 This bit indicates the status of the file.
531 This bit indicates the status of the file.
531 If set, the status is modified. If unset, it is clean.
532 If set, the status is modified. If unset, it is clean.
532
533
533 In cases where `hg status` needs to read the contents of a file
534 In cases where `hg status` needs to read the contents of a file
534 because metadata is ambiguous, this bit lets it record the result
535 because metadata is ambiguous, this bit lets it record the result
535 if the result is modified so that a future run of `hg status`
536 if the result is modified so that a future run of `hg status`
536 does not need to do the same again.
537 does not need to do the same again.
537 It is valid to never set this bit,
538 It is valid to never set this bit,
538 and consider expected metadata ambiguous if it is set.
539 and consider expected metadata ambiguous if it is set.
539
540
540 `ALL_UNKNOWN_RECORDED`
541 `ALL_UNKNOWN_RECORDED`
541 If set, all "unknown" children existing on disk (at the time of the last
542 If set, all "unknown" children existing on disk (at the time of the last
542 status) have been recorded and the `mtime` associated with
543 status) have been recorded and the `mtime` associated with
543 `HAS_DIRECTORY_MTIME` can be used for optimization even when "unknown" files
544 `HAS_DIRECTORY_MTIME` can be used for optimization even when "unknown" files
544 are listed.
545 are listed.
545
546
546 Note that the amount of recorded "unknown" children can still be zero if none
547 Note that the amount of recorded "unknown" children can still be zero if none
547 were present.
548 were present.
548
549
549 Also note that having this flag unset does not imply that no "unknown"
550 Also note that having this flag unset does not imply that no "unknown"
550 children have been recorded. Some might be present, but there is no guarantee
551 children have been recorded. Some might be present, but there is no guarantee
551 that it will be all of them.
552 that it will be all of them.
552
553
553 `ALL_IGNORED_RECORDED`
554 `ALL_IGNORED_RECORDED`
554 If set, all "ignored" children existing on disk (at the time of the last
555 If set, all "ignored" children existing on disk (at the time of the last
555 status) have been recorded and the `mtime` associated with
556 status) have been recorded and the `mtime` associated with
556 `HAS_DIRECTORY_MTIME` can be used for optimization even when "ignored" files
557 `HAS_DIRECTORY_MTIME` can be used for optimization even when "ignored" files
557 are listed.
558 are listed.
558
559
559 Note that the number of recorded "ignored" children can still be zero if none
560 Note that the number of recorded "ignored" children can still be zero if none
560 were present.
561 were present.
561
562
562 Also note that having this flag unset does not imply that no "ignored"
563 Also note that having this flag unset does not imply that no "ignored"
563 children have been recorded. Some might be present, but there is no guarantee
564 children have been recorded. Some might be present, but there is no guarantee
564 that it will be all of them.
565 that it will be all of them.
565
566
566 `HAS_FALLBACK_EXEC`
567 `HAS_FALLBACK_EXEC`
567 If this flag is set, the entry carries "fallback" information for the
568 If this flag is set, the entry carries "fallback" information for the
568 executable bit in the `FALLBACK_EXEC` flag.
569 executable bit in the `FALLBACK_EXEC` flag.
569
570
570 Fallback information can be stored in the dirstate to keep track of
571 Fallback information can be stored in the dirstate to keep track of
571 filesystem attribute tracked by Mercurial when the underlying file
572 filesystem attribute tracked by Mercurial when the underlying file
572 system or operating system does not support that property, (e.g.
573 system or operating system does not support that property, (e.g.
573 Windows).
574 Windows).
574
575
575 `FALLBACK_EXEC`
576 `FALLBACK_EXEC`
576 Should be ignored if `HAS_FALLBACK_EXEC` is unset. If set the file for this
577 Should be ignored if `HAS_FALLBACK_EXEC` is unset. If set the file for this
577 entry should be considered executable if that information cannot be
578 entry should be considered executable if that information cannot be
578 extracted from the file system. If unset it should be considered
579 extracted from the file system. If unset it should be considered
579 non-executable instead.
580 non-executable instead.
580
581
581 `HAS_FALLBACK_SYMLINK`
582 `HAS_FALLBACK_SYMLINK`
582 If this flag is set, the entry carries "fallback" information for symbolic
583 If this flag is set, the entry carries "fallback" information for symbolic
583 link status in the `FALLBACK_SYMLINK` flag.
584 link status in the `FALLBACK_SYMLINK` flag.
584
585
585 Fallback information can be stored in the dirstate to keep track of
586 Fallback information can be stored in the dirstate to keep track of
586 filesystem attribute tracked by Mercurial when the underlying file
587 filesystem attribute tracked by Mercurial when the underlying file
587 system or operating system does not support that property, (e.g.
588 system or operating system does not support that property, (e.g.
588 Windows).
589 Windows).
589
590
590 `FALLBACK_SYMLINK`
591 `FALLBACK_SYMLINK`
591 Should be ignored if `HAS_FALLBACK_SYMLINK` is unset. If set the file for
592 Should be ignored if `HAS_FALLBACK_SYMLINK` is unset. If set the file for
592 this entry should be considered a symlink if that information cannot be
593 this entry should be considered a symlink if that information cannot be
593 extracted from the file system. If unset it should be considered a normal
594 extracted from the file system. If unset it should be considered a normal
594 file instead.
595 file instead.
596
597 `MTIME_SECOND_AMBIGUOUS`
598 This flag is relevant only when `HAS_FILE_MTIME` is set. When set, the
599 `mtime` stored in the entry is only valid for comparison with timestamps
600 that have nanosecond information. If the available timestamp does not carry
601 nanosecond information, the `mtime` should be ignored and no optimisation
602 can be applied.
@@ -1,928 +1,933 b''
1 # parsers.py - Python implementation of parsers.c
1 # parsers.py - Python implementation of parsers.c
2 #
2 #
3 # Copyright 2009 Olivia Mackall <olivia@selenic.com> and others
3 # Copyright 2009 Olivia Mackall <olivia@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import stat
10 import stat
11 import struct
11 import struct
12 import zlib
12 import zlib
13
13
14 from ..node import (
14 from ..node import (
15 nullrev,
15 nullrev,
16 sha1nodeconstants,
16 sha1nodeconstants,
17 )
17 )
18 from ..thirdparty import attr
18 from ..thirdparty import attr
19 from .. import (
19 from .. import (
20 error,
20 error,
21 pycompat,
21 pycompat,
22 revlogutils,
22 revlogutils,
23 util,
23 util,
24 )
24 )
25
25
26 from ..revlogutils import nodemap as nodemaputil
26 from ..revlogutils import nodemap as nodemaputil
27 from ..revlogutils import constants as revlog_constants
27 from ..revlogutils import constants as revlog_constants
28
28
29 stringio = pycompat.bytesio
29 stringio = pycompat.bytesio
30
30
31
31
32 _pack = struct.pack
32 _pack = struct.pack
33 _unpack = struct.unpack
33 _unpack = struct.unpack
34 _compress = zlib.compress
34 _compress = zlib.compress
35 _decompress = zlib.decompress
35 _decompress = zlib.decompress
36
36
37
37
38 # a special value used internally for `size` if the file come from the other parent
38 # a special value used internally for `size` if the file come from the other parent
39 FROM_P2 = -2
39 FROM_P2 = -2
40
40
41 # a special value used internally for `size` if the file is modified/merged/added
41 # a special value used internally for `size` if the file is modified/merged/added
42 NONNORMAL = -1
42 NONNORMAL = -1
43
43
44 # a special value used internally for `time` if the time is ambiguous
44 # a special value used internally for `time` if the time is ambiguous
45 AMBIGUOUS_TIME = -1
45 AMBIGUOUS_TIME = -1
46
46
47 # Bits of the `flags` byte inside a node in the file format
47 # Bits of the `flags` byte inside a node in the file format
48 DIRSTATE_V2_WDIR_TRACKED = 1 << 0
48 DIRSTATE_V2_WDIR_TRACKED = 1 << 0
49 DIRSTATE_V2_P1_TRACKED = 1 << 1
49 DIRSTATE_V2_P1_TRACKED = 1 << 1
50 DIRSTATE_V2_P2_INFO = 1 << 2
50 DIRSTATE_V2_P2_INFO = 1 << 2
51 DIRSTATE_V2_HAS_MODE_AND_SIZE = 1 << 3
51 DIRSTATE_V2_HAS_MODE_AND_SIZE = 1 << 3
52 DIRSTATE_V2_HAS_FILE_MTIME = 1 << 4
52 DIRSTATE_V2_HAS_FILE_MTIME = 1 << 4
53 _DIRSTATE_V2_HAS_DIRCTORY_MTIME = 1 << 5 # Unused when Rust is not available
53 _DIRSTATE_V2_HAS_DIRCTORY_MTIME = 1 << 5 # Unused when Rust is not available
54 DIRSTATE_V2_MODE_EXEC_PERM = 1 << 6
54 DIRSTATE_V2_MODE_EXEC_PERM = 1 << 6
55 DIRSTATE_V2_MODE_IS_SYMLINK = 1 << 7
55 DIRSTATE_V2_MODE_IS_SYMLINK = 1 << 7
56 DIRSTATE_V2_EXPECTED_STATE_IS_MODIFIED = 1 << 8
56 DIRSTATE_V2_EXPECTED_STATE_IS_MODIFIED = 1 << 8
57 DIRSTATE_V2_ALL_UNKNOWN_RECORDED = 1 << 9
57 DIRSTATE_V2_ALL_UNKNOWN_RECORDED = 1 << 9
58 DIRSTATE_V2_ALL_IGNORED_RECORDED = 1 << 10
58 DIRSTATE_V2_ALL_IGNORED_RECORDED = 1 << 10
59 DIRSTATE_V2_HAS_FALLBACK_EXEC = 1 << 11
59 DIRSTATE_V2_HAS_FALLBACK_EXEC = 1 << 11
60 DIRSTATE_V2_FALLBACK_EXEC = 1 << 12
60 DIRSTATE_V2_FALLBACK_EXEC = 1 << 12
61 DIRSTATE_V2_HAS_FALLBACK_SYMLINK = 1 << 13
61 DIRSTATE_V2_HAS_FALLBACK_SYMLINK = 1 << 13
62 DIRSTATE_V2_FALLBACK_SYMLINK = 1 << 14
62 DIRSTATE_V2_FALLBACK_SYMLINK = 1 << 14
63 DIRSTATE_V2_MTIME_SECOND_AMBIGUOUS = 1 << 15
63
64
64
65
65 @attr.s(slots=True, init=False)
66 @attr.s(slots=True, init=False)
66 class DirstateItem(object):
67 class DirstateItem(object):
67 """represent a dirstate entry
68 """represent a dirstate entry
68
69
69 It hold multiple attributes
70 It hold multiple attributes
70
71
71 # about file tracking
72 # about file tracking
72 - wc_tracked: is the file tracked by the working copy
73 - wc_tracked: is the file tracked by the working copy
73 - p1_tracked: is the file tracked in working copy first parent
74 - p1_tracked: is the file tracked in working copy first parent
74 - p2_info: the file has been involved in some merge operation. Either
75 - p2_info: the file has been involved in some merge operation. Either
75 because it was actually merged, or because the p2 version was
76 because it was actually merged, or because the p2 version was
76 ahead, or because some rename moved it there. In either case
77 ahead, or because some rename moved it there. In either case
77 `hg status` will want it displayed as modified.
78 `hg status` will want it displayed as modified.
78
79
79 # about the file state expected from p1 manifest:
80 # about the file state expected from p1 manifest:
80 - mode: the file mode in p1
81 - mode: the file mode in p1
81 - size: the file size in p1
82 - size: the file size in p1
82
83
83 These value can be set to None, which mean we don't have a meaningful value
84 These value can be set to None, which mean we don't have a meaningful value
84 to compare with. Either because we don't really care about them as there
85 to compare with. Either because we don't really care about them as there
85 `status` is known without having to look at the disk or because we don't
86 `status` is known without having to look at the disk or because we don't
86 know these right now and a full comparison will be needed to find out if
87 know these right now and a full comparison will be needed to find out if
87 the file is clean.
88 the file is clean.
88
89
89 # about the file state on disk last time we saw it:
90 # about the file state on disk last time we saw it:
90 - mtime: the last known clean mtime for the file.
91 - mtime: the last known clean mtime for the file.
91
92
92 This value can be set to None if no cachable state exist. Either because we
93 This value can be set to None if no cachable state exist. Either because we
93 do not care (see previous section) or because we could not cache something
94 do not care (see previous section) or because we could not cache something
94 yet.
95 yet.
95 """
96 """
96
97
97 _wc_tracked = attr.ib()
98 _wc_tracked = attr.ib()
98 _p1_tracked = attr.ib()
99 _p1_tracked = attr.ib()
99 _p2_info = attr.ib()
100 _p2_info = attr.ib()
100 _mode = attr.ib()
101 _mode = attr.ib()
101 _size = attr.ib()
102 _size = attr.ib()
102 _mtime_s = attr.ib()
103 _mtime_s = attr.ib()
103 _mtime_ns = attr.ib()
104 _mtime_ns = attr.ib()
104 _fallback_exec = attr.ib()
105 _fallback_exec = attr.ib()
105 _fallback_symlink = attr.ib()
106 _fallback_symlink = attr.ib()
106
107
107 def __init__(
108 def __init__(
108 self,
109 self,
109 wc_tracked=False,
110 wc_tracked=False,
110 p1_tracked=False,
111 p1_tracked=False,
111 p2_info=False,
112 p2_info=False,
112 has_meaningful_data=True,
113 has_meaningful_data=True,
113 has_meaningful_mtime=True,
114 has_meaningful_mtime=True,
114 parentfiledata=None,
115 parentfiledata=None,
115 fallback_exec=None,
116 fallback_exec=None,
116 fallback_symlink=None,
117 fallback_symlink=None,
117 ):
118 ):
118 self._wc_tracked = wc_tracked
119 self._wc_tracked = wc_tracked
119 self._p1_tracked = p1_tracked
120 self._p1_tracked = p1_tracked
120 self._p2_info = p2_info
121 self._p2_info = p2_info
121
122
122 self._fallback_exec = fallback_exec
123 self._fallback_exec = fallback_exec
123 self._fallback_symlink = fallback_symlink
124 self._fallback_symlink = fallback_symlink
124
125
125 self._mode = None
126 self._mode = None
126 self._size = None
127 self._size = None
127 self._mtime_s = None
128 self._mtime_s = None
128 self._mtime_ns = None
129 self._mtime_ns = None
129 if parentfiledata is None:
130 if parentfiledata is None:
130 has_meaningful_mtime = False
131 has_meaningful_mtime = False
131 has_meaningful_data = False
132 has_meaningful_data = False
132 if has_meaningful_data:
133 if has_meaningful_data:
133 self._mode = parentfiledata[0]
134 self._mode = parentfiledata[0]
134 self._size = parentfiledata[1]
135 self._size = parentfiledata[1]
135 if has_meaningful_mtime:
136 if has_meaningful_mtime:
136 self._mtime_s, self._mtime_ns = parentfiledata[2]
137 self._mtime_s, self._mtime_ns = parentfiledata[2]
137
138
138 @classmethod
139 @classmethod
139 def from_v2_data(cls, flags, size, mtime_s, mtime_ns):
140 def from_v2_data(cls, flags, size, mtime_s, mtime_ns):
140 """Build a new DirstateItem object from V2 data"""
141 """Build a new DirstateItem object from V2 data"""
141 has_mode_size = bool(flags & DIRSTATE_V2_HAS_MODE_AND_SIZE)
142 has_mode_size = bool(flags & DIRSTATE_V2_HAS_MODE_AND_SIZE)
142 has_meaningful_mtime = bool(flags & DIRSTATE_V2_HAS_FILE_MTIME)
143 has_meaningful_mtime = bool(flags & DIRSTATE_V2_HAS_FILE_MTIME)
144 if flags & DIRSTATE_V2_MTIME_SECOND_AMBIGUOUS:
145 # The current code is not able to do the more subtle comparison that the
146 # MTIME_SECOND_AMBIGUOUS requires. So we ignore the mtime
147 has_meaningful_mtime = False
143 mode = None
148 mode = None
144
149
145 if flags & +DIRSTATE_V2_EXPECTED_STATE_IS_MODIFIED:
150 if flags & +DIRSTATE_V2_EXPECTED_STATE_IS_MODIFIED:
146 # we do not have support for this flag in the code yet,
151 # we do not have support for this flag in the code yet,
147 # force a lookup for this file.
152 # force a lookup for this file.
148 has_mode_size = False
153 has_mode_size = False
149 has_meaningful_mtime = False
154 has_meaningful_mtime = False
150
155
151 fallback_exec = None
156 fallback_exec = None
152 if flags & DIRSTATE_V2_HAS_FALLBACK_EXEC:
157 if flags & DIRSTATE_V2_HAS_FALLBACK_EXEC:
153 fallback_exec = flags & DIRSTATE_V2_FALLBACK_EXEC
158 fallback_exec = flags & DIRSTATE_V2_FALLBACK_EXEC
154
159
155 fallback_symlink = None
160 fallback_symlink = None
156 if flags & DIRSTATE_V2_HAS_FALLBACK_SYMLINK:
161 if flags & DIRSTATE_V2_HAS_FALLBACK_SYMLINK:
157 fallback_symlink = flags & DIRSTATE_V2_FALLBACK_SYMLINK
162 fallback_symlink = flags & DIRSTATE_V2_FALLBACK_SYMLINK
158
163
159 if has_mode_size:
164 if has_mode_size:
160 assert stat.S_IXUSR == 0o100
165 assert stat.S_IXUSR == 0o100
161 if flags & DIRSTATE_V2_MODE_EXEC_PERM:
166 if flags & DIRSTATE_V2_MODE_EXEC_PERM:
162 mode = 0o755
167 mode = 0o755
163 else:
168 else:
164 mode = 0o644
169 mode = 0o644
165 if flags & DIRSTATE_V2_MODE_IS_SYMLINK:
170 if flags & DIRSTATE_V2_MODE_IS_SYMLINK:
166 mode |= stat.S_IFLNK
171 mode |= stat.S_IFLNK
167 else:
172 else:
168 mode |= stat.S_IFREG
173 mode |= stat.S_IFREG
169 return cls(
174 return cls(
170 wc_tracked=bool(flags & DIRSTATE_V2_WDIR_TRACKED),
175 wc_tracked=bool(flags & DIRSTATE_V2_WDIR_TRACKED),
171 p1_tracked=bool(flags & DIRSTATE_V2_P1_TRACKED),
176 p1_tracked=bool(flags & DIRSTATE_V2_P1_TRACKED),
172 p2_info=bool(flags & DIRSTATE_V2_P2_INFO),
177 p2_info=bool(flags & DIRSTATE_V2_P2_INFO),
173 has_meaningful_data=has_mode_size,
178 has_meaningful_data=has_mode_size,
174 has_meaningful_mtime=has_meaningful_mtime,
179 has_meaningful_mtime=has_meaningful_mtime,
175 parentfiledata=(mode, size, (mtime_s, mtime_ns)),
180 parentfiledata=(mode, size, (mtime_s, mtime_ns)),
176 fallback_exec=fallback_exec,
181 fallback_exec=fallback_exec,
177 fallback_symlink=fallback_symlink,
182 fallback_symlink=fallback_symlink,
178 )
183 )
179
184
180 @classmethod
185 @classmethod
181 def from_v1_data(cls, state, mode, size, mtime):
186 def from_v1_data(cls, state, mode, size, mtime):
182 """Build a new DirstateItem object from V1 data
187 """Build a new DirstateItem object from V1 data
183
188
184 Since the dirstate-v1 format is frozen, the signature of this function
189 Since the dirstate-v1 format is frozen, the signature of this function
185 is not expected to change, unlike the __init__ one.
190 is not expected to change, unlike the __init__ one.
186 """
191 """
187 if state == b'm':
192 if state == b'm':
188 return cls(wc_tracked=True, p1_tracked=True, p2_info=True)
193 return cls(wc_tracked=True, p1_tracked=True, p2_info=True)
189 elif state == b'a':
194 elif state == b'a':
190 return cls(wc_tracked=True)
195 return cls(wc_tracked=True)
191 elif state == b'r':
196 elif state == b'r':
192 if size == NONNORMAL:
197 if size == NONNORMAL:
193 p1_tracked = True
198 p1_tracked = True
194 p2_info = True
199 p2_info = True
195 elif size == FROM_P2:
200 elif size == FROM_P2:
196 p1_tracked = False
201 p1_tracked = False
197 p2_info = True
202 p2_info = True
198 else:
203 else:
199 p1_tracked = True
204 p1_tracked = True
200 p2_info = False
205 p2_info = False
201 return cls(p1_tracked=p1_tracked, p2_info=p2_info)
206 return cls(p1_tracked=p1_tracked, p2_info=p2_info)
202 elif state == b'n':
207 elif state == b'n':
203 if size == FROM_P2:
208 if size == FROM_P2:
204 return cls(wc_tracked=True, p2_info=True)
209 return cls(wc_tracked=True, p2_info=True)
205 elif size == NONNORMAL:
210 elif size == NONNORMAL:
206 return cls(wc_tracked=True, p1_tracked=True)
211 return cls(wc_tracked=True, p1_tracked=True)
207 elif mtime == AMBIGUOUS_TIME:
212 elif mtime == AMBIGUOUS_TIME:
208 return cls(
213 return cls(
209 wc_tracked=True,
214 wc_tracked=True,
210 p1_tracked=True,
215 p1_tracked=True,
211 has_meaningful_mtime=False,
216 has_meaningful_mtime=False,
212 parentfiledata=(mode, size, (42, 0)),
217 parentfiledata=(mode, size, (42, 0)),
213 )
218 )
214 else:
219 else:
215 return cls(
220 return cls(
216 wc_tracked=True,
221 wc_tracked=True,
217 p1_tracked=True,
222 p1_tracked=True,
218 parentfiledata=(mode, size, (mtime, 0)),
223 parentfiledata=(mode, size, (mtime, 0)),
219 )
224 )
220 else:
225 else:
221 raise RuntimeError(b'unknown state: %s' % state)
226 raise RuntimeError(b'unknown state: %s' % state)
222
227
223 def set_possibly_dirty(self):
228 def set_possibly_dirty(self):
224 """Mark a file as "possibly dirty"
229 """Mark a file as "possibly dirty"
225
230
226 This means the next status call will have to actually check its content
231 This means the next status call will have to actually check its content
227 to make sure it is correct.
232 to make sure it is correct.
228 """
233 """
229 self._mtime_s = None
234 self._mtime_s = None
230 self._mtime_ns = None
235 self._mtime_ns = None
231
236
232 def set_clean(self, mode, size, mtime):
237 def set_clean(self, mode, size, mtime):
233 """mark a file as "clean" cancelling potential "possibly dirty call"
238 """mark a file as "clean" cancelling potential "possibly dirty call"
234
239
235 Note: this function is a descendant of `dirstate.normal` and is
240 Note: this function is a descendant of `dirstate.normal` and is
236 currently expected to be call on "normal" entry only. There are not
241 currently expected to be call on "normal" entry only. There are not
237 reason for this to not change in the future as long as the ccode is
242 reason for this to not change in the future as long as the ccode is
238 updated to preserve the proper state of the non-normal files.
243 updated to preserve the proper state of the non-normal files.
239 """
244 """
240 self._wc_tracked = True
245 self._wc_tracked = True
241 self._p1_tracked = True
246 self._p1_tracked = True
242 self._mode = mode
247 self._mode = mode
243 self._size = size
248 self._size = size
244 self._mtime_s, self._mtime_ns = mtime
249 self._mtime_s, self._mtime_ns = mtime
245
250
246 def set_tracked(self):
251 def set_tracked(self):
247 """mark a file as tracked in the working copy
252 """mark a file as tracked in the working copy
248
253
249 This will ultimately be called by command like `hg add`.
254 This will ultimately be called by command like `hg add`.
250 """
255 """
251 self._wc_tracked = True
256 self._wc_tracked = True
252 # `set_tracked` is replacing various `normallookup` call. So we mark
257 # `set_tracked` is replacing various `normallookup` call. So we mark
253 # the files as needing lookup
258 # the files as needing lookup
254 #
259 #
255 # Consider dropping this in the future in favor of something less broad.
260 # Consider dropping this in the future in favor of something less broad.
256 self._mtime_s = None
261 self._mtime_s = None
257 self._mtime_ns = None
262 self._mtime_ns = None
258
263
259 def set_untracked(self):
264 def set_untracked(self):
260 """mark a file as untracked in the working copy
265 """mark a file as untracked in the working copy
261
266
262 This will ultimately be called by command like `hg remove`.
267 This will ultimately be called by command like `hg remove`.
263 """
268 """
264 self._wc_tracked = False
269 self._wc_tracked = False
265 self._mode = None
270 self._mode = None
266 self._size = None
271 self._size = None
267 self._mtime_s = None
272 self._mtime_s = None
268 self._mtime_ns = None
273 self._mtime_ns = None
269
274
270 def drop_merge_data(self):
275 def drop_merge_data(self):
271 """remove all "merge-only" from a DirstateItem
276 """remove all "merge-only" from a DirstateItem
272
277
273 This is to be call by the dirstatemap code when the second parent is dropped
278 This is to be call by the dirstatemap code when the second parent is dropped
274 """
279 """
275 if self._p2_info:
280 if self._p2_info:
276 self._p2_info = False
281 self._p2_info = False
277 self._mode = None
282 self._mode = None
278 self._size = None
283 self._size = None
279 self._mtime_s = None
284 self._mtime_s = None
280 self._mtime_ns = None
285 self._mtime_ns = None
281
286
282 @property
287 @property
283 def mode(self):
288 def mode(self):
284 return self.v1_mode()
289 return self.v1_mode()
285
290
286 @property
291 @property
287 def size(self):
292 def size(self):
288 return self.v1_size()
293 return self.v1_size()
289
294
290 @property
295 @property
291 def mtime(self):
296 def mtime(self):
292 return self.v1_mtime()
297 return self.v1_mtime()
293
298
294 def mtime_likely_equal_to(self, other_mtime):
299 def mtime_likely_equal_to(self, other_mtime):
295 self_sec = self._mtime_s
300 self_sec = self._mtime_s
296 if self_sec is None:
301 if self_sec is None:
297 return False
302 return False
298 self_ns = self._mtime_ns
303 self_ns = self._mtime_ns
299 other_sec, other_ns = other_mtime
304 other_sec, other_ns = other_mtime
300 return self_sec == other_sec and self_ns == other_ns
305 return self_sec == other_sec and self_ns == other_ns
301
306
302 @property
307 @property
303 def state(self):
308 def state(self):
304 """
309 """
305 States are:
310 States are:
306 n normal
311 n normal
307 m needs merging
312 m needs merging
308 r marked for removal
313 r marked for removal
309 a marked for addition
314 a marked for addition
310
315
311 XXX This "state" is a bit obscure and mostly a direct expression of the
316 XXX This "state" is a bit obscure and mostly a direct expression of the
312 dirstatev1 format. It would make sense to ultimately deprecate it in
317 dirstatev1 format. It would make sense to ultimately deprecate it in
313 favor of the more "semantic" attributes.
318 favor of the more "semantic" attributes.
314 """
319 """
315 if not self.any_tracked:
320 if not self.any_tracked:
316 return b'?'
321 return b'?'
317 return self.v1_state()
322 return self.v1_state()
318
323
319 @property
324 @property
320 def has_fallback_exec(self):
325 def has_fallback_exec(self):
321 """True if "fallback" information are available for the "exec" bit
326 """True if "fallback" information are available for the "exec" bit
322
327
323 Fallback information can be stored in the dirstate to keep track of
328 Fallback information can be stored in the dirstate to keep track of
324 filesystem attribute tracked by Mercurial when the underlying file
329 filesystem attribute tracked by Mercurial when the underlying file
325 system or operating system does not support that property, (e.g.
330 system or operating system does not support that property, (e.g.
326 Windows).
331 Windows).
327
332
328 Not all version of the dirstate on-disk storage support preserving this
333 Not all version of the dirstate on-disk storage support preserving this
329 information.
334 information.
330 """
335 """
331 return self._fallback_exec is not None
336 return self._fallback_exec is not None
332
337
333 @property
338 @property
334 def fallback_exec(self):
339 def fallback_exec(self):
335 """ "fallback" information for the executable bit
340 """ "fallback" information for the executable bit
336
341
337 True if the file should be considered executable when we cannot get
342 True if the file should be considered executable when we cannot get
338 this information from the files system. False if it should be
343 this information from the files system. False if it should be
339 considered non-executable.
344 considered non-executable.
340
345
341 See has_fallback_exec for details."""
346 See has_fallback_exec for details."""
342 return self._fallback_exec
347 return self._fallback_exec
343
348
344 @fallback_exec.setter
349 @fallback_exec.setter
345 def set_fallback_exec(self, value):
350 def set_fallback_exec(self, value):
346 """control "fallback" executable bit
351 """control "fallback" executable bit
347
352
348 Set to:
353 Set to:
349 - True if the file should be considered executable,
354 - True if the file should be considered executable,
350 - False if the file should be considered non-executable,
355 - False if the file should be considered non-executable,
351 - None if we do not have valid fallback data.
356 - None if we do not have valid fallback data.
352
357
353 See has_fallback_exec for details."""
358 See has_fallback_exec for details."""
354 if value is None:
359 if value is None:
355 self._fallback_exec = None
360 self._fallback_exec = None
356 else:
361 else:
357 self._fallback_exec = bool(value)
362 self._fallback_exec = bool(value)
358
363
359 @property
364 @property
360 def has_fallback_symlink(self):
365 def has_fallback_symlink(self):
361 """True if "fallback" information are available for symlink status
366 """True if "fallback" information are available for symlink status
362
367
363 Fallback information can be stored in the dirstate to keep track of
368 Fallback information can be stored in the dirstate to keep track of
364 filesystem attribute tracked by Mercurial when the underlying file
369 filesystem attribute tracked by Mercurial when the underlying file
365 system or operating system does not support that property, (e.g.
370 system or operating system does not support that property, (e.g.
366 Windows).
371 Windows).
367
372
368 Not all version of the dirstate on-disk storage support preserving this
373 Not all version of the dirstate on-disk storage support preserving this
369 information."""
374 information."""
370 return self._fallback_symlink is not None
375 return self._fallback_symlink is not None
371
376
372 @property
377 @property
373 def fallback_symlink(self):
378 def fallback_symlink(self):
374 """ "fallback" information for symlink status
379 """ "fallback" information for symlink status
375
380
376 True if the file should be considered executable when we cannot get
381 True if the file should be considered executable when we cannot get
377 this information from the files system. False if it should be
382 this information from the files system. False if it should be
378 considered non-executable.
383 considered non-executable.
379
384
380 See has_fallback_exec for details."""
385 See has_fallback_exec for details."""
381 return self._fallback_symlink
386 return self._fallback_symlink
382
387
383 @fallback_symlink.setter
388 @fallback_symlink.setter
384 def set_fallback_symlink(self, value):
389 def set_fallback_symlink(self, value):
385 """control "fallback" symlink status
390 """control "fallback" symlink status
386
391
387 Set to:
392 Set to:
388 - True if the file should be considered a symlink,
393 - True if the file should be considered a symlink,
389 - False if the file should be considered not a symlink,
394 - False if the file should be considered not a symlink,
390 - None if we do not have valid fallback data.
395 - None if we do not have valid fallback data.
391
396
392 See has_fallback_symlink for details."""
397 See has_fallback_symlink for details."""
393 if value is None:
398 if value is None:
394 self._fallback_symlink = None
399 self._fallback_symlink = None
395 else:
400 else:
396 self._fallback_symlink = bool(value)
401 self._fallback_symlink = bool(value)
397
402
398 @property
403 @property
399 def tracked(self):
404 def tracked(self):
400 """True is the file is tracked in the working copy"""
405 """True is the file is tracked in the working copy"""
401 return self._wc_tracked
406 return self._wc_tracked
402
407
403 @property
408 @property
404 def any_tracked(self):
409 def any_tracked(self):
405 """True is the file is tracked anywhere (wc or parents)"""
410 """True is the file is tracked anywhere (wc or parents)"""
406 return self._wc_tracked or self._p1_tracked or self._p2_info
411 return self._wc_tracked or self._p1_tracked or self._p2_info
407
412
408 @property
413 @property
409 def added(self):
414 def added(self):
410 """True if the file has been added"""
415 """True if the file has been added"""
411 return self._wc_tracked and not (self._p1_tracked or self._p2_info)
416 return self._wc_tracked and not (self._p1_tracked or self._p2_info)
412
417
413 @property
418 @property
414 def maybe_clean(self):
419 def maybe_clean(self):
415 """True if the file has a chance to be in the "clean" state"""
420 """True if the file has a chance to be in the "clean" state"""
416 if not self._wc_tracked:
421 if not self._wc_tracked:
417 return False
422 return False
418 elif not self._p1_tracked:
423 elif not self._p1_tracked:
419 return False
424 return False
420 elif self._p2_info:
425 elif self._p2_info:
421 return False
426 return False
422 return True
427 return True
423
428
424 @property
429 @property
425 def p1_tracked(self):
430 def p1_tracked(self):
426 """True if the file is tracked in the first parent manifest"""
431 """True if the file is tracked in the first parent manifest"""
427 return self._p1_tracked
432 return self._p1_tracked
428
433
429 @property
434 @property
430 def p2_info(self):
435 def p2_info(self):
431 """True if the file needed to merge or apply any input from p2
436 """True if the file needed to merge or apply any input from p2
432
437
433 See the class documentation for details.
438 See the class documentation for details.
434 """
439 """
435 return self._wc_tracked and self._p2_info
440 return self._wc_tracked and self._p2_info
436
441
437 @property
442 @property
438 def removed(self):
443 def removed(self):
439 """True if the file has been removed"""
444 """True if the file has been removed"""
440 return not self._wc_tracked and (self._p1_tracked or self._p2_info)
445 return not self._wc_tracked and (self._p1_tracked or self._p2_info)
441
446
442 def v2_data(self):
447 def v2_data(self):
443 """Returns (flags, mode, size, mtime) for v2 serialization"""
448 """Returns (flags, mode, size, mtime) for v2 serialization"""
444 flags = 0
449 flags = 0
445 if self._wc_tracked:
450 if self._wc_tracked:
446 flags |= DIRSTATE_V2_WDIR_TRACKED
451 flags |= DIRSTATE_V2_WDIR_TRACKED
447 if self._p1_tracked:
452 if self._p1_tracked:
448 flags |= DIRSTATE_V2_P1_TRACKED
453 flags |= DIRSTATE_V2_P1_TRACKED
449 if self._p2_info:
454 if self._p2_info:
450 flags |= DIRSTATE_V2_P2_INFO
455 flags |= DIRSTATE_V2_P2_INFO
451 if self._mode is not None and self._size is not None:
456 if self._mode is not None and self._size is not None:
452 flags |= DIRSTATE_V2_HAS_MODE_AND_SIZE
457 flags |= DIRSTATE_V2_HAS_MODE_AND_SIZE
453 if self.mode & stat.S_IXUSR:
458 if self.mode & stat.S_IXUSR:
454 flags |= DIRSTATE_V2_MODE_EXEC_PERM
459 flags |= DIRSTATE_V2_MODE_EXEC_PERM
455 if stat.S_ISLNK(self.mode):
460 if stat.S_ISLNK(self.mode):
456 flags |= DIRSTATE_V2_MODE_IS_SYMLINK
461 flags |= DIRSTATE_V2_MODE_IS_SYMLINK
457 if self._mtime_s is not None:
462 if self._mtime_s is not None:
458 flags |= DIRSTATE_V2_HAS_FILE_MTIME
463 flags |= DIRSTATE_V2_HAS_FILE_MTIME
459
464
460 if self._fallback_exec is not None:
465 if self._fallback_exec is not None:
461 flags |= DIRSTATE_V2_HAS_FALLBACK_EXEC
466 flags |= DIRSTATE_V2_HAS_FALLBACK_EXEC
462 if self._fallback_exec:
467 if self._fallback_exec:
463 flags |= DIRSTATE_V2_FALLBACK_EXEC
468 flags |= DIRSTATE_V2_FALLBACK_EXEC
464
469
465 if self._fallback_symlink is not None:
470 if self._fallback_symlink is not None:
466 flags |= DIRSTATE_V2_HAS_FALLBACK_SYMLINK
471 flags |= DIRSTATE_V2_HAS_FALLBACK_SYMLINK
467 if self._fallback_symlink:
472 if self._fallback_symlink:
468 flags |= DIRSTATE_V2_FALLBACK_SYMLINK
473 flags |= DIRSTATE_V2_FALLBACK_SYMLINK
469
474
470 # Note: we do not need to do anything regarding
475 # Note: we do not need to do anything regarding
471 # DIRSTATE_V2_ALL_UNKNOWN_RECORDED and DIRSTATE_V2_ALL_IGNORED_RECORDED
476 # DIRSTATE_V2_ALL_UNKNOWN_RECORDED and DIRSTATE_V2_ALL_IGNORED_RECORDED
472 # since we never set _DIRSTATE_V2_HAS_DIRCTORY_MTIME
477 # since we never set _DIRSTATE_V2_HAS_DIRCTORY_MTIME
473 return (flags, self._size or 0, self._mtime_s or 0, self._mtime_ns or 0)
478 return (flags, self._size or 0, self._mtime_s or 0, self._mtime_ns or 0)
474
479
475 def v1_state(self):
480 def v1_state(self):
476 """return a "state" suitable for v1 serialization"""
481 """return a "state" suitable for v1 serialization"""
477 if not self.any_tracked:
482 if not self.any_tracked:
478 # the object has no state to record, this is -currently-
483 # the object has no state to record, this is -currently-
479 # unsupported
484 # unsupported
480 raise RuntimeError('untracked item')
485 raise RuntimeError('untracked item')
481 elif self.removed:
486 elif self.removed:
482 return b'r'
487 return b'r'
483 elif self._p1_tracked and self._p2_info:
488 elif self._p1_tracked and self._p2_info:
484 return b'm'
489 return b'm'
485 elif self.added:
490 elif self.added:
486 return b'a'
491 return b'a'
487 else:
492 else:
488 return b'n'
493 return b'n'
489
494
490 def v1_mode(self):
495 def v1_mode(self):
491 """return a "mode" suitable for v1 serialization"""
496 """return a "mode" suitable for v1 serialization"""
492 return self._mode if self._mode is not None else 0
497 return self._mode if self._mode is not None else 0
493
498
494 def v1_size(self):
499 def v1_size(self):
495 """return a "size" suitable for v1 serialization"""
500 """return a "size" suitable for v1 serialization"""
496 if not self.any_tracked:
501 if not self.any_tracked:
497 # the object has no state to record, this is -currently-
502 # the object has no state to record, this is -currently-
498 # unsupported
503 # unsupported
499 raise RuntimeError('untracked item')
504 raise RuntimeError('untracked item')
500 elif self.removed and self._p1_tracked and self._p2_info:
505 elif self.removed and self._p1_tracked and self._p2_info:
501 return NONNORMAL
506 return NONNORMAL
502 elif self._p2_info:
507 elif self._p2_info:
503 return FROM_P2
508 return FROM_P2
504 elif self.removed:
509 elif self.removed:
505 return 0
510 return 0
506 elif self.added:
511 elif self.added:
507 return NONNORMAL
512 return NONNORMAL
508 elif self._size is None:
513 elif self._size is None:
509 return NONNORMAL
514 return NONNORMAL
510 else:
515 else:
511 return self._size
516 return self._size
512
517
513 def v1_mtime(self):
518 def v1_mtime(self):
514 """return a "mtime" suitable for v1 serialization"""
519 """return a "mtime" suitable for v1 serialization"""
515 if not self.any_tracked:
520 if not self.any_tracked:
516 # the object has no state to record, this is -currently-
521 # the object has no state to record, this is -currently-
517 # unsupported
522 # unsupported
518 raise RuntimeError('untracked item')
523 raise RuntimeError('untracked item')
519 elif self.removed:
524 elif self.removed:
520 return 0
525 return 0
521 elif self._mtime_s is None:
526 elif self._mtime_s is None:
522 return AMBIGUOUS_TIME
527 return AMBIGUOUS_TIME
523 elif self._p2_info:
528 elif self._p2_info:
524 return AMBIGUOUS_TIME
529 return AMBIGUOUS_TIME
525 elif not self._p1_tracked:
530 elif not self._p1_tracked:
526 return AMBIGUOUS_TIME
531 return AMBIGUOUS_TIME
527 else:
532 else:
528 return self._mtime_s
533 return self._mtime_s
529
534
530 def need_delay(self, now):
535 def need_delay(self, now):
531 """True if the stored mtime would be ambiguous with the current time"""
536 """True if the stored mtime would be ambiguous with the current time"""
532 return self.v1_state() == b'n' and self._mtime_s == now[0]
537 return self.v1_state() == b'n' and self._mtime_s == now[0]
533
538
534
539
535 def gettype(q):
540 def gettype(q):
536 return int(q & 0xFFFF)
541 return int(q & 0xFFFF)
537
542
538
543
539 class BaseIndexObject(object):
544 class BaseIndexObject(object):
540 # Can I be passed to an algorithme implemented in Rust ?
545 # Can I be passed to an algorithme implemented in Rust ?
541 rust_ext_compat = 0
546 rust_ext_compat = 0
542 # Format of an index entry according to Python's `struct` language
547 # Format of an index entry according to Python's `struct` language
543 index_format = revlog_constants.INDEX_ENTRY_V1
548 index_format = revlog_constants.INDEX_ENTRY_V1
544 # Size of a C unsigned long long int, platform independent
549 # Size of a C unsigned long long int, platform independent
545 big_int_size = struct.calcsize(b'>Q')
550 big_int_size = struct.calcsize(b'>Q')
546 # Size of a C long int, platform independent
551 # Size of a C long int, platform independent
547 int_size = struct.calcsize(b'>i')
552 int_size = struct.calcsize(b'>i')
548 # An empty index entry, used as a default value to be overridden, or nullrev
553 # An empty index entry, used as a default value to be overridden, or nullrev
549 null_item = (
554 null_item = (
550 0,
555 0,
551 0,
556 0,
552 0,
557 0,
553 -1,
558 -1,
554 -1,
559 -1,
555 -1,
560 -1,
556 -1,
561 -1,
557 sha1nodeconstants.nullid,
562 sha1nodeconstants.nullid,
558 0,
563 0,
559 0,
564 0,
560 revlog_constants.COMP_MODE_INLINE,
565 revlog_constants.COMP_MODE_INLINE,
561 revlog_constants.COMP_MODE_INLINE,
566 revlog_constants.COMP_MODE_INLINE,
562 )
567 )
563
568
564 @util.propertycache
569 @util.propertycache
565 def entry_size(self):
570 def entry_size(self):
566 return self.index_format.size
571 return self.index_format.size
567
572
568 @property
573 @property
569 def nodemap(self):
574 def nodemap(self):
570 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
575 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
571 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
576 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
572 return self._nodemap
577 return self._nodemap
573
578
574 @util.propertycache
579 @util.propertycache
575 def _nodemap(self):
580 def _nodemap(self):
576 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
581 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
577 for r in range(0, len(self)):
582 for r in range(0, len(self)):
578 n = self[r][7]
583 n = self[r][7]
579 nodemap[n] = r
584 nodemap[n] = r
580 return nodemap
585 return nodemap
581
586
582 def has_node(self, node):
587 def has_node(self, node):
583 """return True if the node exist in the index"""
588 """return True if the node exist in the index"""
584 return node in self._nodemap
589 return node in self._nodemap
585
590
586 def rev(self, node):
591 def rev(self, node):
587 """return a revision for a node
592 """return a revision for a node
588
593
589 If the node is unknown, raise a RevlogError"""
594 If the node is unknown, raise a RevlogError"""
590 return self._nodemap[node]
595 return self._nodemap[node]
591
596
592 def get_rev(self, node):
597 def get_rev(self, node):
593 """return a revision for a node
598 """return a revision for a node
594
599
595 If the node is unknown, return None"""
600 If the node is unknown, return None"""
596 return self._nodemap.get(node)
601 return self._nodemap.get(node)
597
602
598 def _stripnodes(self, start):
603 def _stripnodes(self, start):
599 if '_nodemap' in vars(self):
604 if '_nodemap' in vars(self):
600 for r in range(start, len(self)):
605 for r in range(start, len(self)):
601 n = self[r][7]
606 n = self[r][7]
602 del self._nodemap[n]
607 del self._nodemap[n]
603
608
604 def clearcaches(self):
609 def clearcaches(self):
605 self.__dict__.pop('_nodemap', None)
610 self.__dict__.pop('_nodemap', None)
606
611
607 def __len__(self):
612 def __len__(self):
608 return self._lgt + len(self._extra)
613 return self._lgt + len(self._extra)
609
614
610 def append(self, tup):
615 def append(self, tup):
611 if '_nodemap' in vars(self):
616 if '_nodemap' in vars(self):
612 self._nodemap[tup[7]] = len(self)
617 self._nodemap[tup[7]] = len(self)
613 data = self._pack_entry(len(self), tup)
618 data = self._pack_entry(len(self), tup)
614 self._extra.append(data)
619 self._extra.append(data)
615
620
616 def _pack_entry(self, rev, entry):
621 def _pack_entry(self, rev, entry):
617 assert entry[8] == 0
622 assert entry[8] == 0
618 assert entry[9] == 0
623 assert entry[9] == 0
619 return self.index_format.pack(*entry[:8])
624 return self.index_format.pack(*entry[:8])
620
625
621 def _check_index(self, i):
626 def _check_index(self, i):
622 if not isinstance(i, int):
627 if not isinstance(i, int):
623 raise TypeError(b"expecting int indexes")
628 raise TypeError(b"expecting int indexes")
624 if i < 0 or i >= len(self):
629 if i < 0 or i >= len(self):
625 raise IndexError
630 raise IndexError
626
631
627 def __getitem__(self, i):
632 def __getitem__(self, i):
628 if i == -1:
633 if i == -1:
629 return self.null_item
634 return self.null_item
630 self._check_index(i)
635 self._check_index(i)
631 if i >= self._lgt:
636 if i >= self._lgt:
632 data = self._extra[i - self._lgt]
637 data = self._extra[i - self._lgt]
633 else:
638 else:
634 index = self._calculate_index(i)
639 index = self._calculate_index(i)
635 data = self._data[index : index + self.entry_size]
640 data = self._data[index : index + self.entry_size]
636 r = self._unpack_entry(i, data)
641 r = self._unpack_entry(i, data)
637 if self._lgt and i == 0:
642 if self._lgt and i == 0:
638 offset = revlogutils.offset_type(0, gettype(r[0]))
643 offset = revlogutils.offset_type(0, gettype(r[0]))
639 r = (offset,) + r[1:]
644 r = (offset,) + r[1:]
640 return r
645 return r
641
646
642 def _unpack_entry(self, rev, data):
647 def _unpack_entry(self, rev, data):
643 r = self.index_format.unpack(data)
648 r = self.index_format.unpack(data)
644 r = r + (
649 r = r + (
645 0,
650 0,
646 0,
651 0,
647 revlog_constants.COMP_MODE_INLINE,
652 revlog_constants.COMP_MODE_INLINE,
648 revlog_constants.COMP_MODE_INLINE,
653 revlog_constants.COMP_MODE_INLINE,
649 )
654 )
650 return r
655 return r
651
656
652 def pack_header(self, header):
657 def pack_header(self, header):
653 """pack header information as binary"""
658 """pack header information as binary"""
654 v_fmt = revlog_constants.INDEX_HEADER
659 v_fmt = revlog_constants.INDEX_HEADER
655 return v_fmt.pack(header)
660 return v_fmt.pack(header)
656
661
657 def entry_binary(self, rev):
662 def entry_binary(self, rev):
658 """return the raw binary string representing a revision"""
663 """return the raw binary string representing a revision"""
659 entry = self[rev]
664 entry = self[rev]
660 p = revlog_constants.INDEX_ENTRY_V1.pack(*entry[:8])
665 p = revlog_constants.INDEX_ENTRY_V1.pack(*entry[:8])
661 if rev == 0:
666 if rev == 0:
662 p = p[revlog_constants.INDEX_HEADER.size :]
667 p = p[revlog_constants.INDEX_HEADER.size :]
663 return p
668 return p
664
669
665
670
666 class IndexObject(BaseIndexObject):
671 class IndexObject(BaseIndexObject):
667 def __init__(self, data):
672 def __init__(self, data):
668 assert len(data) % self.entry_size == 0, (
673 assert len(data) % self.entry_size == 0, (
669 len(data),
674 len(data),
670 self.entry_size,
675 self.entry_size,
671 len(data) % self.entry_size,
676 len(data) % self.entry_size,
672 )
677 )
673 self._data = data
678 self._data = data
674 self._lgt = len(data) // self.entry_size
679 self._lgt = len(data) // self.entry_size
675 self._extra = []
680 self._extra = []
676
681
677 def _calculate_index(self, i):
682 def _calculate_index(self, i):
678 return i * self.entry_size
683 return i * self.entry_size
679
684
680 def __delitem__(self, i):
685 def __delitem__(self, i):
681 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
686 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
682 raise ValueError(b"deleting slices only supports a:-1 with step 1")
687 raise ValueError(b"deleting slices only supports a:-1 with step 1")
683 i = i.start
688 i = i.start
684 self._check_index(i)
689 self._check_index(i)
685 self._stripnodes(i)
690 self._stripnodes(i)
686 if i < self._lgt:
691 if i < self._lgt:
687 self._data = self._data[: i * self.entry_size]
692 self._data = self._data[: i * self.entry_size]
688 self._lgt = i
693 self._lgt = i
689 self._extra = []
694 self._extra = []
690 else:
695 else:
691 self._extra = self._extra[: i - self._lgt]
696 self._extra = self._extra[: i - self._lgt]
692
697
693
698
694 class PersistentNodeMapIndexObject(IndexObject):
699 class PersistentNodeMapIndexObject(IndexObject):
695 """a Debug oriented class to test persistent nodemap
700 """a Debug oriented class to test persistent nodemap
696
701
697 We need a simple python object to test API and higher level behavior. See
702 We need a simple python object to test API and higher level behavior. See
698 the Rust implementation for more serious usage. This should be used only
703 the Rust implementation for more serious usage. This should be used only
699 through the dedicated `devel.persistent-nodemap` config.
704 through the dedicated `devel.persistent-nodemap` config.
700 """
705 """
701
706
702 def nodemap_data_all(self):
707 def nodemap_data_all(self):
703 """Return bytes containing a full serialization of a nodemap
708 """Return bytes containing a full serialization of a nodemap
704
709
705 The nodemap should be valid for the full set of revisions in the
710 The nodemap should be valid for the full set of revisions in the
706 index."""
711 index."""
707 return nodemaputil.persistent_data(self)
712 return nodemaputil.persistent_data(self)
708
713
709 def nodemap_data_incremental(self):
714 def nodemap_data_incremental(self):
710 """Return bytes containing a incremental update to persistent nodemap
715 """Return bytes containing a incremental update to persistent nodemap
711
716
712 This containst the data for an append-only update of the data provided
717 This containst the data for an append-only update of the data provided
713 in the last call to `update_nodemap_data`.
718 in the last call to `update_nodemap_data`.
714 """
719 """
715 if self._nm_root is None:
720 if self._nm_root is None:
716 return None
721 return None
717 docket = self._nm_docket
722 docket = self._nm_docket
718 changed, data = nodemaputil.update_persistent_data(
723 changed, data = nodemaputil.update_persistent_data(
719 self, self._nm_root, self._nm_max_idx, self._nm_docket.tip_rev
724 self, self._nm_root, self._nm_max_idx, self._nm_docket.tip_rev
720 )
725 )
721
726
722 self._nm_root = self._nm_max_idx = self._nm_docket = None
727 self._nm_root = self._nm_max_idx = self._nm_docket = None
723 return docket, changed, data
728 return docket, changed, data
724
729
725 def update_nodemap_data(self, docket, nm_data):
730 def update_nodemap_data(self, docket, nm_data):
726 """provide full block of persisted binary data for a nodemap
731 """provide full block of persisted binary data for a nodemap
727
732
728 The data are expected to come from disk. See `nodemap_data_all` for a
733 The data are expected to come from disk. See `nodemap_data_all` for a
729 produceur of such data."""
734 produceur of such data."""
730 if nm_data is not None:
735 if nm_data is not None:
731 self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
736 self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
732 if self._nm_root:
737 if self._nm_root:
733 self._nm_docket = docket
738 self._nm_docket = docket
734 else:
739 else:
735 self._nm_root = self._nm_max_idx = self._nm_docket = None
740 self._nm_root = self._nm_max_idx = self._nm_docket = None
736
741
737
742
738 class InlinedIndexObject(BaseIndexObject):
743 class InlinedIndexObject(BaseIndexObject):
739 def __init__(self, data, inline=0):
744 def __init__(self, data, inline=0):
740 self._data = data
745 self._data = data
741 self._lgt = self._inline_scan(None)
746 self._lgt = self._inline_scan(None)
742 self._inline_scan(self._lgt)
747 self._inline_scan(self._lgt)
743 self._extra = []
748 self._extra = []
744
749
745 def _inline_scan(self, lgt):
750 def _inline_scan(self, lgt):
746 off = 0
751 off = 0
747 if lgt is not None:
752 if lgt is not None:
748 self._offsets = [0] * lgt
753 self._offsets = [0] * lgt
749 count = 0
754 count = 0
750 while off <= len(self._data) - self.entry_size:
755 while off <= len(self._data) - self.entry_size:
751 start = off + self.big_int_size
756 start = off + self.big_int_size
752 (s,) = struct.unpack(
757 (s,) = struct.unpack(
753 b'>i',
758 b'>i',
754 self._data[start : start + self.int_size],
759 self._data[start : start + self.int_size],
755 )
760 )
756 if lgt is not None:
761 if lgt is not None:
757 self._offsets[count] = off
762 self._offsets[count] = off
758 count += 1
763 count += 1
759 off += self.entry_size + s
764 off += self.entry_size + s
760 if off != len(self._data):
765 if off != len(self._data):
761 raise ValueError(b"corrupted data")
766 raise ValueError(b"corrupted data")
762 return count
767 return count
763
768
764 def __delitem__(self, i):
769 def __delitem__(self, i):
765 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
770 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
766 raise ValueError(b"deleting slices only supports a:-1 with step 1")
771 raise ValueError(b"deleting slices only supports a:-1 with step 1")
767 i = i.start
772 i = i.start
768 self._check_index(i)
773 self._check_index(i)
769 self._stripnodes(i)
774 self._stripnodes(i)
770 if i < self._lgt:
775 if i < self._lgt:
771 self._offsets = self._offsets[:i]
776 self._offsets = self._offsets[:i]
772 self._lgt = i
777 self._lgt = i
773 self._extra = []
778 self._extra = []
774 else:
779 else:
775 self._extra = self._extra[: i - self._lgt]
780 self._extra = self._extra[: i - self._lgt]
776
781
777 def _calculate_index(self, i):
782 def _calculate_index(self, i):
778 return self._offsets[i]
783 return self._offsets[i]
779
784
780
785
781 def parse_index2(data, inline, revlogv2=False):
786 def parse_index2(data, inline, revlogv2=False):
782 if not inline:
787 if not inline:
783 cls = IndexObject2 if revlogv2 else IndexObject
788 cls = IndexObject2 if revlogv2 else IndexObject
784 return cls(data), None
789 return cls(data), None
785 cls = InlinedIndexObject
790 cls = InlinedIndexObject
786 return cls(data, inline), (0, data)
791 return cls(data, inline), (0, data)
787
792
788
793
789 def parse_index_cl_v2(data):
794 def parse_index_cl_v2(data):
790 return IndexChangelogV2(data), None
795 return IndexChangelogV2(data), None
791
796
792
797
793 class IndexObject2(IndexObject):
798 class IndexObject2(IndexObject):
794 index_format = revlog_constants.INDEX_ENTRY_V2
799 index_format = revlog_constants.INDEX_ENTRY_V2
795
800
796 def replace_sidedata_info(
801 def replace_sidedata_info(
797 self,
802 self,
798 rev,
803 rev,
799 sidedata_offset,
804 sidedata_offset,
800 sidedata_length,
805 sidedata_length,
801 offset_flags,
806 offset_flags,
802 compression_mode,
807 compression_mode,
803 ):
808 ):
804 """
809 """
805 Replace an existing index entry's sidedata offset and length with new
810 Replace an existing index entry's sidedata offset and length with new
806 ones.
811 ones.
807 This cannot be used outside of the context of sidedata rewriting,
812 This cannot be used outside of the context of sidedata rewriting,
808 inside the transaction that creates the revision `rev`.
813 inside the transaction that creates the revision `rev`.
809 """
814 """
810 if rev < 0:
815 if rev < 0:
811 raise KeyError
816 raise KeyError
812 self._check_index(rev)
817 self._check_index(rev)
813 if rev < self._lgt:
818 if rev < self._lgt:
814 msg = b"cannot rewrite entries outside of this transaction"
819 msg = b"cannot rewrite entries outside of this transaction"
815 raise KeyError(msg)
820 raise KeyError(msg)
816 else:
821 else:
817 entry = list(self[rev])
822 entry = list(self[rev])
818 entry[0] = offset_flags
823 entry[0] = offset_flags
819 entry[8] = sidedata_offset
824 entry[8] = sidedata_offset
820 entry[9] = sidedata_length
825 entry[9] = sidedata_length
821 entry[11] = compression_mode
826 entry[11] = compression_mode
822 entry = tuple(entry)
827 entry = tuple(entry)
823 new = self._pack_entry(rev, entry)
828 new = self._pack_entry(rev, entry)
824 self._extra[rev - self._lgt] = new
829 self._extra[rev - self._lgt] = new
825
830
826 def _unpack_entry(self, rev, data):
831 def _unpack_entry(self, rev, data):
827 data = self.index_format.unpack(data)
832 data = self.index_format.unpack(data)
828 entry = data[:10]
833 entry = data[:10]
829 data_comp = data[10] & 3
834 data_comp = data[10] & 3
830 sidedata_comp = (data[10] & (3 << 2)) >> 2
835 sidedata_comp = (data[10] & (3 << 2)) >> 2
831 return entry + (data_comp, sidedata_comp)
836 return entry + (data_comp, sidedata_comp)
832
837
833 def _pack_entry(self, rev, entry):
838 def _pack_entry(self, rev, entry):
834 data = entry[:10]
839 data = entry[:10]
835 data_comp = entry[10] & 3
840 data_comp = entry[10] & 3
836 sidedata_comp = (entry[11] & 3) << 2
841 sidedata_comp = (entry[11] & 3) << 2
837 data += (data_comp | sidedata_comp,)
842 data += (data_comp | sidedata_comp,)
838
843
839 return self.index_format.pack(*data)
844 return self.index_format.pack(*data)
840
845
841 def entry_binary(self, rev):
846 def entry_binary(self, rev):
842 """return the raw binary string representing a revision"""
847 """return the raw binary string representing a revision"""
843 entry = self[rev]
848 entry = self[rev]
844 return self._pack_entry(rev, entry)
849 return self._pack_entry(rev, entry)
845
850
846 def pack_header(self, header):
851 def pack_header(self, header):
847 """pack header information as binary"""
852 """pack header information as binary"""
848 msg = 'version header should go in the docket, not the index: %d'
853 msg = 'version header should go in the docket, not the index: %d'
849 msg %= header
854 msg %= header
850 raise error.ProgrammingError(msg)
855 raise error.ProgrammingError(msg)
851
856
852
857
853 class IndexChangelogV2(IndexObject2):
858 class IndexChangelogV2(IndexObject2):
854 index_format = revlog_constants.INDEX_ENTRY_CL_V2
859 index_format = revlog_constants.INDEX_ENTRY_CL_V2
855
860
856 def _unpack_entry(self, rev, data, r=True):
861 def _unpack_entry(self, rev, data, r=True):
857 items = self.index_format.unpack(data)
862 items = self.index_format.unpack(data)
858 entry = items[:3] + (rev, rev) + items[3:8]
863 entry = items[:3] + (rev, rev) + items[3:8]
859 data_comp = items[8] & 3
864 data_comp = items[8] & 3
860 sidedata_comp = (items[8] >> 2) & 3
865 sidedata_comp = (items[8] >> 2) & 3
861 return entry + (data_comp, sidedata_comp)
866 return entry + (data_comp, sidedata_comp)
862
867
863 def _pack_entry(self, rev, entry):
868 def _pack_entry(self, rev, entry):
864 assert entry[3] == rev, entry[3]
869 assert entry[3] == rev, entry[3]
865 assert entry[4] == rev, entry[4]
870 assert entry[4] == rev, entry[4]
866 data = entry[:3] + entry[5:10]
871 data = entry[:3] + entry[5:10]
867 data_comp = entry[10] & 3
872 data_comp = entry[10] & 3
868 sidedata_comp = (entry[11] & 3) << 2
873 sidedata_comp = (entry[11] & 3) << 2
869 data += (data_comp | sidedata_comp,)
874 data += (data_comp | sidedata_comp,)
870 return self.index_format.pack(*data)
875 return self.index_format.pack(*data)
871
876
872
877
873 def parse_index_devel_nodemap(data, inline):
878 def parse_index_devel_nodemap(data, inline):
874 """like parse_index2, but alway return a PersistentNodeMapIndexObject"""
879 """like parse_index2, but alway return a PersistentNodeMapIndexObject"""
875 return PersistentNodeMapIndexObject(data), None
880 return PersistentNodeMapIndexObject(data), None
876
881
877
882
878 def parse_dirstate(dmap, copymap, st):
883 def parse_dirstate(dmap, copymap, st):
879 parents = [st[:20], st[20:40]]
884 parents = [st[:20], st[20:40]]
880 # dereference fields so they will be local in loop
885 # dereference fields so they will be local in loop
881 format = b">cllll"
886 format = b">cllll"
882 e_size = struct.calcsize(format)
887 e_size = struct.calcsize(format)
883 pos1 = 40
888 pos1 = 40
884 l = len(st)
889 l = len(st)
885
890
886 # the inner loop
891 # the inner loop
887 while pos1 < l:
892 while pos1 < l:
888 pos2 = pos1 + e_size
893 pos2 = pos1 + e_size
889 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
894 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
890 pos1 = pos2 + e[4]
895 pos1 = pos2 + e[4]
891 f = st[pos2:pos1]
896 f = st[pos2:pos1]
892 if b'\0' in f:
897 if b'\0' in f:
893 f, c = f.split(b'\0')
898 f, c = f.split(b'\0')
894 copymap[f] = c
899 copymap[f] = c
895 dmap[f] = DirstateItem.from_v1_data(*e[:4])
900 dmap[f] = DirstateItem.from_v1_data(*e[:4])
896 return parents
901 return parents
897
902
898
903
899 def pack_dirstate(dmap, copymap, pl, now):
904 def pack_dirstate(dmap, copymap, pl, now):
900 cs = stringio()
905 cs = stringio()
901 write = cs.write
906 write = cs.write
902 write(b"".join(pl))
907 write(b"".join(pl))
903 for f, e in pycompat.iteritems(dmap):
908 for f, e in pycompat.iteritems(dmap):
904 if e.need_delay(now):
909 if e.need_delay(now):
905 # The file was last modified "simultaneously" with the current
910 # The file was last modified "simultaneously" with the current
906 # write to dirstate (i.e. within the same second for file-
911 # write to dirstate (i.e. within the same second for file-
907 # systems with a granularity of 1 sec). This commonly happens
912 # systems with a granularity of 1 sec). This commonly happens
908 # for at least a couple of files on 'update'.
913 # for at least a couple of files on 'update'.
909 # The user could change the file without changing its size
914 # The user could change the file without changing its size
910 # within the same second. Invalidate the file's mtime in
915 # within the same second. Invalidate the file's mtime in
911 # dirstate, forcing future 'status' calls to compare the
916 # dirstate, forcing future 'status' calls to compare the
912 # contents of the file if the size is the same. This prevents
917 # contents of the file if the size is the same. This prevents
913 # mistakenly treating such files as clean.
918 # mistakenly treating such files as clean.
914 e.set_possibly_dirty()
919 e.set_possibly_dirty()
915
920
916 if f in copymap:
921 if f in copymap:
917 f = b"%s\0%s" % (f, copymap[f])
922 f = b"%s\0%s" % (f, copymap[f])
918 e = _pack(
923 e = _pack(
919 b">cllll",
924 b">cllll",
920 e.v1_state(),
925 e.v1_state(),
921 e.v1_mode(),
926 e.v1_mode(),
922 e.v1_size(),
927 e.v1_size(),
923 e.v1_mtime(),
928 e.v1_mtime(),
924 len(f),
929 len(f),
925 )
930 )
926 write(e)
931 write(e)
927 write(f)
932 write(f)
928 return cs.getvalue()
933 return cs.getvalue()
@@ -1,778 +1,782 b''
1 //! The "version 2" disk representation of the dirstate
1 //! The "version 2" disk representation of the dirstate
2 //!
2 //!
3 //! See `mercurial/helptext/internals/dirstate-v2.txt`
3 //! See `mercurial/helptext/internals/dirstate-v2.txt`
4
4
5 use crate::dirstate::TruncatedTimestamp;
5 use crate::dirstate::TruncatedTimestamp;
6 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
6 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
7 use crate::dirstate_tree::path_with_basename::WithBasename;
7 use crate::dirstate_tree::path_with_basename::WithBasename;
8 use crate::errors::HgError;
8 use crate::errors::HgError;
9 use crate::utils::hg_path::HgPath;
9 use crate::utils::hg_path::HgPath;
10 use crate::DirstateEntry;
10 use crate::DirstateEntry;
11 use crate::DirstateError;
11 use crate::DirstateError;
12 use crate::DirstateParents;
12 use crate::DirstateParents;
13 use bitflags::bitflags;
13 use bitflags::bitflags;
14 use bytes_cast::unaligned::{U16Be, U32Be};
14 use bytes_cast::unaligned::{U16Be, U32Be};
15 use bytes_cast::BytesCast;
15 use bytes_cast::BytesCast;
16 use format_bytes::format_bytes;
16 use format_bytes::format_bytes;
17 use std::borrow::Cow;
17 use std::borrow::Cow;
18 use std::convert::{TryFrom, TryInto};
18 use std::convert::{TryFrom, TryInto};
19
19
20 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
20 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
21 /// This is a redundant sanity check more than an actual "magic number" since
21 /// This is a redundant sanity check more than an actual "magic number" since
22 /// `.hg/requires` already governs which format should be used.
22 /// `.hg/requires` already governs which format should be used.
23 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
23 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
24
24
25 /// Keep space for 256-bit hashes
25 /// Keep space for 256-bit hashes
26 const STORED_NODE_ID_BYTES: usize = 32;
26 const STORED_NODE_ID_BYTES: usize = 32;
27
27
28 /// … even though only 160 bits are used for now, with SHA-1
28 /// … even though only 160 bits are used for now, with SHA-1
29 const USED_NODE_ID_BYTES: usize = 20;
29 const USED_NODE_ID_BYTES: usize = 20;
30
30
31 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
31 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
32 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
32 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
33
33
34 /// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
34 /// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
35 const TREE_METADATA_SIZE: usize = 44;
35 const TREE_METADATA_SIZE: usize = 44;
36 const NODE_SIZE: usize = 44;
36 const NODE_SIZE: usize = 44;
37
37
38 /// Make sure that size-affecting changes are made knowingly
38 /// Make sure that size-affecting changes are made knowingly
39 #[allow(unused)]
39 #[allow(unused)]
40 fn static_assert_size_of() {
40 fn static_assert_size_of() {
41 let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
41 let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
42 let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
42 let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
43 let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
43 let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
44 }
44 }
45
45
46 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
46 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
47 #[derive(BytesCast)]
47 #[derive(BytesCast)]
48 #[repr(C)]
48 #[repr(C)]
49 struct DocketHeader {
49 struct DocketHeader {
50 marker: [u8; V2_FORMAT_MARKER.len()],
50 marker: [u8; V2_FORMAT_MARKER.len()],
51 parent_1: [u8; STORED_NODE_ID_BYTES],
51 parent_1: [u8; STORED_NODE_ID_BYTES],
52 parent_2: [u8; STORED_NODE_ID_BYTES],
52 parent_2: [u8; STORED_NODE_ID_BYTES],
53
53
54 metadata: TreeMetadata,
54 metadata: TreeMetadata,
55
55
56 /// Counted in bytes
56 /// Counted in bytes
57 data_size: Size,
57 data_size: Size,
58
58
59 uuid_size: u8,
59 uuid_size: u8,
60 }
60 }
61
61
62 pub struct Docket<'on_disk> {
62 pub struct Docket<'on_disk> {
63 header: &'on_disk DocketHeader,
63 header: &'on_disk DocketHeader,
64 uuid: &'on_disk [u8],
64 uuid: &'on_disk [u8],
65 }
65 }
66
66
67 /// Fields are documented in the *Tree metadata in the docket file*
67 /// Fields are documented in the *Tree metadata in the docket file*
68 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
68 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
69 #[derive(BytesCast)]
69 #[derive(BytesCast)]
70 #[repr(C)]
70 #[repr(C)]
71 struct TreeMetadata {
71 struct TreeMetadata {
72 root_nodes: ChildNodes,
72 root_nodes: ChildNodes,
73 nodes_with_entry_count: Size,
73 nodes_with_entry_count: Size,
74 nodes_with_copy_source_count: Size,
74 nodes_with_copy_source_count: Size,
75 unreachable_bytes: Size,
75 unreachable_bytes: Size,
76 unused: [u8; 4],
76 unused: [u8; 4],
77
77
78 /// See *Optional hash of ignore patterns* section of
78 /// See *Optional hash of ignore patterns* section of
79 /// `mercurial/helptext/internals/dirstate-v2.txt`
79 /// `mercurial/helptext/internals/dirstate-v2.txt`
80 ignore_patterns_hash: IgnorePatternsHash,
80 ignore_patterns_hash: IgnorePatternsHash,
81 }
81 }
82
82
83 /// Fields are documented in the *The data file format*
83 /// Fields are documented in the *The data file format*
84 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
84 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
85 #[derive(BytesCast)]
85 #[derive(BytesCast)]
86 #[repr(C)]
86 #[repr(C)]
87 pub(super) struct Node {
87 pub(super) struct Node {
88 full_path: PathSlice,
88 full_path: PathSlice,
89
89
90 /// In bytes from `self.full_path.start`
90 /// In bytes from `self.full_path.start`
91 base_name_start: PathSize,
91 base_name_start: PathSize,
92
92
93 copy_source: OptPathSlice,
93 copy_source: OptPathSlice,
94 children: ChildNodes,
94 children: ChildNodes,
95 pub(super) descendants_with_entry_count: Size,
95 pub(super) descendants_with_entry_count: Size,
96 pub(super) tracked_descendants_count: Size,
96 pub(super) tracked_descendants_count: Size,
97 flags: U16Be,
97 flags: U16Be,
98 size: U32Be,
98 size: U32Be,
99 mtime: PackedTruncatedTimestamp,
99 mtime: PackedTruncatedTimestamp,
100 }
100 }
101
101
102 bitflags! {
102 bitflags! {
103 #[repr(C)]
103 #[repr(C)]
104 struct Flags: u16 {
104 struct Flags: u16 {
105 const WDIR_TRACKED = 1 << 0;
105 const WDIR_TRACKED = 1 << 0;
106 const P1_TRACKED = 1 << 1;
106 const P1_TRACKED = 1 << 1;
107 const P2_INFO = 1 << 2;
107 const P2_INFO = 1 << 2;
108 const HAS_MODE_AND_SIZE = 1 << 3;
108 const HAS_MODE_AND_SIZE = 1 << 3;
109 const HAS_FILE_MTIME = 1 << 4;
109 const HAS_FILE_MTIME = 1 << 4;
110 const HAS_DIRECTORY_MTIME = 1 << 5;
110 const HAS_DIRECTORY_MTIME = 1 << 5;
111 const MODE_EXEC_PERM = 1 << 6;
111 const MODE_EXEC_PERM = 1 << 6;
112 const MODE_IS_SYMLINK = 1 << 7;
112 const MODE_IS_SYMLINK = 1 << 7;
113 const EXPECTED_STATE_IS_MODIFIED = 1 << 8;
113 const EXPECTED_STATE_IS_MODIFIED = 1 << 8;
114 const ALL_UNKNOWN_RECORDED = 1 << 9;
114 const ALL_UNKNOWN_RECORDED = 1 << 9;
115 const ALL_IGNORED_RECORDED = 1 << 10;
115 const ALL_IGNORED_RECORDED = 1 << 10;
116 const HAS_FALLBACK_EXEC = 1 << 11;
116 const HAS_FALLBACK_EXEC = 1 << 11;
117 const FALLBACK_EXEC = 1 << 12;
117 const FALLBACK_EXEC = 1 << 12;
118 const HAS_FALLBACK_SYMLINK = 1 << 13;
118 const HAS_FALLBACK_SYMLINK = 1 << 13;
119 const FALLBACK_SYMLINK = 1 << 14;
119 const FALLBACK_SYMLINK = 1 << 14;
120 const MTIME_SECOND_AMBIGUOUS = 1 << 15;
120 }
121 }
121 }
122 }
122
123
123 /// Duration since the Unix epoch
124 /// Duration since the Unix epoch
124 #[derive(BytesCast, Copy, Clone)]
125 #[derive(BytesCast, Copy, Clone)]
125 #[repr(C)]
126 #[repr(C)]
126 struct PackedTruncatedTimestamp {
127 struct PackedTruncatedTimestamp {
127 truncated_seconds: U32Be,
128 truncated_seconds: U32Be,
128 nanoseconds: U32Be,
129 nanoseconds: U32Be,
129 }
130 }
130
131
131 /// Counted in bytes from the start of the file
132 /// Counted in bytes from the start of the file
132 ///
133 ///
133 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
134 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
134 type Offset = U32Be;
135 type Offset = U32Be;
135
136
136 /// Counted in number of items
137 /// Counted in number of items
137 ///
138 ///
138 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
139 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
139 type Size = U32Be;
140 type Size = U32Be;
140
141
141 /// Counted in bytes
142 /// Counted in bytes
142 ///
143 ///
143 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
144 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
144 type PathSize = U16Be;
145 type PathSize = U16Be;
145
146
146 /// A contiguous sequence of `len` times `Node`, representing the child nodes
147 /// A contiguous sequence of `len` times `Node`, representing the child nodes
147 /// of either some other node or of the repository root.
148 /// of either some other node or of the repository root.
148 ///
149 ///
149 /// Always sorted by ascending `full_path`, to allow binary search.
150 /// Always sorted by ascending `full_path`, to allow binary search.
150 /// Since nodes with the same parent nodes also have the same parent path,
151 /// Since nodes with the same parent nodes also have the same parent path,
151 /// only the `base_name`s need to be compared during binary search.
152 /// only the `base_name`s need to be compared during binary search.
152 #[derive(BytesCast, Copy, Clone)]
153 #[derive(BytesCast, Copy, Clone)]
153 #[repr(C)]
154 #[repr(C)]
154 struct ChildNodes {
155 struct ChildNodes {
155 start: Offset,
156 start: Offset,
156 len: Size,
157 len: Size,
157 }
158 }
158
159
159 /// A `HgPath` of `len` bytes
160 /// A `HgPath` of `len` bytes
160 #[derive(BytesCast, Copy, Clone)]
161 #[derive(BytesCast, Copy, Clone)]
161 #[repr(C)]
162 #[repr(C)]
162 struct PathSlice {
163 struct PathSlice {
163 start: Offset,
164 start: Offset,
164 len: PathSize,
165 len: PathSize,
165 }
166 }
166
167
167 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
168 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
168 type OptPathSlice = PathSlice;
169 type OptPathSlice = PathSlice;
169
170
170 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
171 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
171 ///
172 ///
172 /// This should only happen if Mercurial is buggy or a repository is corrupted.
173 /// This should only happen if Mercurial is buggy or a repository is corrupted.
173 #[derive(Debug)]
174 #[derive(Debug)]
174 pub struct DirstateV2ParseError;
175 pub struct DirstateV2ParseError;
175
176
176 impl From<DirstateV2ParseError> for HgError {
177 impl From<DirstateV2ParseError> for HgError {
177 fn from(_: DirstateV2ParseError) -> Self {
178 fn from(_: DirstateV2ParseError) -> Self {
178 HgError::corrupted("dirstate-v2 parse error")
179 HgError::corrupted("dirstate-v2 parse error")
179 }
180 }
180 }
181 }
181
182
182 impl From<DirstateV2ParseError> for crate::DirstateError {
183 impl From<DirstateV2ParseError> for crate::DirstateError {
183 fn from(error: DirstateV2ParseError) -> Self {
184 fn from(error: DirstateV2ParseError) -> Self {
184 HgError::from(error).into()
185 HgError::from(error).into()
185 }
186 }
186 }
187 }
187
188
188 impl<'on_disk> Docket<'on_disk> {
189 impl<'on_disk> Docket<'on_disk> {
189 pub fn parents(&self) -> DirstateParents {
190 pub fn parents(&self) -> DirstateParents {
190 use crate::Node;
191 use crate::Node;
191 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
192 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
192 .unwrap()
193 .unwrap()
193 .clone();
194 .clone();
194 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
195 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
195 .unwrap()
196 .unwrap()
196 .clone();
197 .clone();
197 DirstateParents { p1, p2 }
198 DirstateParents { p1, p2 }
198 }
199 }
199
200
200 pub fn tree_metadata(&self) -> &[u8] {
201 pub fn tree_metadata(&self) -> &[u8] {
201 self.header.metadata.as_bytes()
202 self.header.metadata.as_bytes()
202 }
203 }
203
204
204 pub fn data_size(&self) -> usize {
205 pub fn data_size(&self) -> usize {
205 // This `unwrap` could only panic on a 16-bit CPU
206 // This `unwrap` could only panic on a 16-bit CPU
206 self.header.data_size.get().try_into().unwrap()
207 self.header.data_size.get().try_into().unwrap()
207 }
208 }
208
209
209 pub fn data_filename(&self) -> String {
210 pub fn data_filename(&self) -> String {
210 String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
211 String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
211 }
212 }
212 }
213 }
213
214
214 pub fn read_docket(
215 pub fn read_docket(
215 on_disk: &[u8],
216 on_disk: &[u8],
216 ) -> Result<Docket<'_>, DirstateV2ParseError> {
217 ) -> Result<Docket<'_>, DirstateV2ParseError> {
217 let (header, uuid) =
218 let (header, uuid) =
218 DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
219 DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
219 let uuid_size = header.uuid_size as usize;
220 let uuid_size = header.uuid_size as usize;
220 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
221 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
221 Ok(Docket { header, uuid })
222 Ok(Docket { header, uuid })
222 } else {
223 } else {
223 Err(DirstateV2ParseError)
224 Err(DirstateV2ParseError)
224 }
225 }
225 }
226 }
226
227
227 pub(super) fn read<'on_disk>(
228 pub(super) fn read<'on_disk>(
228 on_disk: &'on_disk [u8],
229 on_disk: &'on_disk [u8],
229 metadata: &[u8],
230 metadata: &[u8],
230 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
231 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
231 if on_disk.is_empty() {
232 if on_disk.is_empty() {
232 return Ok(DirstateMap::empty(on_disk));
233 return Ok(DirstateMap::empty(on_disk));
233 }
234 }
234 let (meta, _) = TreeMetadata::from_bytes(metadata)
235 let (meta, _) = TreeMetadata::from_bytes(metadata)
235 .map_err(|_| DirstateV2ParseError)?;
236 .map_err(|_| DirstateV2ParseError)?;
236 let dirstate_map = DirstateMap {
237 let dirstate_map = DirstateMap {
237 on_disk,
238 on_disk,
238 root: dirstate_map::ChildNodes::OnDisk(read_nodes(
239 root: dirstate_map::ChildNodes::OnDisk(read_nodes(
239 on_disk,
240 on_disk,
240 meta.root_nodes,
241 meta.root_nodes,
241 )?),
242 )?),
242 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
243 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
243 nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
244 nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
244 ignore_patterns_hash: meta.ignore_patterns_hash,
245 ignore_patterns_hash: meta.ignore_patterns_hash,
245 unreachable_bytes: meta.unreachable_bytes.get(),
246 unreachable_bytes: meta.unreachable_bytes.get(),
246 };
247 };
247 Ok(dirstate_map)
248 Ok(dirstate_map)
248 }
249 }
249
250
250 impl Node {
251 impl Node {
251 pub(super) fn full_path<'on_disk>(
252 pub(super) fn full_path<'on_disk>(
252 &self,
253 &self,
253 on_disk: &'on_disk [u8],
254 on_disk: &'on_disk [u8],
254 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
255 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
255 read_hg_path(on_disk, self.full_path)
256 read_hg_path(on_disk, self.full_path)
256 }
257 }
257
258
258 pub(super) fn base_name_start<'on_disk>(
259 pub(super) fn base_name_start<'on_disk>(
259 &self,
260 &self,
260 ) -> Result<usize, DirstateV2ParseError> {
261 ) -> Result<usize, DirstateV2ParseError> {
261 let start = self.base_name_start.get();
262 let start = self.base_name_start.get();
262 if start < self.full_path.len.get() {
263 if start < self.full_path.len.get() {
263 let start = usize::try_from(start)
264 let start = usize::try_from(start)
264 // u32 -> usize, could only panic on a 16-bit CPU
265 // u32 -> usize, could only panic on a 16-bit CPU
265 .expect("dirstate-v2 base_name_start out of bounds");
266 .expect("dirstate-v2 base_name_start out of bounds");
266 Ok(start)
267 Ok(start)
267 } else {
268 } else {
268 Err(DirstateV2ParseError)
269 Err(DirstateV2ParseError)
269 }
270 }
270 }
271 }
271
272
272 pub(super) fn base_name<'on_disk>(
273 pub(super) fn base_name<'on_disk>(
273 &self,
274 &self,
274 on_disk: &'on_disk [u8],
275 on_disk: &'on_disk [u8],
275 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
276 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
276 let full_path = self.full_path(on_disk)?;
277 let full_path = self.full_path(on_disk)?;
277 let base_name_start = self.base_name_start()?;
278 let base_name_start = self.base_name_start()?;
278 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
279 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
279 }
280 }
280
281
281 pub(super) fn path<'on_disk>(
282 pub(super) fn path<'on_disk>(
282 &self,
283 &self,
283 on_disk: &'on_disk [u8],
284 on_disk: &'on_disk [u8],
284 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
285 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
285 Ok(WithBasename::from_raw_parts(
286 Ok(WithBasename::from_raw_parts(
286 Cow::Borrowed(self.full_path(on_disk)?),
287 Cow::Borrowed(self.full_path(on_disk)?),
287 self.base_name_start()?,
288 self.base_name_start()?,
288 ))
289 ))
289 }
290 }
290
291
291 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
292 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
292 self.copy_source.start.get() != 0
293 self.copy_source.start.get() != 0
293 }
294 }
294
295
295 pub(super) fn copy_source<'on_disk>(
296 pub(super) fn copy_source<'on_disk>(
296 &self,
297 &self,
297 on_disk: &'on_disk [u8],
298 on_disk: &'on_disk [u8],
298 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
299 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
299 Ok(if self.has_copy_source() {
300 Ok(if self.has_copy_source() {
300 Some(read_hg_path(on_disk, self.copy_source)?)
301 Some(read_hg_path(on_disk, self.copy_source)?)
301 } else {
302 } else {
302 None
303 None
303 })
304 })
304 }
305 }
305
306
306 fn flags(&self) -> Flags {
307 fn flags(&self) -> Flags {
307 Flags::from_bits_truncate(self.flags.get())
308 Flags::from_bits_truncate(self.flags.get())
308 }
309 }
309
310
310 fn has_entry(&self) -> bool {
311 fn has_entry(&self) -> bool {
311 self.flags().intersects(
312 self.flags().intersects(
312 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
313 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
313 )
314 )
314 }
315 }
315
316
316 pub(super) fn node_data(
317 pub(super) fn node_data(
317 &self,
318 &self,
318 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
319 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
319 if self.has_entry() {
320 if self.has_entry() {
320 Ok(dirstate_map::NodeData::Entry(self.assume_entry()?))
321 Ok(dirstate_map::NodeData::Entry(self.assume_entry()?))
321 } else if let Some(mtime) = self.cached_directory_mtime()? {
322 } else if let Some(mtime) = self.cached_directory_mtime()? {
322 Ok(dirstate_map::NodeData::CachedDirectory { mtime })
323 Ok(dirstate_map::NodeData::CachedDirectory { mtime })
323 } else {
324 } else {
324 Ok(dirstate_map::NodeData::None)
325 Ok(dirstate_map::NodeData::None)
325 }
326 }
326 }
327 }
327
328
328 pub(super) fn cached_directory_mtime(
329 pub(super) fn cached_directory_mtime(
329 &self,
330 &self,
330 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
331 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
331 // For now we do not have code to handle ALL_UNKNOWN_RECORDED, so we
332 // For now we do not have code to handle ALL_UNKNOWN_RECORDED, so we
332 // ignore the mtime if the flag is set.
333 // ignore the mtime if the flag is set.
333 if self.flags().contains(Flags::HAS_DIRECTORY_MTIME)
334 if self.flags().contains(Flags::HAS_DIRECTORY_MTIME)
334 && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED)
335 && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED)
335 {
336 {
336 if self.flags().contains(Flags::HAS_FILE_MTIME) {
337 if self.flags().contains(Flags::HAS_FILE_MTIME) {
337 Err(DirstateV2ParseError)
338 Err(DirstateV2ParseError)
338 } else {
339 } else {
339 Ok(Some(self.mtime.try_into()?))
340 Ok(Some(self.mtime.try_into()?))
340 }
341 }
341 } else {
342 } else {
342 Ok(None)
343 Ok(None)
343 }
344 }
344 }
345 }
345
346
346 fn synthesize_unix_mode(&self) -> u32 {
347 fn synthesize_unix_mode(&self) -> u32 {
347 let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
348 let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
348 libc::S_IFLNK
349 libc::S_IFLNK
349 } else {
350 } else {
350 libc::S_IFREG
351 libc::S_IFREG
351 };
352 };
352 let permisions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
353 let permisions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
353 0o755
354 0o755
354 } else {
355 } else {
355 0o644
356 0o644
356 };
357 };
357 file_type | permisions
358 file_type | permisions
358 }
359 }
359
360
360 fn assume_entry(&self) -> Result<DirstateEntry, DirstateV2ParseError> {
361 fn assume_entry(&self) -> Result<DirstateEntry, DirstateV2ParseError> {
361 // TODO: convert through raw bits instead?
362 // TODO: convert through raw bits instead?
362 let wdir_tracked = self.flags().contains(Flags::WDIR_TRACKED);
363 let wdir_tracked = self.flags().contains(Flags::WDIR_TRACKED);
363 let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
364 let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
364 let p2_info = self.flags().contains(Flags::P2_INFO);
365 let p2_info = self.flags().contains(Flags::P2_INFO);
365 let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
366 let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
366 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
367 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
367 {
368 {
368 Some((self.synthesize_unix_mode(), self.size.into()))
369 Some((self.synthesize_unix_mode(), self.size.into()))
369 } else {
370 } else {
370 None
371 None
371 };
372 };
372 let mtime = if self.flags().contains(Flags::HAS_FILE_MTIME)
373 let mtime = if self.flags().contains(Flags::HAS_FILE_MTIME)
373 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
374 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
375 // The current code is not able to do the more subtle comparison that the
376 // MTIME_SECOND_AMBIGUOUS requires. So we ignore the mtime
377 && !self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS)
374 {
378 {
375 // TODO: replace this by `self.mtime.try_into()?` to use
379 // TODO: replace this by `self.mtime.try_into()?` to use
376 // sub-second precision from the file.
380 // sub-second precision from the file.
377 // We don’t do this yet because other parts of the code
381 // We don’t do this yet because other parts of the code
378 // always set it to zero.
382 // always set it to zero.
379 let mtime = TruncatedTimestamp::from_already_truncated(
383 let mtime = TruncatedTimestamp::from_already_truncated(
380 self.mtime.truncated_seconds.get(),
384 self.mtime.truncated_seconds.get(),
381 0,
385 0,
382 )?;
386 )?;
383 Some(mtime)
387 Some(mtime)
384 } else {
388 } else {
385 None
389 None
386 };
390 };
387 Ok(DirstateEntry::from_v2_data(
391 Ok(DirstateEntry::from_v2_data(
388 wdir_tracked,
392 wdir_tracked,
389 p1_tracked,
393 p1_tracked,
390 p2_info,
394 p2_info,
391 mode_size,
395 mode_size,
392 mtime,
396 mtime,
393 None,
397 None,
394 None,
398 None,
395 ))
399 ))
396 }
400 }
397
401
398 pub(super) fn entry(
402 pub(super) fn entry(
399 &self,
403 &self,
400 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
404 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
401 if self.has_entry() {
405 if self.has_entry() {
402 Ok(Some(self.assume_entry()?))
406 Ok(Some(self.assume_entry()?))
403 } else {
407 } else {
404 Ok(None)
408 Ok(None)
405 }
409 }
406 }
410 }
407
411
408 pub(super) fn children<'on_disk>(
412 pub(super) fn children<'on_disk>(
409 &self,
413 &self,
410 on_disk: &'on_disk [u8],
414 on_disk: &'on_disk [u8],
411 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
415 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
412 read_nodes(on_disk, self.children)
416 read_nodes(on_disk, self.children)
413 }
417 }
414
418
415 pub(super) fn to_in_memory_node<'on_disk>(
419 pub(super) fn to_in_memory_node<'on_disk>(
416 &self,
420 &self,
417 on_disk: &'on_disk [u8],
421 on_disk: &'on_disk [u8],
418 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
422 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
419 Ok(dirstate_map::Node {
423 Ok(dirstate_map::Node {
420 children: dirstate_map::ChildNodes::OnDisk(
424 children: dirstate_map::ChildNodes::OnDisk(
421 self.children(on_disk)?,
425 self.children(on_disk)?,
422 ),
426 ),
423 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
427 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
424 data: self.node_data()?,
428 data: self.node_data()?,
425 descendants_with_entry_count: self
429 descendants_with_entry_count: self
426 .descendants_with_entry_count
430 .descendants_with_entry_count
427 .get(),
431 .get(),
428 tracked_descendants_count: self.tracked_descendants_count.get(),
432 tracked_descendants_count: self.tracked_descendants_count.get(),
429 })
433 })
430 }
434 }
431
435
432 fn from_dirstate_entry(
436 fn from_dirstate_entry(
433 entry: &DirstateEntry,
437 entry: &DirstateEntry,
434 ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
438 ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
435 let (
439 let (
436 wdir_tracked,
440 wdir_tracked,
437 p1_tracked,
441 p1_tracked,
438 p2_info,
442 p2_info,
439 mode_size_opt,
443 mode_size_opt,
440 mtime_opt,
444 mtime_opt,
441 fallback_exec,
445 fallback_exec,
442 fallback_symlink,
446 fallback_symlink,
443 ) = entry.v2_data();
447 ) = entry.v2_data();
444 // TODO: convert through raw flag bits instead?
448 // TODO: convert through raw flag bits instead?
445 let mut flags = Flags::empty();
449 let mut flags = Flags::empty();
446 flags.set(Flags::WDIR_TRACKED, wdir_tracked);
450 flags.set(Flags::WDIR_TRACKED, wdir_tracked);
447 flags.set(Flags::P1_TRACKED, p1_tracked);
451 flags.set(Flags::P1_TRACKED, p1_tracked);
448 flags.set(Flags::P2_INFO, p2_info);
452 flags.set(Flags::P2_INFO, p2_info);
449 let size = if let Some((m, s)) = mode_size_opt {
453 let size = if let Some((m, s)) = mode_size_opt {
450 let exec_perm = m & libc::S_IXUSR != 0;
454 let exec_perm = m & libc::S_IXUSR != 0;
451 let is_symlink = m & libc::S_IFMT == libc::S_IFLNK;
455 let is_symlink = m & libc::S_IFMT == libc::S_IFLNK;
452 flags.set(Flags::MODE_EXEC_PERM, exec_perm);
456 flags.set(Flags::MODE_EXEC_PERM, exec_perm);
453 flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
457 flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
454 flags.insert(Flags::HAS_MODE_AND_SIZE);
458 flags.insert(Flags::HAS_MODE_AND_SIZE);
455 s.into()
459 s.into()
456 } else {
460 } else {
457 0.into()
461 0.into()
458 };
462 };
459 let mtime = if let Some(m) = mtime_opt {
463 let mtime = if let Some(m) = mtime_opt {
460 flags.insert(Flags::HAS_FILE_MTIME);
464 flags.insert(Flags::HAS_FILE_MTIME);
461 m.into()
465 m.into()
462 } else {
466 } else {
463 PackedTruncatedTimestamp::null()
467 PackedTruncatedTimestamp::null()
464 };
468 };
465 if let Some(f_exec) = fallback_exec {
469 if let Some(f_exec) = fallback_exec {
466 flags.insert(Flags::HAS_FALLBACK_EXEC);
470 flags.insert(Flags::HAS_FALLBACK_EXEC);
467 if f_exec {
471 if f_exec {
468 flags.insert(Flags::FALLBACK_EXEC);
472 flags.insert(Flags::FALLBACK_EXEC);
469 }
473 }
470 }
474 }
471 if let Some(f_symlink) = fallback_symlink {
475 if let Some(f_symlink) = fallback_symlink {
472 flags.insert(Flags::HAS_FALLBACK_SYMLINK);
476 flags.insert(Flags::HAS_FALLBACK_SYMLINK);
473 if f_symlink {
477 if f_symlink {
474 flags.insert(Flags::FALLBACK_SYMLINK);
478 flags.insert(Flags::FALLBACK_SYMLINK);
475 }
479 }
476 }
480 }
477 (flags, size, mtime)
481 (flags, size, mtime)
478 }
482 }
479 }
483 }
480
484
481 fn read_hg_path(
485 fn read_hg_path(
482 on_disk: &[u8],
486 on_disk: &[u8],
483 slice: PathSlice,
487 slice: PathSlice,
484 ) -> Result<&HgPath, DirstateV2ParseError> {
488 ) -> Result<&HgPath, DirstateV2ParseError> {
485 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
489 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
486 }
490 }
487
491
488 fn read_nodes(
492 fn read_nodes(
489 on_disk: &[u8],
493 on_disk: &[u8],
490 slice: ChildNodes,
494 slice: ChildNodes,
491 ) -> Result<&[Node], DirstateV2ParseError> {
495 ) -> Result<&[Node], DirstateV2ParseError> {
492 read_slice(on_disk, slice.start, slice.len.get())
496 read_slice(on_disk, slice.start, slice.len.get())
493 }
497 }
494
498
495 fn read_slice<T, Len>(
499 fn read_slice<T, Len>(
496 on_disk: &[u8],
500 on_disk: &[u8],
497 start: Offset,
501 start: Offset,
498 len: Len,
502 len: Len,
499 ) -> Result<&[T], DirstateV2ParseError>
503 ) -> Result<&[T], DirstateV2ParseError>
500 where
504 where
501 T: BytesCast,
505 T: BytesCast,
502 Len: TryInto<usize>,
506 Len: TryInto<usize>,
503 {
507 {
504 // Either `usize::MAX` would result in "out of bounds" error since a single
508 // Either `usize::MAX` would result in "out of bounds" error since a single
505 // `&[u8]` cannot occupy the entire address space.
509 // `&[u8]` cannot occupy the entire address space.
506 let start = start.get().try_into().unwrap_or(std::usize::MAX);
510 let start = start.get().try_into().unwrap_or(std::usize::MAX);
507 let len = len.try_into().unwrap_or(std::usize::MAX);
511 let len = len.try_into().unwrap_or(std::usize::MAX);
508 on_disk
512 on_disk
509 .get(start..)
513 .get(start..)
510 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
514 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
511 .map(|(slice, _rest)| slice)
515 .map(|(slice, _rest)| slice)
512 .ok_or_else(|| DirstateV2ParseError)
516 .ok_or_else(|| DirstateV2ParseError)
513 }
517 }
514
518
515 pub(crate) fn for_each_tracked_path<'on_disk>(
519 pub(crate) fn for_each_tracked_path<'on_disk>(
516 on_disk: &'on_disk [u8],
520 on_disk: &'on_disk [u8],
517 metadata: &[u8],
521 metadata: &[u8],
518 mut f: impl FnMut(&'on_disk HgPath),
522 mut f: impl FnMut(&'on_disk HgPath),
519 ) -> Result<(), DirstateV2ParseError> {
523 ) -> Result<(), DirstateV2ParseError> {
520 let (meta, _) = TreeMetadata::from_bytes(metadata)
524 let (meta, _) = TreeMetadata::from_bytes(metadata)
521 .map_err(|_| DirstateV2ParseError)?;
525 .map_err(|_| DirstateV2ParseError)?;
522 fn recur<'on_disk>(
526 fn recur<'on_disk>(
523 on_disk: &'on_disk [u8],
527 on_disk: &'on_disk [u8],
524 nodes: ChildNodes,
528 nodes: ChildNodes,
525 f: &mut impl FnMut(&'on_disk HgPath),
529 f: &mut impl FnMut(&'on_disk HgPath),
526 ) -> Result<(), DirstateV2ParseError> {
530 ) -> Result<(), DirstateV2ParseError> {
527 for node in read_nodes(on_disk, nodes)? {
531 for node in read_nodes(on_disk, nodes)? {
528 if let Some(entry) = node.entry()? {
532 if let Some(entry) = node.entry()? {
529 if entry.state().is_tracked() {
533 if entry.state().is_tracked() {
530 f(node.full_path(on_disk)?)
534 f(node.full_path(on_disk)?)
531 }
535 }
532 }
536 }
533 recur(on_disk, node.children, f)?
537 recur(on_disk, node.children, f)?
534 }
538 }
535 Ok(())
539 Ok(())
536 }
540 }
537 recur(on_disk, meta.root_nodes, &mut f)
541 recur(on_disk, meta.root_nodes, &mut f)
538 }
542 }
539
543
540 /// Returns new data and metadata, together with whether that data should be
544 /// Returns new data and metadata, together with whether that data should be
541 /// appended to the existing data file whose content is at
545 /// appended to the existing data file whose content is at
542 /// `dirstate_map.on_disk` (true), instead of written to a new data file
546 /// `dirstate_map.on_disk` (true), instead of written to a new data file
543 /// (false).
547 /// (false).
544 pub(super) fn write(
548 pub(super) fn write(
545 dirstate_map: &mut DirstateMap,
549 dirstate_map: &mut DirstateMap,
546 can_append: bool,
550 can_append: bool,
547 ) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
551 ) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
548 let append = can_append && dirstate_map.write_should_append();
552 let append = can_append && dirstate_map.write_should_append();
549
553
550 // This ignores the space for paths, and for nodes without an entry.
554 // This ignores the space for paths, and for nodes without an entry.
551 // TODO: better estimate? Skip the `Vec` and write to a file directly?
555 // TODO: better estimate? Skip the `Vec` and write to a file directly?
552 let size_guess = std::mem::size_of::<Node>()
556 let size_guess = std::mem::size_of::<Node>()
553 * dirstate_map.nodes_with_entry_count as usize;
557 * dirstate_map.nodes_with_entry_count as usize;
554
558
555 let mut writer = Writer {
559 let mut writer = Writer {
556 dirstate_map,
560 dirstate_map,
557 append,
561 append,
558 out: Vec::with_capacity(size_guess),
562 out: Vec::with_capacity(size_guess),
559 };
563 };
560
564
561 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
565 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
562
566
563 let meta = TreeMetadata {
567 let meta = TreeMetadata {
564 root_nodes,
568 root_nodes,
565 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
569 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
566 nodes_with_copy_source_count: dirstate_map
570 nodes_with_copy_source_count: dirstate_map
567 .nodes_with_copy_source_count
571 .nodes_with_copy_source_count
568 .into(),
572 .into(),
569 unreachable_bytes: dirstate_map.unreachable_bytes.into(),
573 unreachable_bytes: dirstate_map.unreachable_bytes.into(),
570 unused: [0; 4],
574 unused: [0; 4],
571 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
575 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
572 };
576 };
573 Ok((writer.out, meta.as_bytes().to_vec(), append))
577 Ok((writer.out, meta.as_bytes().to_vec(), append))
574 }
578 }
575
579
576 struct Writer<'dmap, 'on_disk> {
580 struct Writer<'dmap, 'on_disk> {
577 dirstate_map: &'dmap DirstateMap<'on_disk>,
581 dirstate_map: &'dmap DirstateMap<'on_disk>,
578 append: bool,
582 append: bool,
579 out: Vec<u8>,
583 out: Vec<u8>,
580 }
584 }
581
585
582 impl Writer<'_, '_> {
586 impl Writer<'_, '_> {
583 fn write_nodes(
587 fn write_nodes(
584 &mut self,
588 &mut self,
585 nodes: dirstate_map::ChildNodesRef,
589 nodes: dirstate_map::ChildNodesRef,
586 ) -> Result<ChildNodes, DirstateError> {
590 ) -> Result<ChildNodes, DirstateError> {
587 // Reuse already-written nodes if possible
591 // Reuse already-written nodes if possible
588 if self.append {
592 if self.append {
589 if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
593 if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
590 let start = self.on_disk_offset_of(nodes_slice).expect(
594 let start = self.on_disk_offset_of(nodes_slice).expect(
591 "dirstate-v2 OnDisk nodes not found within on_disk",
595 "dirstate-v2 OnDisk nodes not found within on_disk",
592 );
596 );
593 let len = child_nodes_len_from_usize(nodes_slice.len());
597 let len = child_nodes_len_from_usize(nodes_slice.len());
594 return Ok(ChildNodes { start, len });
598 return Ok(ChildNodes { start, len });
595 }
599 }
596 }
600 }
597
601
598 // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
602 // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
599 // undefined iteration order. Sort to enable binary search in the
603 // undefined iteration order. Sort to enable binary search in the
600 // written file.
604 // written file.
601 let nodes = nodes.sorted();
605 let nodes = nodes.sorted();
602 let nodes_len = nodes.len();
606 let nodes_len = nodes.len();
603
607
604 // First accumulate serialized nodes in a `Vec`
608 // First accumulate serialized nodes in a `Vec`
605 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
609 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
606 for node in nodes {
610 for node in nodes {
607 let children =
611 let children =
608 self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
612 self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
609 let full_path = node.full_path(self.dirstate_map.on_disk)?;
613 let full_path = node.full_path(self.dirstate_map.on_disk)?;
610 let full_path = self.write_path(full_path.as_bytes());
614 let full_path = self.write_path(full_path.as_bytes());
611 let copy_source = if let Some(source) =
615 let copy_source = if let Some(source) =
612 node.copy_source(self.dirstate_map.on_disk)?
616 node.copy_source(self.dirstate_map.on_disk)?
613 {
617 {
614 self.write_path(source.as_bytes())
618 self.write_path(source.as_bytes())
615 } else {
619 } else {
616 PathSlice {
620 PathSlice {
617 start: 0.into(),
621 start: 0.into(),
618 len: 0.into(),
622 len: 0.into(),
619 }
623 }
620 };
624 };
621 on_disk_nodes.push(match node {
625 on_disk_nodes.push(match node {
622 NodeRef::InMemory(path, node) => {
626 NodeRef::InMemory(path, node) => {
623 let (flags, size, mtime) = match &node.data {
627 let (flags, size, mtime) = match &node.data {
624 dirstate_map::NodeData::Entry(entry) => {
628 dirstate_map::NodeData::Entry(entry) => {
625 Node::from_dirstate_entry(entry)
629 Node::from_dirstate_entry(entry)
626 }
630 }
627 dirstate_map::NodeData::CachedDirectory { mtime } => (
631 dirstate_map::NodeData::CachedDirectory { mtime } => (
628 // we currently never set a mtime if unknown files
632 // we currently never set a mtime if unknown files
629 // are present.
633 // are present.
630 // So if we have a mtime for a directory, we know
634 // So if we have a mtime for a directory, we know
631 // there are no unknown
635 // there are no unknown
632 // files and we
636 // files and we
633 // blindly set ALL_UNKNOWN_RECORDED.
637 // blindly set ALL_UNKNOWN_RECORDED.
634 //
638 //
635 // We never set ALL_IGNORED_RECORDED since we
639 // We never set ALL_IGNORED_RECORDED since we
636 // don't track that case
640 // don't track that case
637 // currently.
641 // currently.
638 Flags::HAS_DIRECTORY_MTIME
642 Flags::HAS_DIRECTORY_MTIME
639 | Flags::ALL_UNKNOWN_RECORDED,
643 | Flags::ALL_UNKNOWN_RECORDED,
640 0.into(),
644 0.into(),
641 (*mtime).into(),
645 (*mtime).into(),
642 ),
646 ),
643 dirstate_map::NodeData::None => (
647 dirstate_map::NodeData::None => (
644 Flags::empty(),
648 Flags::empty(),
645 0.into(),
649 0.into(),
646 PackedTruncatedTimestamp::null(),
650 PackedTruncatedTimestamp::null(),
647 ),
651 ),
648 };
652 };
649 Node {
653 Node {
650 children,
654 children,
651 copy_source,
655 copy_source,
652 full_path,
656 full_path,
653 base_name_start: u16::try_from(path.base_name_start())
657 base_name_start: u16::try_from(path.base_name_start())
654 // Could only panic for paths over 64 KiB
658 // Could only panic for paths over 64 KiB
655 .expect("dirstate-v2 path length overflow")
659 .expect("dirstate-v2 path length overflow")
656 .into(),
660 .into(),
657 descendants_with_entry_count: node
661 descendants_with_entry_count: node
658 .descendants_with_entry_count
662 .descendants_with_entry_count
659 .into(),
663 .into(),
660 tracked_descendants_count: node
664 tracked_descendants_count: node
661 .tracked_descendants_count
665 .tracked_descendants_count
662 .into(),
666 .into(),
663 flags: flags.bits().into(),
667 flags: flags.bits().into(),
664 size,
668 size,
665 mtime,
669 mtime,
666 }
670 }
667 }
671 }
668 NodeRef::OnDisk(node) => Node {
672 NodeRef::OnDisk(node) => Node {
669 children,
673 children,
670 copy_source,
674 copy_source,
671 full_path,
675 full_path,
672 ..*node
676 ..*node
673 },
677 },
674 })
678 })
675 }
679 }
676 // … so we can write them contiguously, after writing everything else
680 // … so we can write them contiguously, after writing everything else
677 // they refer to.
681 // they refer to.
678 let start = self.current_offset();
682 let start = self.current_offset();
679 let len = child_nodes_len_from_usize(nodes_len);
683 let len = child_nodes_len_from_usize(nodes_len);
680 self.out.extend(on_disk_nodes.as_bytes());
684 self.out.extend(on_disk_nodes.as_bytes());
681 Ok(ChildNodes { start, len })
685 Ok(ChildNodes { start, len })
682 }
686 }
683
687
684 /// If the given slice of items is within `on_disk`, returns its offset
688 /// If the given slice of items is within `on_disk`, returns its offset
685 /// from the start of `on_disk`.
689 /// from the start of `on_disk`.
686 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
690 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
687 where
691 where
688 T: BytesCast,
692 T: BytesCast,
689 {
693 {
690 fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
694 fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
691 let start = slice.as_ptr() as usize;
695 let start = slice.as_ptr() as usize;
692 let end = start + slice.len();
696 let end = start + slice.len();
693 start..=end
697 start..=end
694 }
698 }
695 let slice_addresses = address_range(slice.as_bytes());
699 let slice_addresses = address_range(slice.as_bytes());
696 let on_disk_addresses = address_range(self.dirstate_map.on_disk);
700 let on_disk_addresses = address_range(self.dirstate_map.on_disk);
697 if on_disk_addresses.contains(slice_addresses.start())
701 if on_disk_addresses.contains(slice_addresses.start())
698 && on_disk_addresses.contains(slice_addresses.end())
702 && on_disk_addresses.contains(slice_addresses.end())
699 {
703 {
700 let offset = slice_addresses.start() - on_disk_addresses.start();
704 let offset = slice_addresses.start() - on_disk_addresses.start();
701 Some(offset_from_usize(offset))
705 Some(offset_from_usize(offset))
702 } else {
706 } else {
703 None
707 None
704 }
708 }
705 }
709 }
706
710
707 fn current_offset(&mut self) -> Offset {
711 fn current_offset(&mut self) -> Offset {
708 let mut offset = self.out.len();
712 let mut offset = self.out.len();
709 if self.append {
713 if self.append {
710 offset += self.dirstate_map.on_disk.len()
714 offset += self.dirstate_map.on_disk.len()
711 }
715 }
712 offset_from_usize(offset)
716 offset_from_usize(offset)
713 }
717 }
714
718
715 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
719 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
716 let len = path_len_from_usize(slice.len());
720 let len = path_len_from_usize(slice.len());
717 // Reuse an already-written path if possible
721 // Reuse an already-written path if possible
718 if self.append {
722 if self.append {
719 if let Some(start) = self.on_disk_offset_of(slice) {
723 if let Some(start) = self.on_disk_offset_of(slice) {
720 return PathSlice { start, len };
724 return PathSlice { start, len };
721 }
725 }
722 }
726 }
723 let start = self.current_offset();
727 let start = self.current_offset();
724 self.out.extend(slice.as_bytes());
728 self.out.extend(slice.as_bytes());
725 PathSlice { start, len }
729 PathSlice { start, len }
726 }
730 }
727 }
731 }
728
732
729 fn offset_from_usize(x: usize) -> Offset {
733 fn offset_from_usize(x: usize) -> Offset {
730 u32::try_from(x)
734 u32::try_from(x)
731 // Could only panic for a dirstate file larger than 4 GiB
735 // Could only panic for a dirstate file larger than 4 GiB
732 .expect("dirstate-v2 offset overflow")
736 .expect("dirstate-v2 offset overflow")
733 .into()
737 .into()
734 }
738 }
735
739
736 fn child_nodes_len_from_usize(x: usize) -> Size {
740 fn child_nodes_len_from_usize(x: usize) -> Size {
737 u32::try_from(x)
741 u32::try_from(x)
738 // Could only panic with over 4 billion nodes
742 // Could only panic with over 4 billion nodes
739 .expect("dirstate-v2 slice length overflow")
743 .expect("dirstate-v2 slice length overflow")
740 .into()
744 .into()
741 }
745 }
742
746
743 fn path_len_from_usize(x: usize) -> PathSize {
747 fn path_len_from_usize(x: usize) -> PathSize {
744 u16::try_from(x)
748 u16::try_from(x)
745 // Could only panic for paths over 64 KiB
749 // Could only panic for paths over 64 KiB
746 .expect("dirstate-v2 path length overflow")
750 .expect("dirstate-v2 path length overflow")
747 .into()
751 .into()
748 }
752 }
749
753
750 impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
754 impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
751 fn from(timestamp: TruncatedTimestamp) -> Self {
755 fn from(timestamp: TruncatedTimestamp) -> Self {
752 Self {
756 Self {
753 truncated_seconds: timestamp.truncated_seconds().into(),
757 truncated_seconds: timestamp.truncated_seconds().into(),
754 nanoseconds: timestamp.nanoseconds().into(),
758 nanoseconds: timestamp.nanoseconds().into(),
755 }
759 }
756 }
760 }
757 }
761 }
758
762
759 impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
763 impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
760 type Error = DirstateV2ParseError;
764 type Error = DirstateV2ParseError;
761
765
762 fn try_from(
766 fn try_from(
763 timestamp: PackedTruncatedTimestamp,
767 timestamp: PackedTruncatedTimestamp,
764 ) -> Result<Self, Self::Error> {
768 ) -> Result<Self, Self::Error> {
765 Self::from_already_truncated(
769 Self::from_already_truncated(
766 timestamp.truncated_seconds.get(),
770 timestamp.truncated_seconds.get(),
767 timestamp.nanoseconds.get(),
771 timestamp.nanoseconds.get(),
768 )
772 )
769 }
773 }
770 }
774 }
771 impl PackedTruncatedTimestamp {
775 impl PackedTruncatedTimestamp {
772 fn null() -> Self {
776 fn null() -> Self {
773 Self {
777 Self {
774 truncated_seconds: 0.into(),
778 truncated_seconds: 0.into(),
775 nanoseconds: 0.into(),
779 nanoseconds: 0.into(),
776 }
780 }
777 }
781 }
778 }
782 }
General Comments 0
You need to be logged in to leave comments. Login now