##// END OF EJS Templates
dirstate-v2: adds a flag to mark a file as modified...
Simon Sapin -
r49066:1730b2fc default
parent child Browse files
Show More
@@ -1,1174 +1,1177 b''
1 /*
1 /*
2 parsers.c - efficient content parsing
2 parsers.c - efficient content parsing
3
3
4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8 */
8 */
9
9
10 #define PY_SSIZE_T_CLEAN
10 #define PY_SSIZE_T_CLEAN
11 #include <Python.h>
11 #include <Python.h>
12 #include <ctype.h>
12 #include <ctype.h>
13 #include <stddef.h>
13 #include <stddef.h>
14 #include <string.h>
14 #include <string.h>
15
15
16 #include "bitmanipulation.h"
16 #include "bitmanipulation.h"
17 #include "charencode.h"
17 #include "charencode.h"
18 #include "util.h"
18 #include "util.h"
19
19
20 #ifdef IS_PY3K
20 #ifdef IS_PY3K
21 /* The mapping of Python types is meant to be temporary to get Python
21 /* The mapping of Python types is meant to be temporary to get Python
22 * 3 to compile. We should remove this once Python 3 support is fully
22 * 3 to compile. We should remove this once Python 3 support is fully
23 * supported and proper types are used in the extensions themselves. */
23 * supported and proper types are used in the extensions themselves. */
24 #define PyInt_Check PyLong_Check
24 #define PyInt_Check PyLong_Check
25 #define PyInt_FromLong PyLong_FromLong
25 #define PyInt_FromLong PyLong_FromLong
26 #define PyInt_FromSsize_t PyLong_FromSsize_t
26 #define PyInt_FromSsize_t PyLong_FromSsize_t
27 #define PyInt_AsLong PyLong_AsLong
27 #define PyInt_AsLong PyLong_AsLong
28 #endif
28 #endif
29
29
30 static const char *const versionerrortext = "Python minor version mismatch";
30 static const char *const versionerrortext = "Python minor version mismatch";
31
31
32 static const int dirstate_v1_from_p2 = -2;
32 static const int dirstate_v1_from_p2 = -2;
33 static const int dirstate_v1_nonnormal = -1;
33 static const int dirstate_v1_nonnormal = -1;
34 static const int ambiguous_time = -1;
34 static const int ambiguous_time = -1;
35
35
36 static PyObject *dict_new_presized(PyObject *self, PyObject *args)
36 static PyObject *dict_new_presized(PyObject *self, PyObject *args)
37 {
37 {
38 Py_ssize_t expected_size;
38 Py_ssize_t expected_size;
39
39
40 if (!PyArg_ParseTuple(args, "n:make_presized_dict", &expected_size)) {
40 if (!PyArg_ParseTuple(args, "n:make_presized_dict", &expected_size)) {
41 return NULL;
41 return NULL;
42 }
42 }
43
43
44 return _dict_new_presized(expected_size);
44 return _dict_new_presized(expected_size);
45 }
45 }
46
46
47 static PyObject *dirstate_item_new(PyTypeObject *subtype, PyObject *args,
47 static PyObject *dirstate_item_new(PyTypeObject *subtype, PyObject *args,
48 PyObject *kwds)
48 PyObject *kwds)
49 {
49 {
50 /* We do all the initialization here and not a tp_init function because
50 /* We do all the initialization here and not a tp_init function because
51 * dirstate_item is immutable. */
51 * dirstate_item is immutable. */
52 dirstateItemObject *t;
52 dirstateItemObject *t;
53 int wc_tracked;
53 int wc_tracked;
54 int p1_tracked;
54 int p1_tracked;
55 int p2_info;
55 int p2_info;
56 int has_meaningful_data;
56 int has_meaningful_data;
57 int has_meaningful_mtime;
57 int has_meaningful_mtime;
58 int mode;
58 int mode;
59 int size;
59 int size;
60 int mtime;
60 int mtime;
61 PyObject *parentfiledata;
61 PyObject *parentfiledata;
62 static char *keywords_name[] = {
62 static char *keywords_name[] = {
63 "wc_tracked",
63 "wc_tracked",
64 "p1_tracked",
64 "p1_tracked",
65 "p2_info",
65 "p2_info",
66 "has_meaningful_data",
66 "has_meaningful_data",
67 "has_meaningful_mtime",
67 "has_meaningful_mtime",
68 "parentfiledata",
68 "parentfiledata",
69 NULL,
69 NULL,
70 };
70 };
71 wc_tracked = 0;
71 wc_tracked = 0;
72 p1_tracked = 0;
72 p1_tracked = 0;
73 p2_info = 0;
73 p2_info = 0;
74 has_meaningful_mtime = 1;
74 has_meaningful_mtime = 1;
75 has_meaningful_data = 1;
75 has_meaningful_data = 1;
76 parentfiledata = Py_None;
76 parentfiledata = Py_None;
77 if (!PyArg_ParseTupleAndKeywords(
77 if (!PyArg_ParseTupleAndKeywords(
78 args, kwds, "|iiiiiO", keywords_name, &wc_tracked, &p1_tracked,
78 args, kwds, "|iiiiiO", keywords_name, &wc_tracked, &p1_tracked,
79 &p2_info, &has_meaningful_data, &has_meaningful_mtime,
79 &p2_info, &has_meaningful_data, &has_meaningful_mtime,
80 &parentfiledata)) {
80 &parentfiledata)) {
81 return NULL;
81 return NULL;
82 }
82 }
83 t = (dirstateItemObject *)subtype->tp_alloc(subtype, 1);
83 t = (dirstateItemObject *)subtype->tp_alloc(subtype, 1);
84 if (!t) {
84 if (!t) {
85 return NULL;
85 return NULL;
86 }
86 }
87
87
88 t->flags = 0;
88 t->flags = 0;
89 if (wc_tracked) {
89 if (wc_tracked) {
90 t->flags |= dirstate_flag_wc_tracked;
90 t->flags |= dirstate_flag_wc_tracked;
91 }
91 }
92 if (p1_tracked) {
92 if (p1_tracked) {
93 t->flags |= dirstate_flag_p1_tracked;
93 t->flags |= dirstate_flag_p1_tracked;
94 }
94 }
95 if (p2_info) {
95 if (p2_info) {
96 t->flags |= dirstate_flag_p2_info;
96 t->flags |= dirstate_flag_p2_info;
97 }
97 }
98
98
99 if (parentfiledata != Py_None) {
99 if (parentfiledata != Py_None) {
100 if (!PyTuple_CheckExact(parentfiledata)) {
100 if (!PyTuple_CheckExact(parentfiledata)) {
101 PyErr_SetString(
101 PyErr_SetString(
102 PyExc_TypeError,
102 PyExc_TypeError,
103 "parentfiledata should be a Tuple or None");
103 "parentfiledata should be a Tuple or None");
104 return NULL;
104 return NULL;
105 }
105 }
106 mode = (int)PyLong_AsLong(PyTuple_GetItem(parentfiledata, 0));
106 mode = (int)PyLong_AsLong(PyTuple_GetItem(parentfiledata, 0));
107 size = (int)PyLong_AsLong(PyTuple_GetItem(parentfiledata, 1));
107 size = (int)PyLong_AsLong(PyTuple_GetItem(parentfiledata, 1));
108 mtime = (int)PyLong_AsLong(PyTuple_GetItem(parentfiledata, 2));
108 mtime = (int)PyLong_AsLong(PyTuple_GetItem(parentfiledata, 2));
109 } else {
109 } else {
110 has_meaningful_data = 0;
110 has_meaningful_data = 0;
111 has_meaningful_mtime = 0;
111 has_meaningful_mtime = 0;
112 }
112 }
113 if (has_meaningful_data) {
113 if (has_meaningful_data) {
114 t->flags |= dirstate_flag_has_meaningful_data;
114 t->flags |= dirstate_flag_has_meaningful_data;
115 t->mode = mode;
115 t->mode = mode;
116 t->size = size;
116 t->size = size;
117 } else {
117 } else {
118 t->mode = 0;
118 t->mode = 0;
119 t->size = 0;
119 t->size = 0;
120 }
120 }
121 if (has_meaningful_mtime) {
121 if (has_meaningful_mtime) {
122 t->flags |= dirstate_flag_has_file_mtime;
122 t->flags |= dirstate_flag_has_file_mtime;
123 t->mtime = mtime;
123 t->mtime = mtime;
124 } else {
124 } else {
125 t->mtime = 0;
125 t->mtime = 0;
126 }
126 }
127 return (PyObject *)t;
127 return (PyObject *)t;
128 }
128 }
129
129
130 static void dirstate_item_dealloc(PyObject *o)
130 static void dirstate_item_dealloc(PyObject *o)
131 {
131 {
132 PyObject_Del(o);
132 PyObject_Del(o);
133 }
133 }
134
134
135 static inline bool dirstate_item_c_tracked(dirstateItemObject *self)
135 static inline bool dirstate_item_c_tracked(dirstateItemObject *self)
136 {
136 {
137 return (self->flags & dirstate_flag_wc_tracked);
137 return (self->flags & dirstate_flag_wc_tracked);
138 }
138 }
139
139
140 static inline bool dirstate_item_c_any_tracked(dirstateItemObject *self)
140 static inline bool dirstate_item_c_any_tracked(dirstateItemObject *self)
141 {
141 {
142 const unsigned char mask = dirstate_flag_wc_tracked |
142 const int mask = dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
143 dirstate_flag_p1_tracked |
143 dirstate_flag_p2_info;
144 dirstate_flag_p2_info;
145 return (self->flags & mask);
144 return (self->flags & mask);
146 }
145 }
147
146
148 static inline bool dirstate_item_c_added(dirstateItemObject *self)
147 static inline bool dirstate_item_c_added(dirstateItemObject *self)
149 {
148 {
150 const unsigned char mask =
149 const int mask = (dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
151 (dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
150 dirstate_flag_p2_info);
152 dirstate_flag_p2_info);
151 const int target = dirstate_flag_wc_tracked;
153 const unsigned char target = dirstate_flag_wc_tracked;
154 return (self->flags & mask) == target;
152 return (self->flags & mask) == target;
155 }
153 }
156
154
157 static inline bool dirstate_item_c_removed(dirstateItemObject *self)
155 static inline bool dirstate_item_c_removed(dirstateItemObject *self)
158 {
156 {
159 if (self->flags & dirstate_flag_wc_tracked) {
157 if (self->flags & dirstate_flag_wc_tracked) {
160 return false;
158 return false;
161 }
159 }
162 return (self->flags &
160 return (self->flags &
163 (dirstate_flag_p1_tracked | dirstate_flag_p2_info));
161 (dirstate_flag_p1_tracked | dirstate_flag_p2_info));
164 }
162 }
165
163
166 static inline bool dirstate_item_c_merged(dirstateItemObject *self)
164 static inline bool dirstate_item_c_merged(dirstateItemObject *self)
167 {
165 {
168 return ((self->flags & dirstate_flag_wc_tracked) &&
166 return ((self->flags & dirstate_flag_wc_tracked) &&
169 (self->flags & dirstate_flag_p1_tracked) &&
167 (self->flags & dirstate_flag_p1_tracked) &&
170 (self->flags & dirstate_flag_p2_info));
168 (self->flags & dirstate_flag_p2_info));
171 }
169 }
172
170
173 static inline bool dirstate_item_c_from_p2(dirstateItemObject *self)
171 static inline bool dirstate_item_c_from_p2(dirstateItemObject *self)
174 {
172 {
175 return ((self->flags & dirstate_flag_wc_tracked) &&
173 return ((self->flags & dirstate_flag_wc_tracked) &&
176 !(self->flags & dirstate_flag_p1_tracked) &&
174 !(self->flags & dirstate_flag_p1_tracked) &&
177 (self->flags & dirstate_flag_p2_info));
175 (self->flags & dirstate_flag_p2_info));
178 }
176 }
179
177
180 static inline char dirstate_item_c_v1_state(dirstateItemObject *self)
178 static inline char dirstate_item_c_v1_state(dirstateItemObject *self)
181 {
179 {
182 if (dirstate_item_c_removed(self)) {
180 if (dirstate_item_c_removed(self)) {
183 return 'r';
181 return 'r';
184 } else if (dirstate_item_c_merged(self)) {
182 } else if (dirstate_item_c_merged(self)) {
185 return 'm';
183 return 'm';
186 } else if (dirstate_item_c_added(self)) {
184 } else if (dirstate_item_c_added(self)) {
187 return 'a';
185 return 'a';
188 } else {
186 } else {
189 return 'n';
187 return 'n';
190 }
188 }
191 }
189 }
192
190
193 static inline int dirstate_item_c_v1_mode(dirstateItemObject *self)
191 static inline int dirstate_item_c_v1_mode(dirstateItemObject *self)
194 {
192 {
195 if (self->flags & dirstate_flag_has_meaningful_data) {
193 if (self->flags & dirstate_flag_has_meaningful_data) {
196 return self->mode;
194 return self->mode;
197 } else {
195 } else {
198 return 0;
196 return 0;
199 }
197 }
200 }
198 }
201
199
202 static inline int dirstate_item_c_v1_size(dirstateItemObject *self)
200 static inline int dirstate_item_c_v1_size(dirstateItemObject *self)
203 {
201 {
204 if (!(self->flags & dirstate_flag_wc_tracked) &&
202 if (!(self->flags & dirstate_flag_wc_tracked) &&
205 (self->flags & dirstate_flag_p2_info)) {
203 (self->flags & dirstate_flag_p2_info)) {
206 if (self->flags & dirstate_flag_p1_tracked) {
204 if (self->flags & dirstate_flag_p1_tracked) {
207 return dirstate_v1_nonnormal;
205 return dirstate_v1_nonnormal;
208 } else {
206 } else {
209 return dirstate_v1_from_p2;
207 return dirstate_v1_from_p2;
210 }
208 }
211 } else if (dirstate_item_c_removed(self)) {
209 } else if (dirstate_item_c_removed(self)) {
212 return 0;
210 return 0;
213 } else if (self->flags & dirstate_flag_p2_info) {
211 } else if (self->flags & dirstate_flag_p2_info) {
214 return dirstate_v1_from_p2;
212 return dirstate_v1_from_p2;
215 } else if (dirstate_item_c_added(self)) {
213 } else if (dirstate_item_c_added(self)) {
216 return dirstate_v1_nonnormal;
214 return dirstate_v1_nonnormal;
217 } else if (self->flags & dirstate_flag_has_meaningful_data) {
215 } else if (self->flags & dirstate_flag_has_meaningful_data) {
218 return self->size;
216 return self->size;
219 } else {
217 } else {
220 return dirstate_v1_nonnormal;
218 return dirstate_v1_nonnormal;
221 }
219 }
222 }
220 }
223
221
224 static inline int dirstate_item_c_v1_mtime(dirstateItemObject *self)
222 static inline int dirstate_item_c_v1_mtime(dirstateItemObject *self)
225 {
223 {
226 if (dirstate_item_c_removed(self)) {
224 if (dirstate_item_c_removed(self)) {
227 return 0;
225 return 0;
228 } else if (!(self->flags & dirstate_flag_has_file_mtime) ||
226 } else if (!(self->flags & dirstate_flag_has_file_mtime) ||
229 !(self->flags & dirstate_flag_p1_tracked) ||
227 !(self->flags & dirstate_flag_p1_tracked) ||
230 !(self->flags & dirstate_flag_wc_tracked) ||
228 !(self->flags & dirstate_flag_wc_tracked) ||
231 (self->flags & dirstate_flag_p2_info)) {
229 (self->flags & dirstate_flag_p2_info)) {
232 return ambiguous_time;
230 return ambiguous_time;
233 } else {
231 } else {
234 return self->mtime;
232 return self->mtime;
235 }
233 }
236 }
234 }
237
235
238 static PyObject *dirstate_item_v2_data(dirstateItemObject *self)
236 static PyObject *dirstate_item_v2_data(dirstateItemObject *self)
239 {
237 {
240 unsigned char flags = self->flags;
238 int flags = self->flags;
241 int mode = dirstate_item_c_v1_mode(self);
239 int mode = dirstate_item_c_v1_mode(self);
242 if ((mode & S_IXUSR) != 0) {
240 if ((mode & S_IXUSR) != 0) {
243 flags |= dirstate_flag_mode_exec_perm;
241 flags |= dirstate_flag_mode_exec_perm;
244 } else {
242 } else {
245 flags &= ~dirstate_flag_mode_exec_perm;
243 flags &= ~dirstate_flag_mode_exec_perm;
246 }
244 }
247 if (S_ISLNK(mode)) {
245 if (S_ISLNK(mode)) {
248 flags |= dirstate_flag_mode_is_symlink;
246 flags |= dirstate_flag_mode_is_symlink;
249 } else {
247 } else {
250 flags &= ~dirstate_flag_mode_is_symlink;
248 flags &= ~dirstate_flag_mode_is_symlink;
251 }
249 }
252 return Py_BuildValue("Bii", flags, self->size, self->mtime);
250 return Py_BuildValue("iii", flags, self->size, self->mtime);
253 };
251 };
254
252
255 static PyObject *dirstate_item_v1_state(dirstateItemObject *self)
253 static PyObject *dirstate_item_v1_state(dirstateItemObject *self)
256 {
254 {
257 char state = dirstate_item_c_v1_state(self);
255 char state = dirstate_item_c_v1_state(self);
258 return PyBytes_FromStringAndSize(&state, 1);
256 return PyBytes_FromStringAndSize(&state, 1);
259 };
257 };
260
258
261 static PyObject *dirstate_item_v1_mode(dirstateItemObject *self)
259 static PyObject *dirstate_item_v1_mode(dirstateItemObject *self)
262 {
260 {
263 return PyInt_FromLong(dirstate_item_c_v1_mode(self));
261 return PyInt_FromLong(dirstate_item_c_v1_mode(self));
264 };
262 };
265
263
266 static PyObject *dirstate_item_v1_size(dirstateItemObject *self)
264 static PyObject *dirstate_item_v1_size(dirstateItemObject *self)
267 {
265 {
268 return PyInt_FromLong(dirstate_item_c_v1_size(self));
266 return PyInt_FromLong(dirstate_item_c_v1_size(self));
269 };
267 };
270
268
271 static PyObject *dirstate_item_v1_mtime(dirstateItemObject *self)
269 static PyObject *dirstate_item_v1_mtime(dirstateItemObject *self)
272 {
270 {
273 return PyInt_FromLong(dirstate_item_c_v1_mtime(self));
271 return PyInt_FromLong(dirstate_item_c_v1_mtime(self));
274 };
272 };
275
273
276 static PyObject *dirstate_item_need_delay(dirstateItemObject *self,
274 static PyObject *dirstate_item_need_delay(dirstateItemObject *self,
277 PyObject *value)
275 PyObject *value)
278 {
276 {
279 long now;
277 long now;
280 if (!pylong_to_long(value, &now)) {
278 if (!pylong_to_long(value, &now)) {
281 return NULL;
279 return NULL;
282 }
280 }
283 if (dirstate_item_c_v1_state(self) == 'n' &&
281 if (dirstate_item_c_v1_state(self) == 'n' &&
284 dirstate_item_c_v1_mtime(self) == now) {
282 dirstate_item_c_v1_mtime(self) == now) {
285 Py_RETURN_TRUE;
283 Py_RETURN_TRUE;
286 } else {
284 } else {
287 Py_RETURN_FALSE;
285 Py_RETURN_FALSE;
288 }
286 }
289 };
287 };
290
288
291 /* This will never change since it's bound to V1
289 /* This will never change since it's bound to V1
292 */
290 */
293 static inline dirstateItemObject *
291 static inline dirstateItemObject *
294 dirstate_item_from_v1_data(char state, int mode, int size, int mtime)
292 dirstate_item_from_v1_data(char state, int mode, int size, int mtime)
295 {
293 {
296 dirstateItemObject *t =
294 dirstateItemObject *t =
297 PyObject_New(dirstateItemObject, &dirstateItemType);
295 PyObject_New(dirstateItemObject, &dirstateItemType);
298 if (!t) {
296 if (!t) {
299 return NULL;
297 return NULL;
300 }
298 }
301 t->flags = 0;
299 t->flags = 0;
302 t->mode = 0;
300 t->mode = 0;
303 t->size = 0;
301 t->size = 0;
304 t->mtime = 0;
302 t->mtime = 0;
305
303
306 if (state == 'm') {
304 if (state == 'm') {
307 t->flags = (dirstate_flag_wc_tracked |
305 t->flags = (dirstate_flag_wc_tracked |
308 dirstate_flag_p1_tracked | dirstate_flag_p2_info);
306 dirstate_flag_p1_tracked | dirstate_flag_p2_info);
309 } else if (state == 'a') {
307 } else if (state == 'a') {
310 t->flags = dirstate_flag_wc_tracked;
308 t->flags = dirstate_flag_wc_tracked;
311 } else if (state == 'r') {
309 } else if (state == 'r') {
312 if (size == dirstate_v1_nonnormal) {
310 if (size == dirstate_v1_nonnormal) {
313 t->flags =
311 t->flags =
314 dirstate_flag_p1_tracked | dirstate_flag_p2_info;
312 dirstate_flag_p1_tracked | dirstate_flag_p2_info;
315 } else if (size == dirstate_v1_from_p2) {
313 } else if (size == dirstate_v1_from_p2) {
316 t->flags = dirstate_flag_p2_info;
314 t->flags = dirstate_flag_p2_info;
317 } else {
315 } else {
318 t->flags = dirstate_flag_p1_tracked;
316 t->flags = dirstate_flag_p1_tracked;
319 }
317 }
320 } else if (state == 'n') {
318 } else if (state == 'n') {
321 if (size == dirstate_v1_from_p2) {
319 if (size == dirstate_v1_from_p2) {
322 t->flags =
320 t->flags =
323 dirstate_flag_wc_tracked | dirstate_flag_p2_info;
321 dirstate_flag_wc_tracked | dirstate_flag_p2_info;
324 } else if (size == dirstate_v1_nonnormal) {
322 } else if (size == dirstate_v1_nonnormal) {
325 t->flags =
323 t->flags =
326 dirstate_flag_wc_tracked | dirstate_flag_p1_tracked;
324 dirstate_flag_wc_tracked | dirstate_flag_p1_tracked;
327 } else if (mtime == ambiguous_time) {
325 } else if (mtime == ambiguous_time) {
328 t->flags = (dirstate_flag_wc_tracked |
326 t->flags = (dirstate_flag_wc_tracked |
329 dirstate_flag_p1_tracked |
327 dirstate_flag_p1_tracked |
330 dirstate_flag_has_meaningful_data);
328 dirstate_flag_has_meaningful_data);
331 t->mode = mode;
329 t->mode = mode;
332 t->size = size;
330 t->size = size;
333 } else {
331 } else {
334 t->flags = (dirstate_flag_wc_tracked |
332 t->flags = (dirstate_flag_wc_tracked |
335 dirstate_flag_p1_tracked |
333 dirstate_flag_p1_tracked |
336 dirstate_flag_has_meaningful_data |
334 dirstate_flag_has_meaningful_data |
337 dirstate_flag_has_file_mtime);
335 dirstate_flag_has_file_mtime);
338 t->mode = mode;
336 t->mode = mode;
339 t->size = size;
337 t->size = size;
340 t->mtime = mtime;
338 t->mtime = mtime;
341 }
339 }
342 } else {
340 } else {
343 PyErr_Format(PyExc_RuntimeError,
341 PyErr_Format(PyExc_RuntimeError,
344 "unknown state: `%c` (%d, %d, %d)", state, mode,
342 "unknown state: `%c` (%d, %d, %d)", state, mode,
345 size, mtime, NULL);
343 size, mtime, NULL);
346 Py_DECREF(t);
344 Py_DECREF(t);
347 return NULL;
345 return NULL;
348 }
346 }
349
347
350 return t;
348 return t;
351 }
349 }
352
350
353 /* This will never change since it's bound to V1, unlike `dirstate_item_new` */
351 /* This will never change since it's bound to V1, unlike `dirstate_item_new` */
354 static PyObject *dirstate_item_from_v1_meth(PyTypeObject *subtype,
352 static PyObject *dirstate_item_from_v1_meth(PyTypeObject *subtype,
355 PyObject *args)
353 PyObject *args)
356 {
354 {
357 /* We do all the initialization here and not a tp_init function because
355 /* We do all the initialization here and not a tp_init function because
358 * dirstate_item is immutable. */
356 * dirstate_item is immutable. */
359 char state;
357 char state;
360 int size, mode, mtime;
358 int size, mode, mtime;
361 if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime)) {
359 if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime)) {
362 return NULL;
360 return NULL;
363 }
361 }
364 return (PyObject *)dirstate_item_from_v1_data(state, mode, size, mtime);
362 return (PyObject *)dirstate_item_from_v1_data(state, mode, size, mtime);
365 };
363 };
366
364
367 static PyObject *dirstate_item_from_v2_meth(PyTypeObject *subtype,
365 static PyObject *dirstate_item_from_v2_meth(PyTypeObject *subtype,
368 PyObject *args)
366 PyObject *args)
369 {
367 {
370 dirstateItemObject *t =
368 dirstateItemObject *t =
371 PyObject_New(dirstateItemObject, &dirstateItemType);
369 PyObject_New(dirstateItemObject, &dirstateItemType);
372 if (!t) {
370 if (!t) {
373 return NULL;
371 return NULL;
374 }
372 }
375 if (!PyArg_ParseTuple(args, "bii", &t->flags, &t->size, &t->mtime)) {
373 if (!PyArg_ParseTuple(args, "iii", &t->flags, &t->size, &t->mtime)) {
376 return NULL;
374 return NULL;
377 }
375 }
376 if (t->flags & dirstate_flag_expected_state_is_modified) {
377 t->flags &= ~(dirstate_flag_expected_state_is_modified |
378 dirstate_flag_has_meaningful_data |
379 dirstate_flag_has_file_mtime);
380 }
378 t->mode = 0;
381 t->mode = 0;
379 if (t->flags & dirstate_flag_has_meaningful_data) {
382 if (t->flags & dirstate_flag_has_meaningful_data) {
380 if (t->flags & dirstate_flag_mode_exec_perm) {
383 if (t->flags & dirstate_flag_mode_exec_perm) {
381 t->mode = 0755;
384 t->mode = 0755;
382 } else {
385 } else {
383 t->mode = 0644;
386 t->mode = 0644;
384 }
387 }
385 if (t->flags & dirstate_flag_mode_is_symlink) {
388 if (t->flags & dirstate_flag_mode_is_symlink) {
386 t->mode |= S_IFLNK;
389 t->mode |= S_IFLNK;
387 } else {
390 } else {
388 t->mode |= S_IFREG;
391 t->mode |= S_IFREG;
389 }
392 }
390 }
393 }
391 return (PyObject *)t;
394 return (PyObject *)t;
392 };
395 };
393
396
394 /* This means the next status call will have to actually check its content
397 /* This means the next status call will have to actually check its content
395 to make sure it is correct. */
398 to make sure it is correct. */
396 static PyObject *dirstate_item_set_possibly_dirty(dirstateItemObject *self)
399 static PyObject *dirstate_item_set_possibly_dirty(dirstateItemObject *self)
397 {
400 {
398 self->flags &= ~dirstate_flag_has_file_mtime;
401 self->flags &= ~dirstate_flag_has_file_mtime;
399 Py_RETURN_NONE;
402 Py_RETURN_NONE;
400 }
403 }
401
404
402 /* See docstring of the python implementation for details */
405 /* See docstring of the python implementation for details */
403 static PyObject *dirstate_item_set_clean(dirstateItemObject *self,
406 static PyObject *dirstate_item_set_clean(dirstateItemObject *self,
404 PyObject *args)
407 PyObject *args)
405 {
408 {
406 int size, mode, mtime;
409 int size, mode, mtime;
407 if (!PyArg_ParseTuple(args, "iii", &mode, &size, &mtime)) {
410 if (!PyArg_ParseTuple(args, "iii", &mode, &size, &mtime)) {
408 return NULL;
411 return NULL;
409 }
412 }
410 self->flags = dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
413 self->flags = dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
411 dirstate_flag_has_meaningful_data |
414 dirstate_flag_has_meaningful_data |
412 dirstate_flag_has_file_mtime;
415 dirstate_flag_has_file_mtime;
413 self->mode = mode;
416 self->mode = mode;
414 self->size = size;
417 self->size = size;
415 self->mtime = mtime;
418 self->mtime = mtime;
416 Py_RETURN_NONE;
419 Py_RETURN_NONE;
417 }
420 }
418
421
419 static PyObject *dirstate_item_set_tracked(dirstateItemObject *self)
422 static PyObject *dirstate_item_set_tracked(dirstateItemObject *self)
420 {
423 {
421 self->flags |= dirstate_flag_wc_tracked;
424 self->flags |= dirstate_flag_wc_tracked;
422 self->flags &= ~dirstate_flag_has_file_mtime;
425 self->flags &= ~dirstate_flag_has_file_mtime;
423 Py_RETURN_NONE;
426 Py_RETURN_NONE;
424 }
427 }
425
428
426 static PyObject *dirstate_item_set_untracked(dirstateItemObject *self)
429 static PyObject *dirstate_item_set_untracked(dirstateItemObject *self)
427 {
430 {
428 self->flags &= ~dirstate_flag_wc_tracked;
431 self->flags &= ~dirstate_flag_wc_tracked;
429 self->mode = 0;
432 self->mode = 0;
430 self->mtime = 0;
433 self->mtime = 0;
431 self->size = 0;
434 self->size = 0;
432 Py_RETURN_NONE;
435 Py_RETURN_NONE;
433 }
436 }
434
437
435 static PyObject *dirstate_item_drop_merge_data(dirstateItemObject *self)
438 static PyObject *dirstate_item_drop_merge_data(dirstateItemObject *self)
436 {
439 {
437 if (self->flags & dirstate_flag_p2_info) {
440 if (self->flags & dirstate_flag_p2_info) {
438 self->flags &= ~(dirstate_flag_p2_info |
441 self->flags &= ~(dirstate_flag_p2_info |
439 dirstate_flag_has_meaningful_data |
442 dirstate_flag_has_meaningful_data |
440 dirstate_flag_has_file_mtime);
443 dirstate_flag_has_file_mtime);
441 self->mode = 0;
444 self->mode = 0;
442 self->mtime = 0;
445 self->mtime = 0;
443 self->size = 0;
446 self->size = 0;
444 }
447 }
445 Py_RETURN_NONE;
448 Py_RETURN_NONE;
446 }
449 }
447 static PyMethodDef dirstate_item_methods[] = {
450 static PyMethodDef dirstate_item_methods[] = {
448 {"v2_data", (PyCFunction)dirstate_item_v2_data, METH_NOARGS,
451 {"v2_data", (PyCFunction)dirstate_item_v2_data, METH_NOARGS,
449 "return data suitable for v2 serialization"},
452 "return data suitable for v2 serialization"},
450 {"v1_state", (PyCFunction)dirstate_item_v1_state, METH_NOARGS,
453 {"v1_state", (PyCFunction)dirstate_item_v1_state, METH_NOARGS,
451 "return a \"state\" suitable for v1 serialization"},
454 "return a \"state\" suitable for v1 serialization"},
452 {"v1_mode", (PyCFunction)dirstate_item_v1_mode, METH_NOARGS,
455 {"v1_mode", (PyCFunction)dirstate_item_v1_mode, METH_NOARGS,
453 "return a \"mode\" suitable for v1 serialization"},
456 "return a \"mode\" suitable for v1 serialization"},
454 {"v1_size", (PyCFunction)dirstate_item_v1_size, METH_NOARGS,
457 {"v1_size", (PyCFunction)dirstate_item_v1_size, METH_NOARGS,
455 "return a \"size\" suitable for v1 serialization"},
458 "return a \"size\" suitable for v1 serialization"},
456 {"v1_mtime", (PyCFunction)dirstate_item_v1_mtime, METH_NOARGS,
459 {"v1_mtime", (PyCFunction)dirstate_item_v1_mtime, METH_NOARGS,
457 "return a \"mtime\" suitable for v1 serialization"},
460 "return a \"mtime\" suitable for v1 serialization"},
458 {"need_delay", (PyCFunction)dirstate_item_need_delay, METH_O,
461 {"need_delay", (PyCFunction)dirstate_item_need_delay, METH_O,
459 "True if the stored mtime would be ambiguous with the current time"},
462 "True if the stored mtime would be ambiguous with the current time"},
460 {"from_v1_data", (PyCFunction)dirstate_item_from_v1_meth,
463 {"from_v1_data", (PyCFunction)dirstate_item_from_v1_meth,
461 METH_VARARGS | METH_CLASS, "build a new DirstateItem object from V1 data"},
464 METH_VARARGS | METH_CLASS, "build a new DirstateItem object from V1 data"},
462 {"from_v2_data", (PyCFunction)dirstate_item_from_v2_meth,
465 {"from_v2_data", (PyCFunction)dirstate_item_from_v2_meth,
463 METH_VARARGS | METH_CLASS, "build a new DirstateItem object from V2 data"},
466 METH_VARARGS | METH_CLASS, "build a new DirstateItem object from V2 data"},
464 {"set_possibly_dirty", (PyCFunction)dirstate_item_set_possibly_dirty,
467 {"set_possibly_dirty", (PyCFunction)dirstate_item_set_possibly_dirty,
465 METH_NOARGS, "mark a file as \"possibly dirty\""},
468 METH_NOARGS, "mark a file as \"possibly dirty\""},
466 {"set_clean", (PyCFunction)dirstate_item_set_clean, METH_VARARGS,
469 {"set_clean", (PyCFunction)dirstate_item_set_clean, METH_VARARGS,
467 "mark a file as \"clean\""},
470 "mark a file as \"clean\""},
468 {"set_tracked", (PyCFunction)dirstate_item_set_tracked, METH_NOARGS,
471 {"set_tracked", (PyCFunction)dirstate_item_set_tracked, METH_NOARGS,
469 "mark a file as \"tracked\""},
472 "mark a file as \"tracked\""},
470 {"set_untracked", (PyCFunction)dirstate_item_set_untracked, METH_NOARGS,
473 {"set_untracked", (PyCFunction)dirstate_item_set_untracked, METH_NOARGS,
471 "mark a file as \"untracked\""},
474 "mark a file as \"untracked\""},
472 {"drop_merge_data", (PyCFunction)dirstate_item_drop_merge_data, METH_NOARGS,
475 {"drop_merge_data", (PyCFunction)dirstate_item_drop_merge_data, METH_NOARGS,
473 "remove all \"merge-only\" from a DirstateItem"},
476 "remove all \"merge-only\" from a DirstateItem"},
474 {NULL} /* Sentinel */
477 {NULL} /* Sentinel */
475 };
478 };
476
479
477 static PyObject *dirstate_item_get_mode(dirstateItemObject *self)
480 static PyObject *dirstate_item_get_mode(dirstateItemObject *self)
478 {
481 {
479 return PyInt_FromLong(dirstate_item_c_v1_mode(self));
482 return PyInt_FromLong(dirstate_item_c_v1_mode(self));
480 };
483 };
481
484
482 static PyObject *dirstate_item_get_size(dirstateItemObject *self)
485 static PyObject *dirstate_item_get_size(dirstateItemObject *self)
483 {
486 {
484 return PyInt_FromLong(dirstate_item_c_v1_size(self));
487 return PyInt_FromLong(dirstate_item_c_v1_size(self));
485 };
488 };
486
489
487 static PyObject *dirstate_item_get_mtime(dirstateItemObject *self)
490 static PyObject *dirstate_item_get_mtime(dirstateItemObject *self)
488 {
491 {
489 return PyInt_FromLong(dirstate_item_c_v1_mtime(self));
492 return PyInt_FromLong(dirstate_item_c_v1_mtime(self));
490 };
493 };
491
494
492 static PyObject *dirstate_item_get_state(dirstateItemObject *self)
495 static PyObject *dirstate_item_get_state(dirstateItemObject *self)
493 {
496 {
494 char state = dirstate_item_c_v1_state(self);
497 char state = dirstate_item_c_v1_state(self);
495 return PyBytes_FromStringAndSize(&state, 1);
498 return PyBytes_FromStringAndSize(&state, 1);
496 };
499 };
497
500
498 static PyObject *dirstate_item_get_tracked(dirstateItemObject *self)
501 static PyObject *dirstate_item_get_tracked(dirstateItemObject *self)
499 {
502 {
500 if (dirstate_item_c_tracked(self)) {
503 if (dirstate_item_c_tracked(self)) {
501 Py_RETURN_TRUE;
504 Py_RETURN_TRUE;
502 } else {
505 } else {
503 Py_RETURN_FALSE;
506 Py_RETURN_FALSE;
504 }
507 }
505 };
508 };
506 static PyObject *dirstate_item_get_p1_tracked(dirstateItemObject *self)
509 static PyObject *dirstate_item_get_p1_tracked(dirstateItemObject *self)
507 {
510 {
508 if (self->flags & dirstate_flag_p1_tracked) {
511 if (self->flags & dirstate_flag_p1_tracked) {
509 Py_RETURN_TRUE;
512 Py_RETURN_TRUE;
510 } else {
513 } else {
511 Py_RETURN_FALSE;
514 Py_RETURN_FALSE;
512 }
515 }
513 };
516 };
514
517
515 static PyObject *dirstate_item_get_added(dirstateItemObject *self)
518 static PyObject *dirstate_item_get_added(dirstateItemObject *self)
516 {
519 {
517 if (dirstate_item_c_added(self)) {
520 if (dirstate_item_c_added(self)) {
518 Py_RETURN_TRUE;
521 Py_RETURN_TRUE;
519 } else {
522 } else {
520 Py_RETURN_FALSE;
523 Py_RETURN_FALSE;
521 }
524 }
522 };
525 };
523
526
524 static PyObject *dirstate_item_get_p2_info(dirstateItemObject *self)
527 static PyObject *dirstate_item_get_p2_info(dirstateItemObject *self)
525 {
528 {
526 if (self->flags & dirstate_flag_wc_tracked &&
529 if (self->flags & dirstate_flag_wc_tracked &&
527 self->flags & dirstate_flag_p2_info) {
530 self->flags & dirstate_flag_p2_info) {
528 Py_RETURN_TRUE;
531 Py_RETURN_TRUE;
529 } else {
532 } else {
530 Py_RETURN_FALSE;
533 Py_RETURN_FALSE;
531 }
534 }
532 };
535 };
533
536
534 static PyObject *dirstate_item_get_merged(dirstateItemObject *self)
537 static PyObject *dirstate_item_get_merged(dirstateItemObject *self)
535 {
538 {
536 if (dirstate_item_c_merged(self)) {
539 if (dirstate_item_c_merged(self)) {
537 Py_RETURN_TRUE;
540 Py_RETURN_TRUE;
538 } else {
541 } else {
539 Py_RETURN_FALSE;
542 Py_RETURN_FALSE;
540 }
543 }
541 };
544 };
542
545
543 static PyObject *dirstate_item_get_from_p2(dirstateItemObject *self)
546 static PyObject *dirstate_item_get_from_p2(dirstateItemObject *self)
544 {
547 {
545 if (dirstate_item_c_from_p2(self)) {
548 if (dirstate_item_c_from_p2(self)) {
546 Py_RETURN_TRUE;
549 Py_RETURN_TRUE;
547 } else {
550 } else {
548 Py_RETURN_FALSE;
551 Py_RETURN_FALSE;
549 }
552 }
550 };
553 };
551
554
552 static PyObject *dirstate_item_get_maybe_clean(dirstateItemObject *self)
555 static PyObject *dirstate_item_get_maybe_clean(dirstateItemObject *self)
553 {
556 {
554 if (!(self->flags & dirstate_flag_wc_tracked)) {
557 if (!(self->flags & dirstate_flag_wc_tracked)) {
555 Py_RETURN_FALSE;
558 Py_RETURN_FALSE;
556 } else if (!(self->flags & dirstate_flag_p1_tracked)) {
559 } else if (!(self->flags & dirstate_flag_p1_tracked)) {
557 Py_RETURN_FALSE;
560 Py_RETURN_FALSE;
558 } else if (self->flags & dirstate_flag_p2_info) {
561 } else if (self->flags & dirstate_flag_p2_info) {
559 Py_RETURN_FALSE;
562 Py_RETURN_FALSE;
560 } else {
563 } else {
561 Py_RETURN_TRUE;
564 Py_RETURN_TRUE;
562 }
565 }
563 };
566 };
564
567
565 static PyObject *dirstate_item_get_any_tracked(dirstateItemObject *self)
568 static PyObject *dirstate_item_get_any_tracked(dirstateItemObject *self)
566 {
569 {
567 if (dirstate_item_c_any_tracked(self)) {
570 if (dirstate_item_c_any_tracked(self)) {
568 Py_RETURN_TRUE;
571 Py_RETURN_TRUE;
569 } else {
572 } else {
570 Py_RETURN_FALSE;
573 Py_RETURN_FALSE;
571 }
574 }
572 };
575 };
573
576
574 static PyObject *dirstate_item_get_removed(dirstateItemObject *self)
577 static PyObject *dirstate_item_get_removed(dirstateItemObject *self)
575 {
578 {
576 if (dirstate_item_c_removed(self)) {
579 if (dirstate_item_c_removed(self)) {
577 Py_RETURN_TRUE;
580 Py_RETURN_TRUE;
578 } else {
581 } else {
579 Py_RETURN_FALSE;
582 Py_RETURN_FALSE;
580 }
583 }
581 };
584 };
582
585
583 static PyGetSetDef dirstate_item_getset[] = {
586 static PyGetSetDef dirstate_item_getset[] = {
584 {"mode", (getter)dirstate_item_get_mode, NULL, "mode", NULL},
587 {"mode", (getter)dirstate_item_get_mode, NULL, "mode", NULL},
585 {"size", (getter)dirstate_item_get_size, NULL, "size", NULL},
588 {"size", (getter)dirstate_item_get_size, NULL, "size", NULL},
586 {"mtime", (getter)dirstate_item_get_mtime, NULL, "mtime", NULL},
589 {"mtime", (getter)dirstate_item_get_mtime, NULL, "mtime", NULL},
587 {"state", (getter)dirstate_item_get_state, NULL, "state", NULL},
590 {"state", (getter)dirstate_item_get_state, NULL, "state", NULL},
588 {"tracked", (getter)dirstate_item_get_tracked, NULL, "tracked", NULL},
591 {"tracked", (getter)dirstate_item_get_tracked, NULL, "tracked", NULL},
589 {"p1_tracked", (getter)dirstate_item_get_p1_tracked, NULL, "p1_tracked",
592 {"p1_tracked", (getter)dirstate_item_get_p1_tracked, NULL, "p1_tracked",
590 NULL},
593 NULL},
591 {"added", (getter)dirstate_item_get_added, NULL, "added", NULL},
594 {"added", (getter)dirstate_item_get_added, NULL, "added", NULL},
592 {"p2_info", (getter)dirstate_item_get_p2_info, NULL, "p2_info", NULL},
595 {"p2_info", (getter)dirstate_item_get_p2_info, NULL, "p2_info", NULL},
593 {"merged", (getter)dirstate_item_get_merged, NULL, "merged", NULL},
596 {"merged", (getter)dirstate_item_get_merged, NULL, "merged", NULL},
594 {"from_p2", (getter)dirstate_item_get_from_p2, NULL, "from_p2", NULL},
597 {"from_p2", (getter)dirstate_item_get_from_p2, NULL, "from_p2", NULL},
595 {"maybe_clean", (getter)dirstate_item_get_maybe_clean, NULL, "maybe_clean",
598 {"maybe_clean", (getter)dirstate_item_get_maybe_clean, NULL, "maybe_clean",
596 NULL},
599 NULL},
597 {"any_tracked", (getter)dirstate_item_get_any_tracked, NULL, "any_tracked",
600 {"any_tracked", (getter)dirstate_item_get_any_tracked, NULL, "any_tracked",
598 NULL},
601 NULL},
599 {"removed", (getter)dirstate_item_get_removed, NULL, "removed", NULL},
602 {"removed", (getter)dirstate_item_get_removed, NULL, "removed", NULL},
600 {NULL} /* Sentinel */
603 {NULL} /* Sentinel */
601 };
604 };
602
605
603 PyTypeObject dirstateItemType = {
606 PyTypeObject dirstateItemType = {
604 PyVarObject_HEAD_INIT(NULL, 0) /* header */
607 PyVarObject_HEAD_INIT(NULL, 0) /* header */
605 "dirstate_tuple", /* tp_name */
608 "dirstate_tuple", /* tp_name */
606 sizeof(dirstateItemObject), /* tp_basicsize */
609 sizeof(dirstateItemObject), /* tp_basicsize */
607 0, /* tp_itemsize */
610 0, /* tp_itemsize */
608 (destructor)dirstate_item_dealloc, /* tp_dealloc */
611 (destructor)dirstate_item_dealloc, /* tp_dealloc */
609 0, /* tp_print */
612 0, /* tp_print */
610 0, /* tp_getattr */
613 0, /* tp_getattr */
611 0, /* tp_setattr */
614 0, /* tp_setattr */
612 0, /* tp_compare */
615 0, /* tp_compare */
613 0, /* tp_repr */
616 0, /* tp_repr */
614 0, /* tp_as_number */
617 0, /* tp_as_number */
615 0, /* tp_as_sequence */
618 0, /* tp_as_sequence */
616 0, /* tp_as_mapping */
619 0, /* tp_as_mapping */
617 0, /* tp_hash */
620 0, /* tp_hash */
618 0, /* tp_call */
621 0, /* tp_call */
619 0, /* tp_str */
622 0, /* tp_str */
620 0, /* tp_getattro */
623 0, /* tp_getattro */
621 0, /* tp_setattro */
624 0, /* tp_setattro */
622 0, /* tp_as_buffer */
625 0, /* tp_as_buffer */
623 Py_TPFLAGS_DEFAULT, /* tp_flags */
626 Py_TPFLAGS_DEFAULT, /* tp_flags */
624 "dirstate tuple", /* tp_doc */
627 "dirstate tuple", /* tp_doc */
625 0, /* tp_traverse */
628 0, /* tp_traverse */
626 0, /* tp_clear */
629 0, /* tp_clear */
627 0, /* tp_richcompare */
630 0, /* tp_richcompare */
628 0, /* tp_weaklistoffset */
631 0, /* tp_weaklistoffset */
629 0, /* tp_iter */
632 0, /* tp_iter */
630 0, /* tp_iternext */
633 0, /* tp_iternext */
631 dirstate_item_methods, /* tp_methods */
634 dirstate_item_methods, /* tp_methods */
632 0, /* tp_members */
635 0, /* tp_members */
633 dirstate_item_getset, /* tp_getset */
636 dirstate_item_getset, /* tp_getset */
634 0, /* tp_base */
637 0, /* tp_base */
635 0, /* tp_dict */
638 0, /* tp_dict */
636 0, /* tp_descr_get */
639 0, /* tp_descr_get */
637 0, /* tp_descr_set */
640 0, /* tp_descr_set */
638 0, /* tp_dictoffset */
641 0, /* tp_dictoffset */
639 0, /* tp_init */
642 0, /* tp_init */
640 0, /* tp_alloc */
643 0, /* tp_alloc */
641 dirstate_item_new, /* tp_new */
644 dirstate_item_new, /* tp_new */
642 };
645 };
643
646
644 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
647 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
645 {
648 {
646 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
649 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
647 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
650 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
648 char state, *cur, *str, *cpos;
651 char state, *cur, *str, *cpos;
649 int mode, size, mtime;
652 int mode, size, mtime;
650 unsigned int flen, pos = 40;
653 unsigned int flen, pos = 40;
651 Py_ssize_t len = 40;
654 Py_ssize_t len = 40;
652 Py_ssize_t readlen;
655 Py_ssize_t readlen;
653
656
654 if (!PyArg_ParseTuple(
657 if (!PyArg_ParseTuple(
655 args, PY23("O!O!s#:parse_dirstate", "O!O!y#:parse_dirstate"),
658 args, PY23("O!O!s#:parse_dirstate", "O!O!y#:parse_dirstate"),
656 &PyDict_Type, &dmap, &PyDict_Type, &cmap, &str, &readlen)) {
659 &PyDict_Type, &dmap, &PyDict_Type, &cmap, &str, &readlen)) {
657 goto quit;
660 goto quit;
658 }
661 }
659
662
660 len = readlen;
663 len = readlen;
661
664
662 /* read parents */
665 /* read parents */
663 if (len < 40) {
666 if (len < 40) {
664 PyErr_SetString(PyExc_ValueError,
667 PyErr_SetString(PyExc_ValueError,
665 "too little data for parents");
668 "too little data for parents");
666 goto quit;
669 goto quit;
667 }
670 }
668
671
669 parents = Py_BuildValue(PY23("s#s#", "y#y#"), str, (Py_ssize_t)20,
672 parents = Py_BuildValue(PY23("s#s#", "y#y#"), str, (Py_ssize_t)20,
670 str + 20, (Py_ssize_t)20);
673 str + 20, (Py_ssize_t)20);
671 if (!parents) {
674 if (!parents) {
672 goto quit;
675 goto quit;
673 }
676 }
674
677
675 /* read filenames */
678 /* read filenames */
676 while (pos >= 40 && pos < len) {
679 while (pos >= 40 && pos < len) {
677 if (pos + 17 > len) {
680 if (pos + 17 > len) {
678 PyErr_SetString(PyExc_ValueError,
681 PyErr_SetString(PyExc_ValueError,
679 "overflow in dirstate");
682 "overflow in dirstate");
680 goto quit;
683 goto quit;
681 }
684 }
682 cur = str + pos;
685 cur = str + pos;
683 /* unpack header */
686 /* unpack header */
684 state = *cur;
687 state = *cur;
685 mode = getbe32(cur + 1);
688 mode = getbe32(cur + 1);
686 size = getbe32(cur + 5);
689 size = getbe32(cur + 5);
687 mtime = getbe32(cur + 9);
690 mtime = getbe32(cur + 9);
688 flen = getbe32(cur + 13);
691 flen = getbe32(cur + 13);
689 pos += 17;
692 pos += 17;
690 cur += 17;
693 cur += 17;
691 if (flen > len - pos) {
694 if (flen > len - pos) {
692 PyErr_SetString(PyExc_ValueError,
695 PyErr_SetString(PyExc_ValueError,
693 "overflow in dirstate");
696 "overflow in dirstate");
694 goto quit;
697 goto quit;
695 }
698 }
696
699
697 entry = (PyObject *)dirstate_item_from_v1_data(state, mode,
700 entry = (PyObject *)dirstate_item_from_v1_data(state, mode,
698 size, mtime);
701 size, mtime);
699 if (!entry)
702 if (!entry)
700 goto quit;
703 goto quit;
701 cpos = memchr(cur, 0, flen);
704 cpos = memchr(cur, 0, flen);
702 if (cpos) {
705 if (cpos) {
703 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
706 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
704 cname = PyBytes_FromStringAndSize(
707 cname = PyBytes_FromStringAndSize(
705 cpos + 1, flen - (cpos - cur) - 1);
708 cpos + 1, flen - (cpos - cur) - 1);
706 if (!fname || !cname ||
709 if (!fname || !cname ||
707 PyDict_SetItem(cmap, fname, cname) == -1 ||
710 PyDict_SetItem(cmap, fname, cname) == -1 ||
708 PyDict_SetItem(dmap, fname, entry) == -1) {
711 PyDict_SetItem(dmap, fname, entry) == -1) {
709 goto quit;
712 goto quit;
710 }
713 }
711 Py_DECREF(cname);
714 Py_DECREF(cname);
712 } else {
715 } else {
713 fname = PyBytes_FromStringAndSize(cur, flen);
716 fname = PyBytes_FromStringAndSize(cur, flen);
714 if (!fname ||
717 if (!fname ||
715 PyDict_SetItem(dmap, fname, entry) == -1) {
718 PyDict_SetItem(dmap, fname, entry) == -1) {
716 goto quit;
719 goto quit;
717 }
720 }
718 }
721 }
719 Py_DECREF(fname);
722 Py_DECREF(fname);
720 Py_DECREF(entry);
723 Py_DECREF(entry);
721 fname = cname = entry = NULL;
724 fname = cname = entry = NULL;
722 pos += flen;
725 pos += flen;
723 }
726 }
724
727
725 ret = parents;
728 ret = parents;
726 Py_INCREF(ret);
729 Py_INCREF(ret);
727 quit:
730 quit:
728 Py_XDECREF(fname);
731 Py_XDECREF(fname);
729 Py_XDECREF(cname);
732 Py_XDECREF(cname);
730 Py_XDECREF(entry);
733 Py_XDECREF(entry);
731 Py_XDECREF(parents);
734 Py_XDECREF(parents);
732 return ret;
735 return ret;
733 }
736 }
734
737
735 /*
738 /*
736 * Efficiently pack a dirstate object into its on-disk format.
739 * Efficiently pack a dirstate object into its on-disk format.
737 */
740 */
738 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
741 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
739 {
742 {
740 PyObject *packobj = NULL;
743 PyObject *packobj = NULL;
741 PyObject *map, *copymap, *pl, *mtime_unset = NULL;
744 PyObject *map, *copymap, *pl, *mtime_unset = NULL;
742 Py_ssize_t nbytes, pos, l;
745 Py_ssize_t nbytes, pos, l;
743 PyObject *k, *v = NULL, *pn;
746 PyObject *k, *v = NULL, *pn;
744 char *p, *s;
747 char *p, *s;
745 int now;
748 int now;
746
749
747 if (!PyArg_ParseTuple(args, "O!O!O!i:pack_dirstate", &PyDict_Type, &map,
750 if (!PyArg_ParseTuple(args, "O!O!O!i:pack_dirstate", &PyDict_Type, &map,
748 &PyDict_Type, &copymap, &PyTuple_Type, &pl,
751 &PyDict_Type, &copymap, &PyTuple_Type, &pl,
749 &now)) {
752 &now)) {
750 return NULL;
753 return NULL;
751 }
754 }
752
755
753 if (PyTuple_Size(pl) != 2) {
756 if (PyTuple_Size(pl) != 2) {
754 PyErr_SetString(PyExc_TypeError, "expected 2-element tuple");
757 PyErr_SetString(PyExc_TypeError, "expected 2-element tuple");
755 return NULL;
758 return NULL;
756 }
759 }
757
760
758 /* Figure out how much we need to allocate. */
761 /* Figure out how much we need to allocate. */
759 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
762 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
760 PyObject *c;
763 PyObject *c;
761 if (!PyBytes_Check(k)) {
764 if (!PyBytes_Check(k)) {
762 PyErr_SetString(PyExc_TypeError, "expected string key");
765 PyErr_SetString(PyExc_TypeError, "expected string key");
763 goto bail;
766 goto bail;
764 }
767 }
765 nbytes += PyBytes_GET_SIZE(k) + 17;
768 nbytes += PyBytes_GET_SIZE(k) + 17;
766 c = PyDict_GetItem(copymap, k);
769 c = PyDict_GetItem(copymap, k);
767 if (c) {
770 if (c) {
768 if (!PyBytes_Check(c)) {
771 if (!PyBytes_Check(c)) {
769 PyErr_SetString(PyExc_TypeError,
772 PyErr_SetString(PyExc_TypeError,
770 "expected string key");
773 "expected string key");
771 goto bail;
774 goto bail;
772 }
775 }
773 nbytes += PyBytes_GET_SIZE(c) + 1;
776 nbytes += PyBytes_GET_SIZE(c) + 1;
774 }
777 }
775 }
778 }
776
779
777 packobj = PyBytes_FromStringAndSize(NULL, nbytes);
780 packobj = PyBytes_FromStringAndSize(NULL, nbytes);
778 if (packobj == NULL) {
781 if (packobj == NULL) {
779 goto bail;
782 goto bail;
780 }
783 }
781
784
782 p = PyBytes_AS_STRING(packobj);
785 p = PyBytes_AS_STRING(packobj);
783
786
784 pn = PyTuple_GET_ITEM(pl, 0);
787 pn = PyTuple_GET_ITEM(pl, 0);
785 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
788 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
786 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
789 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
787 goto bail;
790 goto bail;
788 }
791 }
789 memcpy(p, s, l);
792 memcpy(p, s, l);
790 p += 20;
793 p += 20;
791 pn = PyTuple_GET_ITEM(pl, 1);
794 pn = PyTuple_GET_ITEM(pl, 1);
792 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
795 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
793 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
796 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
794 goto bail;
797 goto bail;
795 }
798 }
796 memcpy(p, s, l);
799 memcpy(p, s, l);
797 p += 20;
800 p += 20;
798
801
799 for (pos = 0; PyDict_Next(map, &pos, &k, &v);) {
802 for (pos = 0; PyDict_Next(map, &pos, &k, &v);) {
800 dirstateItemObject *tuple;
803 dirstateItemObject *tuple;
801 char state;
804 char state;
802 int mode, size, mtime;
805 int mode, size, mtime;
803 Py_ssize_t len, l;
806 Py_ssize_t len, l;
804 PyObject *o;
807 PyObject *o;
805 char *t;
808 char *t;
806
809
807 if (!dirstate_tuple_check(v)) {
810 if (!dirstate_tuple_check(v)) {
808 PyErr_SetString(PyExc_TypeError,
811 PyErr_SetString(PyExc_TypeError,
809 "expected a dirstate tuple");
812 "expected a dirstate tuple");
810 goto bail;
813 goto bail;
811 }
814 }
812 tuple = (dirstateItemObject *)v;
815 tuple = (dirstateItemObject *)v;
813
816
814 state = dirstate_item_c_v1_state(tuple);
817 state = dirstate_item_c_v1_state(tuple);
815 mode = dirstate_item_c_v1_mode(tuple);
818 mode = dirstate_item_c_v1_mode(tuple);
816 size = dirstate_item_c_v1_size(tuple);
819 size = dirstate_item_c_v1_size(tuple);
817 mtime = dirstate_item_c_v1_mtime(tuple);
820 mtime = dirstate_item_c_v1_mtime(tuple);
818 if (state == 'n' && mtime == now) {
821 if (state == 'n' && mtime == now) {
819 /* See pure/parsers.py:pack_dirstate for why we do
822 /* See pure/parsers.py:pack_dirstate for why we do
820 * this. */
823 * this. */
821 mtime = -1;
824 mtime = -1;
822 mtime_unset = (PyObject *)dirstate_item_from_v1_data(
825 mtime_unset = (PyObject *)dirstate_item_from_v1_data(
823 state, mode, size, mtime);
826 state, mode, size, mtime);
824 if (!mtime_unset) {
827 if (!mtime_unset) {
825 goto bail;
828 goto bail;
826 }
829 }
827 if (PyDict_SetItem(map, k, mtime_unset) == -1) {
830 if (PyDict_SetItem(map, k, mtime_unset) == -1) {
828 goto bail;
831 goto bail;
829 }
832 }
830 Py_DECREF(mtime_unset);
833 Py_DECREF(mtime_unset);
831 mtime_unset = NULL;
834 mtime_unset = NULL;
832 }
835 }
833 *p++ = state;
836 *p++ = state;
834 putbe32((uint32_t)mode, p);
837 putbe32((uint32_t)mode, p);
835 putbe32((uint32_t)size, p + 4);
838 putbe32((uint32_t)size, p + 4);
836 putbe32((uint32_t)mtime, p + 8);
839 putbe32((uint32_t)mtime, p + 8);
837 t = p + 12;
840 t = p + 12;
838 p += 16;
841 p += 16;
839 len = PyBytes_GET_SIZE(k);
842 len = PyBytes_GET_SIZE(k);
840 memcpy(p, PyBytes_AS_STRING(k), len);
843 memcpy(p, PyBytes_AS_STRING(k), len);
841 p += len;
844 p += len;
842 o = PyDict_GetItem(copymap, k);
845 o = PyDict_GetItem(copymap, k);
843 if (o) {
846 if (o) {
844 *p++ = '\0';
847 *p++ = '\0';
845 l = PyBytes_GET_SIZE(o);
848 l = PyBytes_GET_SIZE(o);
846 memcpy(p, PyBytes_AS_STRING(o), l);
849 memcpy(p, PyBytes_AS_STRING(o), l);
847 p += l;
850 p += l;
848 len += l + 1;
851 len += l + 1;
849 }
852 }
850 putbe32((uint32_t)len, t);
853 putbe32((uint32_t)len, t);
851 }
854 }
852
855
853 pos = p - PyBytes_AS_STRING(packobj);
856 pos = p - PyBytes_AS_STRING(packobj);
854 if (pos != nbytes) {
857 if (pos != nbytes) {
855 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
858 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
856 (long)pos, (long)nbytes);
859 (long)pos, (long)nbytes);
857 goto bail;
860 goto bail;
858 }
861 }
859
862
860 return packobj;
863 return packobj;
861 bail:
864 bail:
862 Py_XDECREF(mtime_unset);
865 Py_XDECREF(mtime_unset);
863 Py_XDECREF(packobj);
866 Py_XDECREF(packobj);
864 Py_XDECREF(v);
867 Py_XDECREF(v);
865 return NULL;
868 return NULL;
866 }
869 }
867
870
868 #define BUMPED_FIX 1
871 #define BUMPED_FIX 1
869 #define USING_SHA_256 2
872 #define USING_SHA_256 2
870 #define FM1_HEADER_SIZE (4 + 8 + 2 + 2 + 1 + 1 + 1)
873 #define FM1_HEADER_SIZE (4 + 8 + 2 + 2 + 1 + 1 + 1)
871
874
872 static PyObject *readshas(const char *source, unsigned char num,
875 static PyObject *readshas(const char *source, unsigned char num,
873 Py_ssize_t hashwidth)
876 Py_ssize_t hashwidth)
874 {
877 {
875 int i;
878 int i;
876 PyObject *list = PyTuple_New(num);
879 PyObject *list = PyTuple_New(num);
877 if (list == NULL) {
880 if (list == NULL) {
878 return NULL;
881 return NULL;
879 }
882 }
880 for (i = 0; i < num; i++) {
883 for (i = 0; i < num; i++) {
881 PyObject *hash = PyBytes_FromStringAndSize(source, hashwidth);
884 PyObject *hash = PyBytes_FromStringAndSize(source, hashwidth);
882 if (hash == NULL) {
885 if (hash == NULL) {
883 Py_DECREF(list);
886 Py_DECREF(list);
884 return NULL;
887 return NULL;
885 }
888 }
886 PyTuple_SET_ITEM(list, i, hash);
889 PyTuple_SET_ITEM(list, i, hash);
887 source += hashwidth;
890 source += hashwidth;
888 }
891 }
889 return list;
892 return list;
890 }
893 }
891
894
892 static PyObject *fm1readmarker(const char *databegin, const char *dataend,
895 static PyObject *fm1readmarker(const char *databegin, const char *dataend,
893 uint32_t *msize)
896 uint32_t *msize)
894 {
897 {
895 const char *data = databegin;
898 const char *data = databegin;
896 const char *meta;
899 const char *meta;
897
900
898 double mtime;
901 double mtime;
899 int16_t tz;
902 int16_t tz;
900 uint16_t flags;
903 uint16_t flags;
901 unsigned char nsuccs, nparents, nmetadata;
904 unsigned char nsuccs, nparents, nmetadata;
902 Py_ssize_t hashwidth = 20;
905 Py_ssize_t hashwidth = 20;
903
906
904 PyObject *prec = NULL, *parents = NULL, *succs = NULL;
907 PyObject *prec = NULL, *parents = NULL, *succs = NULL;
905 PyObject *metadata = NULL, *ret = NULL;
908 PyObject *metadata = NULL, *ret = NULL;
906 int i;
909 int i;
907
910
908 if (data + FM1_HEADER_SIZE > dataend) {
911 if (data + FM1_HEADER_SIZE > dataend) {
909 goto overflow;
912 goto overflow;
910 }
913 }
911
914
912 *msize = getbe32(data);
915 *msize = getbe32(data);
913 data += 4;
916 data += 4;
914 mtime = getbefloat64(data);
917 mtime = getbefloat64(data);
915 data += 8;
918 data += 8;
916 tz = getbeint16(data);
919 tz = getbeint16(data);
917 data += 2;
920 data += 2;
918 flags = getbeuint16(data);
921 flags = getbeuint16(data);
919 data += 2;
922 data += 2;
920
923
921 if (flags & USING_SHA_256) {
924 if (flags & USING_SHA_256) {
922 hashwidth = 32;
925 hashwidth = 32;
923 }
926 }
924
927
925 nsuccs = (unsigned char)(*data++);
928 nsuccs = (unsigned char)(*data++);
926 nparents = (unsigned char)(*data++);
929 nparents = (unsigned char)(*data++);
927 nmetadata = (unsigned char)(*data++);
930 nmetadata = (unsigned char)(*data++);
928
931
929 if (databegin + *msize > dataend) {
932 if (databegin + *msize > dataend) {
930 goto overflow;
933 goto overflow;
931 }
934 }
932 dataend = databegin + *msize; /* narrow down to marker size */
935 dataend = databegin + *msize; /* narrow down to marker size */
933
936
934 if (data + hashwidth > dataend) {
937 if (data + hashwidth > dataend) {
935 goto overflow;
938 goto overflow;
936 }
939 }
937 prec = PyBytes_FromStringAndSize(data, hashwidth);
940 prec = PyBytes_FromStringAndSize(data, hashwidth);
938 data += hashwidth;
941 data += hashwidth;
939 if (prec == NULL) {
942 if (prec == NULL) {
940 goto bail;
943 goto bail;
941 }
944 }
942
945
943 if (data + nsuccs * hashwidth > dataend) {
946 if (data + nsuccs * hashwidth > dataend) {
944 goto overflow;
947 goto overflow;
945 }
948 }
946 succs = readshas(data, nsuccs, hashwidth);
949 succs = readshas(data, nsuccs, hashwidth);
947 if (succs == NULL) {
950 if (succs == NULL) {
948 goto bail;
951 goto bail;
949 }
952 }
950 data += nsuccs * hashwidth;
953 data += nsuccs * hashwidth;
951
954
952 if (nparents == 1 || nparents == 2) {
955 if (nparents == 1 || nparents == 2) {
953 if (data + nparents * hashwidth > dataend) {
956 if (data + nparents * hashwidth > dataend) {
954 goto overflow;
957 goto overflow;
955 }
958 }
956 parents = readshas(data, nparents, hashwidth);
959 parents = readshas(data, nparents, hashwidth);
957 if (parents == NULL) {
960 if (parents == NULL) {
958 goto bail;
961 goto bail;
959 }
962 }
960 data += nparents * hashwidth;
963 data += nparents * hashwidth;
961 } else {
964 } else {
962 parents = Py_None;
965 parents = Py_None;
963 Py_INCREF(parents);
966 Py_INCREF(parents);
964 }
967 }
965
968
966 if (data + 2 * nmetadata > dataend) {
969 if (data + 2 * nmetadata > dataend) {
967 goto overflow;
970 goto overflow;
968 }
971 }
969 meta = data + (2 * nmetadata);
972 meta = data + (2 * nmetadata);
970 metadata = PyTuple_New(nmetadata);
973 metadata = PyTuple_New(nmetadata);
971 if (metadata == NULL) {
974 if (metadata == NULL) {
972 goto bail;
975 goto bail;
973 }
976 }
974 for (i = 0; i < nmetadata; i++) {
977 for (i = 0; i < nmetadata; i++) {
975 PyObject *tmp, *left = NULL, *right = NULL;
978 PyObject *tmp, *left = NULL, *right = NULL;
976 Py_ssize_t leftsize = (unsigned char)(*data++);
979 Py_ssize_t leftsize = (unsigned char)(*data++);
977 Py_ssize_t rightsize = (unsigned char)(*data++);
980 Py_ssize_t rightsize = (unsigned char)(*data++);
978 if (meta + leftsize + rightsize > dataend) {
981 if (meta + leftsize + rightsize > dataend) {
979 goto overflow;
982 goto overflow;
980 }
983 }
981 left = PyBytes_FromStringAndSize(meta, leftsize);
984 left = PyBytes_FromStringAndSize(meta, leftsize);
982 meta += leftsize;
985 meta += leftsize;
983 right = PyBytes_FromStringAndSize(meta, rightsize);
986 right = PyBytes_FromStringAndSize(meta, rightsize);
984 meta += rightsize;
987 meta += rightsize;
985 tmp = PyTuple_New(2);
988 tmp = PyTuple_New(2);
986 if (!left || !right || !tmp) {
989 if (!left || !right || !tmp) {
987 Py_XDECREF(left);
990 Py_XDECREF(left);
988 Py_XDECREF(right);
991 Py_XDECREF(right);
989 Py_XDECREF(tmp);
992 Py_XDECREF(tmp);
990 goto bail;
993 goto bail;
991 }
994 }
992 PyTuple_SET_ITEM(tmp, 0, left);
995 PyTuple_SET_ITEM(tmp, 0, left);
993 PyTuple_SET_ITEM(tmp, 1, right);
996 PyTuple_SET_ITEM(tmp, 1, right);
994 PyTuple_SET_ITEM(metadata, i, tmp);
997 PyTuple_SET_ITEM(metadata, i, tmp);
995 }
998 }
996 ret = Py_BuildValue("(OOHO(di)O)", prec, succs, flags, metadata, mtime,
999 ret = Py_BuildValue("(OOHO(di)O)", prec, succs, flags, metadata, mtime,
997 (int)tz * 60, parents);
1000 (int)tz * 60, parents);
998 goto bail; /* return successfully */
1001 goto bail; /* return successfully */
999
1002
1000 overflow:
1003 overflow:
1001 PyErr_SetString(PyExc_ValueError, "overflow in obsstore");
1004 PyErr_SetString(PyExc_ValueError, "overflow in obsstore");
1002 bail:
1005 bail:
1003 Py_XDECREF(prec);
1006 Py_XDECREF(prec);
1004 Py_XDECREF(succs);
1007 Py_XDECREF(succs);
1005 Py_XDECREF(metadata);
1008 Py_XDECREF(metadata);
1006 Py_XDECREF(parents);
1009 Py_XDECREF(parents);
1007 return ret;
1010 return ret;
1008 }
1011 }
1009
1012
1010 static PyObject *fm1readmarkers(PyObject *self, PyObject *args)
1013 static PyObject *fm1readmarkers(PyObject *self, PyObject *args)
1011 {
1014 {
1012 const char *data, *dataend;
1015 const char *data, *dataend;
1013 Py_ssize_t datalen, offset, stop;
1016 Py_ssize_t datalen, offset, stop;
1014 PyObject *markers = NULL;
1017 PyObject *markers = NULL;
1015
1018
1016 if (!PyArg_ParseTuple(args, PY23("s#nn", "y#nn"), &data, &datalen,
1019 if (!PyArg_ParseTuple(args, PY23("s#nn", "y#nn"), &data, &datalen,
1017 &offset, &stop)) {
1020 &offset, &stop)) {
1018 return NULL;
1021 return NULL;
1019 }
1022 }
1020 if (offset < 0) {
1023 if (offset < 0) {
1021 PyErr_SetString(PyExc_ValueError,
1024 PyErr_SetString(PyExc_ValueError,
1022 "invalid negative offset in fm1readmarkers");
1025 "invalid negative offset in fm1readmarkers");
1023 return NULL;
1026 return NULL;
1024 }
1027 }
1025 if (stop > datalen) {
1028 if (stop > datalen) {
1026 PyErr_SetString(
1029 PyErr_SetString(
1027 PyExc_ValueError,
1030 PyExc_ValueError,
1028 "stop longer than data length in fm1readmarkers");
1031 "stop longer than data length in fm1readmarkers");
1029 return NULL;
1032 return NULL;
1030 }
1033 }
1031 dataend = data + datalen;
1034 dataend = data + datalen;
1032 data += offset;
1035 data += offset;
1033 markers = PyList_New(0);
1036 markers = PyList_New(0);
1034 if (!markers) {
1037 if (!markers) {
1035 return NULL;
1038 return NULL;
1036 }
1039 }
1037 while (offset < stop) {
1040 while (offset < stop) {
1038 uint32_t msize;
1041 uint32_t msize;
1039 int error;
1042 int error;
1040 PyObject *record = fm1readmarker(data, dataend, &msize);
1043 PyObject *record = fm1readmarker(data, dataend, &msize);
1041 if (!record) {
1044 if (!record) {
1042 goto bail;
1045 goto bail;
1043 }
1046 }
1044 error = PyList_Append(markers, record);
1047 error = PyList_Append(markers, record);
1045 Py_DECREF(record);
1048 Py_DECREF(record);
1046 if (error) {
1049 if (error) {
1047 goto bail;
1050 goto bail;
1048 }
1051 }
1049 data += msize;
1052 data += msize;
1050 offset += msize;
1053 offset += msize;
1051 }
1054 }
1052 return markers;
1055 return markers;
1053 bail:
1056 bail:
1054 Py_DECREF(markers);
1057 Py_DECREF(markers);
1055 return NULL;
1058 return NULL;
1056 }
1059 }
1057
1060
1058 static char parsers_doc[] = "Efficient content parsing.";
1061 static char parsers_doc[] = "Efficient content parsing.";
1059
1062
1060 PyObject *encodedir(PyObject *self, PyObject *args);
1063 PyObject *encodedir(PyObject *self, PyObject *args);
1061 PyObject *pathencode(PyObject *self, PyObject *args);
1064 PyObject *pathencode(PyObject *self, PyObject *args);
1062 PyObject *lowerencode(PyObject *self, PyObject *args);
1065 PyObject *lowerencode(PyObject *self, PyObject *args);
1063 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs);
1066 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs);
1064
1067
1065 static PyMethodDef methods[] = {
1068 static PyMethodDef methods[] = {
1066 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
1069 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
1067 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1070 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1068 {"parse_index2", (PyCFunction)parse_index2, METH_VARARGS | METH_KEYWORDS,
1071 {"parse_index2", (PyCFunction)parse_index2, METH_VARARGS | METH_KEYWORDS,
1069 "parse a revlog index\n"},
1072 "parse a revlog index\n"},
1070 {"isasciistr", isasciistr, METH_VARARGS, "check if an ASCII string\n"},
1073 {"isasciistr", isasciistr, METH_VARARGS, "check if an ASCII string\n"},
1071 {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
1074 {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
1072 {"asciiupper", asciiupper, METH_VARARGS, "uppercase an ASCII string\n"},
1075 {"asciiupper", asciiupper, METH_VARARGS, "uppercase an ASCII string\n"},
1073 {"dict_new_presized", dict_new_presized, METH_VARARGS,
1076 {"dict_new_presized", dict_new_presized, METH_VARARGS,
1074 "construct a dict with an expected size\n"},
1077 "construct a dict with an expected size\n"},
1075 {"make_file_foldmap", make_file_foldmap, METH_VARARGS,
1078 {"make_file_foldmap", make_file_foldmap, METH_VARARGS,
1076 "make file foldmap\n"},
1079 "make file foldmap\n"},
1077 {"jsonescapeu8fast", jsonescapeu8fast, METH_VARARGS,
1080 {"jsonescapeu8fast", jsonescapeu8fast, METH_VARARGS,
1078 "escape a UTF-8 byte string to JSON (fast path)\n"},
1081 "escape a UTF-8 byte string to JSON (fast path)\n"},
1079 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
1082 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
1080 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
1083 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
1081 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
1084 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
1082 {"fm1readmarkers", fm1readmarkers, METH_VARARGS,
1085 {"fm1readmarkers", fm1readmarkers, METH_VARARGS,
1083 "parse v1 obsolete markers\n"},
1086 "parse v1 obsolete markers\n"},
1084 {NULL, NULL}};
1087 {NULL, NULL}};
1085
1088
1086 void dirs_module_init(PyObject *mod);
1089 void dirs_module_init(PyObject *mod);
1087 void manifest_module_init(PyObject *mod);
1090 void manifest_module_init(PyObject *mod);
1088 void revlog_module_init(PyObject *mod);
1091 void revlog_module_init(PyObject *mod);
1089
1092
1090 static const int version = 20;
1093 static const int version = 20;
1091
1094
1092 static void module_init(PyObject *mod)
1095 static void module_init(PyObject *mod)
1093 {
1096 {
1094 PyModule_AddIntConstant(mod, "version", version);
1097 PyModule_AddIntConstant(mod, "version", version);
1095
1098
1096 /* This module constant has two purposes. First, it lets us unit test
1099 /* This module constant has two purposes. First, it lets us unit test
1097 * the ImportError raised without hard-coding any error text. This
1100 * the ImportError raised without hard-coding any error text. This
1098 * means we can change the text in the future without breaking tests,
1101 * means we can change the text in the future without breaking tests,
1099 * even across changesets without a recompile. Second, its presence
1102 * even across changesets without a recompile. Second, its presence
1100 * can be used to determine whether the version-checking logic is
1103 * can be used to determine whether the version-checking logic is
1101 * present, which also helps in testing across changesets without a
1104 * present, which also helps in testing across changesets without a
1102 * recompile. Note that this means the pure-Python version of parsers
1105 * recompile. Note that this means the pure-Python version of parsers
1103 * should not have this module constant. */
1106 * should not have this module constant. */
1104 PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
1107 PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
1105
1108
1106 dirs_module_init(mod);
1109 dirs_module_init(mod);
1107 manifest_module_init(mod);
1110 manifest_module_init(mod);
1108 revlog_module_init(mod);
1111 revlog_module_init(mod);
1109
1112
1110 if (PyType_Ready(&dirstateItemType) < 0) {
1113 if (PyType_Ready(&dirstateItemType) < 0) {
1111 return;
1114 return;
1112 }
1115 }
1113 Py_INCREF(&dirstateItemType);
1116 Py_INCREF(&dirstateItemType);
1114 PyModule_AddObject(mod, "DirstateItem", (PyObject *)&dirstateItemType);
1117 PyModule_AddObject(mod, "DirstateItem", (PyObject *)&dirstateItemType);
1115 }
1118 }
1116
1119
1117 static int check_python_version(void)
1120 static int check_python_version(void)
1118 {
1121 {
1119 PyObject *sys = PyImport_ImportModule("sys"), *ver;
1122 PyObject *sys = PyImport_ImportModule("sys"), *ver;
1120 long hexversion;
1123 long hexversion;
1121 if (!sys) {
1124 if (!sys) {
1122 return -1;
1125 return -1;
1123 }
1126 }
1124 ver = PyObject_GetAttrString(sys, "hexversion");
1127 ver = PyObject_GetAttrString(sys, "hexversion");
1125 Py_DECREF(sys);
1128 Py_DECREF(sys);
1126 if (!ver) {
1129 if (!ver) {
1127 return -1;
1130 return -1;
1128 }
1131 }
1129 hexversion = PyInt_AsLong(ver);
1132 hexversion = PyInt_AsLong(ver);
1130 Py_DECREF(ver);
1133 Py_DECREF(ver);
1131 /* sys.hexversion is a 32-bit number by default, so the -1 case
1134 /* sys.hexversion is a 32-bit number by default, so the -1 case
1132 * should only occur in unusual circumstances (e.g. if sys.hexversion
1135 * should only occur in unusual circumstances (e.g. if sys.hexversion
1133 * is manually set to an invalid value). */
1136 * is manually set to an invalid value). */
1134 if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
1137 if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
1135 PyErr_Format(PyExc_ImportError,
1138 PyErr_Format(PyExc_ImportError,
1136 "%s: The Mercurial extension "
1139 "%s: The Mercurial extension "
1137 "modules were compiled with Python " PY_VERSION
1140 "modules were compiled with Python " PY_VERSION
1138 ", but "
1141 ", but "
1139 "Mercurial is currently using Python with "
1142 "Mercurial is currently using Python with "
1140 "sys.hexversion=%ld: "
1143 "sys.hexversion=%ld: "
1141 "Python %s\n at: %s",
1144 "Python %s\n at: %s",
1142 versionerrortext, hexversion, Py_GetVersion(),
1145 versionerrortext, hexversion, Py_GetVersion(),
1143 Py_GetProgramFullPath());
1146 Py_GetProgramFullPath());
1144 return -1;
1147 return -1;
1145 }
1148 }
1146 return 0;
1149 return 0;
1147 }
1150 }
1148
1151
1149 #ifdef IS_PY3K
1152 #ifdef IS_PY3K
1150 static struct PyModuleDef parsers_module = {PyModuleDef_HEAD_INIT, "parsers",
1153 static struct PyModuleDef parsers_module = {PyModuleDef_HEAD_INIT, "parsers",
1151 parsers_doc, -1, methods};
1154 parsers_doc, -1, methods};
1152
1155
1153 PyMODINIT_FUNC PyInit_parsers(void)
1156 PyMODINIT_FUNC PyInit_parsers(void)
1154 {
1157 {
1155 PyObject *mod;
1158 PyObject *mod;
1156
1159
1157 if (check_python_version() == -1)
1160 if (check_python_version() == -1)
1158 return NULL;
1161 return NULL;
1159 mod = PyModule_Create(&parsers_module);
1162 mod = PyModule_Create(&parsers_module);
1160 module_init(mod);
1163 module_init(mod);
1161 return mod;
1164 return mod;
1162 }
1165 }
1163 #else
1166 #else
1164 PyMODINIT_FUNC initparsers(void)
1167 PyMODINIT_FUNC initparsers(void)
1165 {
1168 {
1166 PyObject *mod;
1169 PyObject *mod;
1167
1170
1168 if (check_python_version() == -1) {
1171 if (check_python_version() == -1) {
1169 return;
1172 return;
1170 }
1173 }
1171 mod = Py_InitModule3("parsers", methods, parsers_doc);
1174 mod = Py_InitModule3("parsers", methods, parsers_doc);
1172 module_init(mod);
1175 module_init(mod);
1173 }
1176 }
1174 #endif
1177 #endif
@@ -1,83 +1,84 b''
1 /*
1 /*
2 util.h - utility functions for interfacing with the various python APIs.
2 util.h - utility functions for interfacing with the various python APIs.
3
3
4 This software may be used and distributed according to the terms of
4 This software may be used and distributed according to the terms of
5 the GNU General Public License, incorporated herein by reference.
5 the GNU General Public License, incorporated herein by reference.
6 */
6 */
7
7
8 #ifndef _HG_UTIL_H_
8 #ifndef _HG_UTIL_H_
9 #define _HG_UTIL_H_
9 #define _HG_UTIL_H_
10
10
11 #include "compat.h"
11 #include "compat.h"
12
12
13 #if PY_MAJOR_VERSION >= 3
13 #if PY_MAJOR_VERSION >= 3
14 #define IS_PY3K
14 #define IS_PY3K
15 #endif
15 #endif
16
16
17 /* helper to switch things like string literal depending on Python version */
17 /* helper to switch things like string literal depending on Python version */
18 #ifdef IS_PY3K
18 #ifdef IS_PY3K
19 #define PY23(py2, py3) py3
19 #define PY23(py2, py3) py3
20 #else
20 #else
21 #define PY23(py2, py3) py2
21 #define PY23(py2, py3) py2
22 #endif
22 #endif
23
23
24 /* clang-format off */
24 /* clang-format off */
25 typedef struct {
25 typedef struct {
26 PyObject_HEAD
26 PyObject_HEAD
27 unsigned char flags;
27 int flags;
28 int mode;
28 int mode;
29 int size;
29 int size;
30 int mtime;
30 int mtime;
31 } dirstateItemObject;
31 } dirstateItemObject;
32 /* clang-format on */
32 /* clang-format on */
33
33
34 static const unsigned char dirstate_flag_wc_tracked = 1;
34 static const int dirstate_flag_wc_tracked = 1;
35 static const unsigned char dirstate_flag_p1_tracked = 1 << 1;
35 static const int dirstate_flag_p1_tracked = 1 << 1;
36 static const unsigned char dirstate_flag_p2_info = 1 << 2;
36 static const int dirstate_flag_p2_info = 1 << 2;
37 static const unsigned char dirstate_flag_has_meaningful_data = 1 << 3;
37 static const int dirstate_flag_has_meaningful_data = 1 << 3;
38 static const unsigned char dirstate_flag_has_file_mtime = 1 << 4;
38 static const int dirstate_flag_has_file_mtime = 1 << 4;
39 static const unsigned char dirstate_flag_has_directory_mtime = 1 << 5;
39 static const int dirstate_flag_has_directory_mtime = 1 << 5;
40 static const unsigned char dirstate_flag_mode_exec_perm = 1 << 6;
40 static const int dirstate_flag_mode_exec_perm = 1 << 6;
41 static const unsigned char dirstate_flag_mode_is_symlink = 1 << 7;
41 static const int dirstate_flag_mode_is_symlink = 1 << 7;
42 static const int dirstate_flag_expected_state_is_modified = 1 << 8;
42
43
43 extern PyTypeObject dirstateItemType;
44 extern PyTypeObject dirstateItemType;
44 #define dirstate_tuple_check(op) (Py_TYPE(op) == &dirstateItemType)
45 #define dirstate_tuple_check(op) (Py_TYPE(op) == &dirstateItemType)
45
46
46 #ifndef MIN
47 #ifndef MIN
47 #define MIN(a, b) (((a) < (b)) ? (a) : (b))
48 #define MIN(a, b) (((a) < (b)) ? (a) : (b))
48 #endif
49 #endif
49 /* VC9 doesn't include bool and lacks stdbool.h based on my searching */
50 /* VC9 doesn't include bool and lacks stdbool.h based on my searching */
50 #if defined(_MSC_VER) || __STDC_VERSION__ < 199901L
51 #if defined(_MSC_VER) || __STDC_VERSION__ < 199901L
51 #define true 1
52 #define true 1
52 #define false 0
53 #define false 0
53 typedef unsigned char bool;
54 typedef unsigned char bool;
54 #else
55 #else
55 #include <stdbool.h>
56 #include <stdbool.h>
56 #endif
57 #endif
57
58
58 static inline PyObject *_dict_new_presized(Py_ssize_t expected_size)
59 static inline PyObject *_dict_new_presized(Py_ssize_t expected_size)
59 {
60 {
60 /* _PyDict_NewPresized expects a minused parameter, but it actually
61 /* _PyDict_NewPresized expects a minused parameter, but it actually
61 creates a dictionary that's the nearest power of two bigger than the
62 creates a dictionary that's the nearest power of two bigger than the
62 parameter. For example, with the initial minused = 1000, the
63 parameter. For example, with the initial minused = 1000, the
63 dictionary created has size 1024. Of course in a lot of cases that
64 dictionary created has size 1024. Of course in a lot of cases that
64 can be greater than the maximum load factor Python's dict object
65 can be greater than the maximum load factor Python's dict object
65 expects (= 2/3), so as soon as we cross the threshold we'll resize
66 expects (= 2/3), so as soon as we cross the threshold we'll resize
66 anyway. So create a dictionary that's at least 3/2 the size. */
67 anyway. So create a dictionary that's at least 3/2 the size. */
67 return _PyDict_NewPresized(((1 + expected_size) / 2) * 3);
68 return _PyDict_NewPresized(((1 + expected_size) / 2) * 3);
68 }
69 }
69
70
70 /* Convert a PyInt or PyLong to a long. Returns false if there is an
71 /* Convert a PyInt or PyLong to a long. Returns false if there is an
71 error, in which case an exception will already have been set. */
72 error, in which case an exception will already have been set. */
72 static inline bool pylong_to_long(PyObject *pylong, long *out)
73 static inline bool pylong_to_long(PyObject *pylong, long *out)
73 {
74 {
74 *out = PyLong_AsLong(pylong);
75 *out = PyLong_AsLong(pylong);
75 /* Fast path to avoid hitting PyErr_Occurred if the value was obviously
76 /* Fast path to avoid hitting PyErr_Occurred if the value was obviously
76 * not an error. */
77 * not an error. */
77 if (*out != -1) {
78 if (*out != -1) {
78 return true;
79 return true;
79 }
80 }
80 return PyErr_Occurred() == NULL;
81 return PyErr_Occurred() == NULL;
81 }
82 }
82
83
83 #endif /* _HG_UTIL_H_ */
84 #endif /* _HG_UTIL_H_ */
@@ -1,516 +1,532 b''
1 The *dirstate* is what Mercurial uses internally to track
1 The *dirstate* is what Mercurial uses internally to track
2 the state of files in the working directory,
2 the state of files in the working directory,
3 such as set by commands like `hg add` and `hg rm`.
3 such as set by commands like `hg add` and `hg rm`.
4 It also contains some cached data that help make `hg status` faster.
4 It also contains some cached data that help make `hg status` faster.
5 The name refers both to `.hg/dirstate` on the filesystem
5 The name refers both to `.hg/dirstate` on the filesystem
6 and the corresponding data structure in memory while a Mercurial process
6 and the corresponding data structure in memory while a Mercurial process
7 is running.
7 is running.
8
8
9 The original file format, retroactively dubbed `dirstate-v1`,
9 The original file format, retroactively dubbed `dirstate-v1`,
10 is described at https://www.mercurial-scm.org/wiki/DirState.
10 is described at https://www.mercurial-scm.org/wiki/DirState.
11 It is made of a flat sequence of unordered variable-size entries,
11 It is made of a flat sequence of unordered variable-size entries,
12 so accessing any information in it requires parsing all of it.
12 so accessing any information in it requires parsing all of it.
13 Similarly, saving changes requires rewriting the entire file.
13 Similarly, saving changes requires rewriting the entire file.
14
14
15 The newer `dirstate-v2` file format is designed to fix these limitations
15 The newer `dirstate-v2` file format is designed to fix these limitations
16 and make `hg status` faster.
16 and make `hg status` faster.
17
17
18 User guide
18 User guide
19 ==========
19 ==========
20
20
21 Compatibility
21 Compatibility
22 -------------
22 -------------
23
23
24 The file format is experimental and may still change.
24 The file format is experimental and may still change.
25 Different versions of Mercurial may not be compatible with each other
25 Different versions of Mercurial may not be compatible with each other
26 when working on a local repository that uses this format.
26 when working on a local repository that uses this format.
27 When using an incompatible version with the experimental format,
27 When using an incompatible version with the experimental format,
28 anything can happen including data corruption.
28 anything can happen including data corruption.
29
29
30 Since the dirstate is entirely local and not relevant to the wire protocol,
30 Since the dirstate is entirely local and not relevant to the wire protocol,
31 `dirstate-v2` does not affect compatibility with remote Mercurial versions.
31 `dirstate-v2` does not affect compatibility with remote Mercurial versions.
32
32
33 When `share-safe` is enabled, different repositories sharing the same store
33 When `share-safe` is enabled, different repositories sharing the same store
34 can use different dirstate formats.
34 can use different dirstate formats.
35
35
36 Enabling `dirstate-v2` for new local repositories
36 Enabling `dirstate-v2` for new local repositories
37 -------------------------------------------------
37 -------------------------------------------------
38
38
39 When creating a new local repository such as with `hg init` or `hg clone`,
39 When creating a new local repository such as with `hg init` or `hg clone`,
40 the `exp-dirstate-v2` boolean in the `format` configuration section
40 the `exp-dirstate-v2` boolean in the `format` configuration section
41 controls whether to use this file format.
41 controls whether to use this file format.
42 This is disabled by default as of this writing.
42 This is disabled by default as of this writing.
43 To enable it for a single repository, run for example::
43 To enable it for a single repository, run for example::
44
44
45 $ hg init my-project --config format.exp-dirstate-v2=1
45 $ hg init my-project --config format.exp-dirstate-v2=1
46
46
47 Checking the format of an existing local repository
47 Checking the format of an existing local repository
48 ---------------------------------------------------
48 ---------------------------------------------------
49
49
50 The `debugformat` command prints information about
50 The `debugformat` command prints information about
51 which of multiple optional formats are used in the current repository,
51 which of multiple optional formats are used in the current repository,
52 including `dirstate-v2`::
52 including `dirstate-v2`::
53
53
54 $ hg debugformat
54 $ hg debugformat
55 format-variant repo
55 format-variant repo
56 fncache: yes
56 fncache: yes
57 dirstate-v2: yes
57 dirstate-v2: yes
58 […]
58 […]
59
59
60 Upgrading or downgrading an existing local repository
60 Upgrading or downgrading an existing local repository
61 -----------------------------------------------------
61 -----------------------------------------------------
62
62
63 The `debugupgrade` command does various upgrades or downgrades
63 The `debugupgrade` command does various upgrades or downgrades
64 on a local repository
64 on a local repository
65 based on the current Mercurial version and on configuration.
65 based on the current Mercurial version and on configuration.
66 The same `format.exp-dirstate-v2` configuration is used again.
66 The same `format.exp-dirstate-v2` configuration is used again.
67
67
68 Example to upgrade::
68 Example to upgrade::
69
69
70 $ hg debugupgrade --config format.exp-dirstate-v2=1
70 $ hg debugupgrade --config format.exp-dirstate-v2=1
71
71
72 Example to downgrade to `dirstate-v1`::
72 Example to downgrade to `dirstate-v1`::
73
73
74 $ hg debugupgrade --config format.exp-dirstate-v2=0
74 $ hg debugupgrade --config format.exp-dirstate-v2=0
75
75
76 Both of these commands do nothing but print a list of proposed changes,
76 Both of these commands do nothing but print a list of proposed changes,
77 which may include changes unrelated to the dirstate.
77 which may include changes unrelated to the dirstate.
78 Those other changes are controlled by their own configuration keys.
78 Those other changes are controlled by their own configuration keys.
79 Add `--run` to a command to actually apply the proposed changes.
79 Add `--run` to a command to actually apply the proposed changes.
80
80
81 Backups of `.hg/requires` and `.hg/dirstate` are created
81 Backups of `.hg/requires` and `.hg/dirstate` are created
82 in a `.hg/upgradebackup.*` directory.
82 in a `.hg/upgradebackup.*` directory.
83 If something goes wrong, restoring those files should undo the change.
83 If something goes wrong, restoring those files should undo the change.
84
84
85 Note that upgrading affects compatibility with older versions of Mercurial
85 Note that upgrading affects compatibility with older versions of Mercurial
86 as noted above.
86 as noted above.
87 This can be relevant when a repository’s files are on a USB drive
87 This can be relevant when a repository’s files are on a USB drive
88 or some other removable media, or shared over the network, etc.
88 or some other removable media, or shared over the network, etc.
89
89
90 Internal filesystem representation
90 Internal filesystem representation
91 ==================================
91 ==================================
92
92
93 Requirements file
93 Requirements file
94 -----------------
94 -----------------
95
95
96 The `.hg/requires` file indicates which of various optional file formats
96 The `.hg/requires` file indicates which of various optional file formats
97 are used by a given repository.
97 are used by a given repository.
98 Mercurial aborts when seeing a requirement it does not know about,
98 Mercurial aborts when seeing a requirement it does not know about,
99 which prevents older versions from accidentally messing up a repository
99 which prevents older versions from accidentally messing up a repository
100 that uses a format that was introduced later.
100 that uses a format that was introduced later.
101 For versions that do support a format, the presence or absence of
101 For versions that do support a format, the presence or absence of
102 the corresponding requirement indicates whether to use that format.
102 the corresponding requirement indicates whether to use that format.
103
103
104 When the file contains an `exp-dirstate-v2` line,
104 When the file contains an `exp-dirstate-v2` line,
105 the `dirstate-v2` format is used.
105 the `dirstate-v2` format is used.
106 With no such line `dirstate-v1` is used.
106 With no such line `dirstate-v1` is used.
107
107
108 High level description
108 High level description
109 ----------------------
109 ----------------------
110
110
111 Whereas `dirstate-v1` uses a single `.hg/dirstate` file,
111 Whereas `dirstate-v1` uses a single `.hg/dirstate` file,
112 in `dirstate-v2` that file is a "docket" file
112 in `dirstate-v2` that file is a "docket" file
113 that only contains some metadata
113 that only contains some metadata
114 and points to a separate data file named `.hg/dirstate.{ID}`,
114 and points to a separate data file named `.hg/dirstate.{ID}`,
115 where `{ID}` is a random identifier.
115 where `{ID}` is a random identifier.
116
116
117 This separation allows making data files append-only
117 This separation allows making data files append-only
118 and therefore safer to memory-map.
118 and therefore safer to memory-map.
119 Creating a new data file (occasionally to clean up unused data)
119 Creating a new data file (occasionally to clean up unused data)
120 can be done with a different ID
120 can be done with a different ID
121 without disrupting another Mercurial process
121 without disrupting another Mercurial process
122 that could still be using the previous data file.
122 that could still be using the previous data file.
123
123
124 Both files have a format designed to reduce the need for parsing,
124 Both files have a format designed to reduce the need for parsing,
125 by using fixed-size binary components as much as possible.
125 by using fixed-size binary components as much as possible.
126 For data that is not fixed-size,
126 For data that is not fixed-size,
127 references to other parts of a file can be made by storing "pseudo-pointers":
127 references to other parts of a file can be made by storing "pseudo-pointers":
128 integers counted in bytes from the start of a file.
128 integers counted in bytes from the start of a file.
129 For read-only access no data structure is needed,
129 For read-only access no data structure is needed,
130 only a bytes buffer (possibly memory-mapped directly from the filesystem)
130 only a bytes buffer (possibly memory-mapped directly from the filesystem)
131 with specific parts read on demand.
131 with specific parts read on demand.
132
132
133 The data file contains "nodes" organized in a tree.
133 The data file contains "nodes" organized in a tree.
134 Each node represents a file or directory inside the working directory
134 Each node represents a file or directory inside the working directory
135 or its parent changeset.
135 or its parent changeset.
136 This tree has the same structure as the filesystem,
136 This tree has the same structure as the filesystem,
137 so a node representing a directory has child nodes representing
137 so a node representing a directory has child nodes representing
138 the files and subdirectories contained directly in that directory.
138 the files and subdirectories contained directly in that directory.
139
139
140 The docket file format
140 The docket file format
141 ----------------------
141 ----------------------
142
142
143 This is implemented in `rust/hg-core/src/dirstate_tree/on_disk.rs`
143 This is implemented in `rust/hg-core/src/dirstate_tree/on_disk.rs`
144 and `mercurial/dirstateutils/docket.py`.
144 and `mercurial/dirstateutils/docket.py`.
145
145
146 Components of the docket file are found at fixed offsets,
146 Components of the docket file are found at fixed offsets,
147 counted in bytes from the start of the file:
147 counted in bytes from the start of the file:
148
148
149 * Offset 0:
149 * Offset 0:
150 The 12-byte marker string "dirstate-v2\n" ending with a newline character.
150 The 12-byte marker string "dirstate-v2\n" ending with a newline character.
151 This makes it easier to tell a dirstate-v2 file from a dirstate-v1 file,
151 This makes it easier to tell a dirstate-v2 file from a dirstate-v1 file,
152 although it is not strictly necessary
152 although it is not strictly necessary
153 since `.hg/requires` determines which format to use.
153 since `.hg/requires` determines which format to use.
154
154
155 * Offset 12:
155 * Offset 12:
156 The changeset node ID on the first parent of the working directory,
156 The changeset node ID on the first parent of the working directory,
157 as up to 32 binary bytes.
157 as up to 32 binary bytes.
158 If a node ID is shorter (20 bytes for SHA-1),
158 If a node ID is shorter (20 bytes for SHA-1),
159 it is start-aligned and the rest of the bytes are set to zero.
159 it is start-aligned and the rest of the bytes are set to zero.
160
160
161 * Offset 44:
161 * Offset 44:
162 The changeset node ID on the second parent of the working directory,
162 The changeset node ID on the second parent of the working directory,
163 or all zeros if there isn’t one.
163 or all zeros if there isn’t one.
164 Also 32 binary bytes.
164 Also 32 binary bytes.
165
165
166 * Offset 76:
166 * Offset 76:
167 Tree metadata on 44 bytes, described below.
167 Tree metadata on 44 bytes, described below.
168 Its separation in this documentation from the rest of the docket
168 Its separation in this documentation from the rest of the docket
169 reflects a detail of the current implementation.
169 reflects a detail of the current implementation.
170 Since tree metadata is also made of fields at fixed offsets, those could
170 Since tree metadata is also made of fields at fixed offsets, those could
171 be inlined here by adding 76 bytes to each offset.
171 be inlined here by adding 76 bytes to each offset.
172
172
173 * Offset 120:
173 * Offset 120:
174 The used size of the data file, as a 32-bit big-endian integer.
174 The used size of the data file, as a 32-bit big-endian integer.
175 The actual size of the data file may be larger
175 The actual size of the data file may be larger
176 (if another Mercurial process is appending to it
176 (if another Mercurial process is appending to it
177 but has not updated the docket yet).
177 but has not updated the docket yet).
178 That extra data must be ignored.
178 That extra data must be ignored.
179
179
180 * Offset 124:
180 * Offset 124:
181 The length of the data file identifier, as an 8-bit integer.
181 The length of the data file identifier, as an 8-bit integer.
182
182
183 * Offset 125:
183 * Offset 125:
184 The data file identifier.
184 The data file identifier.
185
185
186 * Any additional data is currently ignored, and dropped when updating the file.
186 * Any additional data is currently ignored, and dropped when updating the file.
187
187
188 Tree metadata in the docket file
188 Tree metadata in the docket file
189 --------------------------------
189 --------------------------------
190
190
191 Tree metadata is similarly made of components at fixed offsets.
191 Tree metadata is similarly made of components at fixed offsets.
192 These offsets are counted in bytes from the start of tree metadata,
192 These offsets are counted in bytes from the start of tree metadata,
193 which is 76 bytes after the start of the docket file.
193 which is 76 bytes after the start of the docket file.
194
194
195 This metadata can be thought of as the singular root of the tree
195 This metadata can be thought of as the singular root of the tree
196 formed by nodes in the data file.
196 formed by nodes in the data file.
197
197
198 * Offset 0:
198 * Offset 0:
199 Pseudo-pointer to the start of root nodes,
199 Pseudo-pointer to the start of root nodes,
200 counted in bytes from the start of the data file,
200 counted in bytes from the start of the data file,
201 as a 32-bit big-endian integer.
201 as a 32-bit big-endian integer.
202 These nodes describe files and directories found directly
202 These nodes describe files and directories found directly
203 at the root of the working directory.
203 at the root of the working directory.
204
204
205 * Offset 4:
205 * Offset 4:
206 Number of root nodes, as a 32-bit big-endian integer.
206 Number of root nodes, as a 32-bit big-endian integer.
207
207
208 * Offset 8:
208 * Offset 8:
209 Total number of nodes in the entire tree that "have a dirstate entry",
209 Total number of nodes in the entire tree that "have a dirstate entry",
210 as a 32-bit big-endian integer.
210 as a 32-bit big-endian integer.
211 Those nodes represent files that would be present at all in `dirstate-v1`.
211 Those nodes represent files that would be present at all in `dirstate-v1`.
212 This is typically less than the total number of nodes.
212 This is typically less than the total number of nodes.
213 This counter is used to implement `len(dirstatemap)`.
213 This counter is used to implement `len(dirstatemap)`.
214
214
215 * Offset 12:
215 * Offset 12:
216 Number of nodes in the entire tree that have a copy source,
216 Number of nodes in the entire tree that have a copy source,
217 as a 32-bit big-endian integer.
217 as a 32-bit big-endian integer.
218 At the next commit, these files are recorded
218 At the next commit, these files are recorded
219 as having been copied or moved/renamed from that source.
219 as having been copied or moved/renamed from that source.
220 (A move is recorded as a copy and separate removal of the source.)
220 (A move is recorded as a copy and separate removal of the source.)
221 This counter is used to implement `len(dirstatemap.copymap)`.
221 This counter is used to implement `len(dirstatemap.copymap)`.
222
222
223 * Offset 16:
223 * Offset 16:
224 An estimation of how many bytes of the data file
224 An estimation of how many bytes of the data file
225 (within its used size) are unused, as a 32-bit big-endian integer.
225 (within its used size) are unused, as a 32-bit big-endian integer.
226 When appending to an existing data file,
226 When appending to an existing data file,
227 some existing nodes or paths can be unreachable from the new root
227 some existing nodes or paths can be unreachable from the new root
228 but they still take up space.
228 but they still take up space.
229 This counter is used to decide when to write a new data file from scratch
229 This counter is used to decide when to write a new data file from scratch
230 instead of appending to an existing one,
230 instead of appending to an existing one,
231 in order to get rid of that unreachable data
231 in order to get rid of that unreachable data
232 and avoid unbounded file size growth.
232 and avoid unbounded file size growth.
233
233
234 * Offset 20:
234 * Offset 20:
235 These four bytes are currently ignored
235 These four bytes are currently ignored
236 and reset to zero when updating a docket file.
236 and reset to zero when updating a docket file.
237 This is an attempt at forward compatibility:
237 This is an attempt at forward compatibility:
238 future Mercurial versions could use this as a bit field
238 future Mercurial versions could use this as a bit field
239 to indicate that a dirstate has additional data or constraints.
239 to indicate that a dirstate has additional data or constraints.
240 Finding a dirstate file with the relevant bit unset indicates that
240 Finding a dirstate file with the relevant bit unset indicates that
241 it was written by a then-older version
241 it was written by a then-older version
242 which is not aware of that future change.
242 which is not aware of that future change.
243
243
244 * Offset 24:
244 * Offset 24:
245 Either 20 zero bytes, or a SHA-1 hash as 20 binary bytes.
245 Either 20 zero bytes, or a SHA-1 hash as 20 binary bytes.
246 When present, the hash is of ignore patterns
246 When present, the hash is of ignore patterns
247 that were used for some previous run of the `status` algorithm.
247 that were used for some previous run of the `status` algorithm.
248
248
249 * (Offset 44: end of tree metadata)
249 * (Offset 44: end of tree metadata)
250
250
251 Optional hash of ignore patterns
251 Optional hash of ignore patterns
252 --------------------------------
252 --------------------------------
253
253
254 The implementation of `status` at `rust/hg-core/src/dirstate_tree/status.rs`
254 The implementation of `status` at `rust/hg-core/src/dirstate_tree/status.rs`
255 has been optimized such that its run time is dominated by calls
255 has been optimized such that its run time is dominated by calls
256 to `stat` for reading the filesystem metadata of a file or directory,
256 to `stat` for reading the filesystem metadata of a file or directory,
257 and to `readdir` for listing the contents of a directory.
257 and to `readdir` for listing the contents of a directory.
258 In some cases the algorithm can skip calls to `readdir`
258 In some cases the algorithm can skip calls to `readdir`
259 (saving significant time)
259 (saving significant time)
260 because the dirstate already contains enough of the relevant information
260 because the dirstate already contains enough of the relevant information
261 to build the correct `status` results.
261 to build the correct `status` results.
262
262
263 The default configuration of `hg status` is to list unknown files
263 The default configuration of `hg status` is to list unknown files
264 but not ignored files.
264 but not ignored files.
265 In this case, it matters for the `readdir`-skipping optimization
265 In this case, it matters for the `readdir`-skipping optimization
266 if a given file used to be ignored but became unknown
266 if a given file used to be ignored but became unknown
267 because `.hgignore` changed.
267 because `.hgignore` changed.
268 To detect the possibility of such a change,
268 To detect the possibility of such a change,
269 the tree metadata contains an optional hash of all ignore patterns.
269 the tree metadata contains an optional hash of all ignore patterns.
270
270
271 We define:
271 We define:
272
272
273 * "Root" ignore files as:
273 * "Root" ignore files as:
274
274
275 - `.hgignore` at the root of the repository if it exists
275 - `.hgignore` at the root of the repository if it exists
276 - And all files from `ui.ignore.*` config.
276 - And all files from `ui.ignore.*` config.
277
277
278 This set of files is sorted by the string representation of their path.
278 This set of files is sorted by the string representation of their path.
279
279
280 * The "expanded contents" of an ignore file is the byte string made
280 * The "expanded contents" of an ignore file is the byte string made
281 by the concatenation of its contents followed by the "expanded contents"
281 by the concatenation of its contents followed by the "expanded contents"
282 of other files included with `include:` or `subinclude:` directives,
282 of other files included with `include:` or `subinclude:` directives,
283 in inclusion order. This definition is recursive, as included files can
283 in inclusion order. This definition is recursive, as included files can
284 themselves include more files.
284 themselves include more files.
285
285
286 This hash is defined as the SHA-1 of the concatenation (in sorted
286 This hash is defined as the SHA-1 of the concatenation (in sorted
287 order) of the "expanded contents" of each "root" ignore file.
287 order) of the "expanded contents" of each "root" ignore file.
288 (Note that computing this does not require actually concatenating
288 (Note that computing this does not require actually concatenating
289 into a single contiguous byte sequence.
289 into a single contiguous byte sequence.
290 Instead a SHA-1 hasher object can be created
290 Instead a SHA-1 hasher object can be created
291 and fed separate chunks one by one.)
291 and fed separate chunks one by one.)
292
292
293 The data file format
293 The data file format
294 --------------------
294 --------------------
295
295
296 This is implemented in `rust/hg-core/src/dirstate_tree/on_disk.rs`
296 This is implemented in `rust/hg-core/src/dirstate_tree/on_disk.rs`
297 and `mercurial/dirstateutils/v2.py`.
297 and `mercurial/dirstateutils/v2.py`.
298
298
299 The data file contains two types of data: paths and nodes.
299 The data file contains two types of data: paths and nodes.
300
300
301 Paths and nodes can be organized in any order in the file, except that sibling
301 Paths and nodes can be organized in any order in the file, except that sibling
302 nodes must be next to each other and sorted by their path.
302 nodes must be next to each other and sorted by their path.
303 Contiguity lets the parent refer to them all
303 Contiguity lets the parent refer to them all
304 by their count and a single pseudo-pointer,
304 by their count and a single pseudo-pointer,
305 instead of storing one pseudo-pointer per child node.
305 instead of storing one pseudo-pointer per child node.
306 Sorting allows using binary search to find a child node with a given name
306 Sorting allows using binary search to find a child node with a given name
307 in `O(log(n))` byte sequence comparisons.
307 in `O(log(n))` byte sequence comparisons.
308
308
309 The current implementation writes paths and child nodes before a given node
309 The current implementation writes paths and child nodes before a given node
310 for ease of figuring out the value of pseudo-pointers by the time they are to be
310 for ease of figuring out the value of pseudo-pointers by the time they are to be
311 written, but this is not an obligation and readers must not rely on it.
311 written, but this is not an obligation and readers must not rely on it.
312
312
313 A path is stored as a byte string anywhere in the file, without delimiter.
313 A path is stored as a byte string anywhere in the file, without delimiter.
314 It is referred to by one or more nodes by a pseudo-pointer to its start, and its
314 It is referred to by one or more nodes by a pseudo-pointer to its start, and its
315 length in bytes. Since there is no delimiter,
315 length in bytes. Since there is no delimiter,
316 when a path is a substring of another the same bytes could be reused,
316 when a path is a substring of another the same bytes could be reused,
317 although the implementation does not exploit this as of this writing.
317 although the implementation does not exploit this as of this writing.
318
318
319 A node is stored on 44 bytes with components at fixed offsets. Paths and
319 A node is stored on 44 bytes with components at fixed offsets. Paths and
320 child nodes relevant to a node are stored externally and referenced through
320 child nodes relevant to a node are stored externally and referenced through
321 pseudo-pointers.
321 pseudo-pointers.
322
322
323 All integers are stored in big-endian. All pseudo-pointers are 32-bit integers
323 All integers are stored in big-endian. All pseudo-pointers are 32-bit integers
324 counting bytes from the start of the data file. Path lengths and positions
324 counting bytes from the start of the data file. Path lengths and positions
325 are 16-bit integers, also counted in bytes.
325 are 16-bit integers, also counted in bytes.
326
326
327 Node components are:
327 Node components are:
328
328
329 * Offset 0:
329 * Offset 0:
330 Pseudo-pointer to the full path of this node,
330 Pseudo-pointer to the full path of this node,
331 from the working directory root.
331 from the working directory root.
332
332
333 * Offset 4:
333 * Offset 4:
334 Length of the full path.
334 Length of the full path.
335
335
336 * Offset 6:
336 * Offset 6:
337 Position of the last `/` path separator within the full path,
337 Position of the last `/` path separator within the full path,
338 in bytes from the start of the full path,
338 in bytes from the start of the full path,
339 or zero if there isn’t one.
339 or zero if there isn’t one.
340 The part of the full path after this position is the "base name".
340 The part of the full path after this position is the "base name".
341 Since sibling nodes have the same parent, only their base names vary
341 Since sibling nodes have the same parent, only their base names vary
342 and need to be considered when doing binary search to find a given path.
342 and need to be considered when doing binary search to find a given path.
343
343
344 * Offset 8:
344 * Offset 8:
345 Pseudo-pointer to the "copy source" path for this node,
345 Pseudo-pointer to the "copy source" path for this node,
346 or zero if there is no copy source.
346 or zero if there is no copy source.
347
347
348 * Offset 12:
348 * Offset 12:
349 Length of the copy source path, or zero if there isn’t one.
349 Length of the copy source path, or zero if there isn’t one.
350
350
351 * Offset 14:
351 * Offset 14:
352 Pseudo-pointer to the start of child nodes.
352 Pseudo-pointer to the start of child nodes.
353
353
354 * Offset 18:
354 * Offset 18:
355 Number of child nodes, as a 32-bit integer.
355 Number of child nodes, as a 32-bit integer.
356 They occupy 44 times this number of bytes
356 They occupy 44 times this number of bytes
357 (not counting space for paths, and further descendants).
357 (not counting space for paths, and further descendants).
358
358
359 * Offset 22:
359 * Offset 22:
360 Number as a 32-bit integer of descendant nodes in this subtree,
360 Number as a 32-bit integer of descendant nodes in this subtree,
361 not including this node itself,
361 not including this node itself,
362 that "have a dirstate entry".
362 that "have a dirstate entry".
363 Those nodes represent files that would be present at all in `dirstate-v1`.
363 Those nodes represent files that would be present at all in `dirstate-v1`.
364 This is typically less than the total number of descendants.
364 This is typically less than the total number of descendants.
365 This counter is used to implement `has_dir`.
365 This counter is used to implement `has_dir`.
366
366
367 * Offset 26:
367 * Offset 26:
368 Number as a 32-bit integer of descendant nodes in this subtree,
368 Number as a 32-bit integer of descendant nodes in this subtree,
369 not including this node itself,
369 not including this node itself,
370 that represent files tracked in the working directory.
370 that represent files tracked in the working directory.
371 (For example, `hg rm` makes a file untracked.)
371 (For example, `hg rm` makes a file untracked.)
372 This counter is used to implement `has_tracked_dir`.
372 This counter is used to implement `has_tracked_dir`.
373
373
374 * Offset 30:
374 * Offset 30:
375 A `flags` field that packs some boolean values as bits of a 16-bit integer.
375 A `flags` field that packs some boolean values as bits of a 16-bit integer.
376 Starting from least-significant, bit masks are::
376 Starting from least-significant, bit masks are::
377
377
378 WDIR_TRACKED = 1 << 0
378 WDIR_TRACKED = 1 << 0
379 P1_TRACKED = 1 << 1
379 P1_TRACKED = 1 << 1
380 P2_INFO = 1 << 2
380 P2_INFO = 1 << 2
381 HAS_MODE_AND_SIZE = 1 << 3
381 HAS_MODE_AND_SIZE = 1 << 3
382 HAS_FILE_MTIME = 1 << 4
382 HAS_FILE_MTIME = 1 << 4
383 HAS_DIRECTORY_MTIME = 1 << 5
383 HAS_DIRECTORY_MTIME = 1 << 5
384 MODE_EXEC_PERM = 1 << 6
384 MODE_EXEC_PERM = 1 << 6
385 MODE_IS_SYMLINK = 1 << 7
385 MODE_IS_SYMLINK = 1 << 7
386 EXPECTED_STATE_IS_MODIFIED = 1 << 8
386
387
387 The meaning of each bit is described below.
388 The meaning of each bit is described below.
388
389
389 Other bits are unset.
390 Other bits are unset.
390 They may be assigned meaning in the future,
391 They may be assigned meaning in the future,
391 with the limitation that Mercurial versions that pre-date such meaning
392 with the limitation that Mercurial versions that pre-date such meaning
392 will always reset those bits to unset when writing nodes.
393 will always reset those bits to unset when writing nodes.
393 (A new node is written for any mutation in its subtree,
394 (A new node is written for any mutation in its subtree,
394 leaving the bytes of the old node unreachable
395 leaving the bytes of the old node unreachable
395 until the data file is rewritten entirely.)
396 until the data file is rewritten entirely.)
396
397
397 * Offset 32:
398 * Offset 32:
398 A `size` field described below, as a 32-bit integer.
399 A `size` field described below, as a 32-bit integer.
399 Unlike in dirstate-v1, negative values are not used.
400 Unlike in dirstate-v1, negative values are not used.
400
401
401 * Offset 36:
402 * Offset 36:
402 The seconds component of an `mtime` field described below,
403 The seconds component of an `mtime` field described below,
403 as a 32-bit integer.
404 as a 32-bit integer.
404 Unlike in dirstate-v1, negative values are not used.
405 Unlike in dirstate-v1, negative values are not used.
405 When `mtime` is used, this is number of seconds since the Unix epoch
406 When `mtime` is used, this is number of seconds since the Unix epoch
406 truncated to its lower 31 bits.
407 truncated to its lower 31 bits.
407
408
408 * Offset 40:
409 * Offset 40:
409 The nanoseconds component of an `mtime` field described below,
410 The nanoseconds component of an `mtime` field described below,
410 as a 32-bit integer.
411 as a 32-bit integer.
411 When `mtime` is used,
412 When `mtime` is used,
412 this is the number of nanoseconds since `mtime.seconds`,
413 this is the number of nanoseconds since `mtime.seconds`,
413 always strictly less than one billion.
414 always strictly less than one billion.
414
415
415 This may be zero if more precision is not available.
416 This may be zero if more precision is not available.
416 (This can happen because of limitations in any of Mercurial, Python,
417 (This can happen because of limitations in any of Mercurial, Python,
417 libc, the operating system, …)
418 libc, the operating system, …)
418
419
419 When comparing two mtimes and either has this component set to zero,
420 When comparing two mtimes and either has this component set to zero,
420 the sub-second precision of both should be ignored.
421 the sub-second precision of both should be ignored.
421 False positives when checking mtime equality due to clock resolution
422 False positives when checking mtime equality due to clock resolution
422 are always possible and the status algorithm needs to deal with them,
423 are always possible and the status algorithm needs to deal with them,
423 but having too many false negatives could be harmful too.
424 but having too many false negatives could be harmful too.
424
425
425 * (Offset 44: end of this node)
426 * (Offset 44: end of this node)
426
427
427 The meaning of the boolean values packed in `flags` is:
428 The meaning of the boolean values packed in `flags` is:
428
429
429 `WDIR_TRACKED`
430 `WDIR_TRACKED`
430 Set if the working directory contains a tracked file at this node’s path.
431 Set if the working directory contains a tracked file at this node’s path.
431 This is typically set and unset by `hg add` and `hg rm`.
432 This is typically set and unset by `hg add` and `hg rm`.
432
433
433 `P1_TRACKED`
434 `P1_TRACKED`
434 Set if the working directory’s first parent changeset
435 Set if the working directory’s first parent changeset
435 (whose node identifier is found in tree metadata)
436 (whose node identifier is found in tree metadata)
436 contains a tracked file at this node’s path.
437 contains a tracked file at this node’s path.
437 This is a cache to reduce manifest lookups.
438 This is a cache to reduce manifest lookups.
438
439
439 `P2_INFO`
440 `P2_INFO`
440 Set if the file has been involved in some merge operation.
441 Set if the file has been involved in some merge operation.
441 Either because it was actually merged,
442 Either because it was actually merged,
442 or because the version in the second parent p2 was ahead,
443 or because the version in the second parent p2 was ahead,
443 or because some rename moved it there.
444 or because some rename moved it there.
444 In either case `hg status` will want it displayed as modified.
445 In either case `hg status` will want it displayed as modified.
445
446
446 Files that would be mentioned at all in the `dirstate-v1` file format
447 Files that would be mentioned at all in the `dirstate-v1` file format
447 have a node with at least one of the above three bits set in `dirstate-v2`.
448 have a node with at least one of the above three bits set in `dirstate-v2`.
448 Let’s call these files "tracked anywhere",
449 Let’s call these files "tracked anywhere",
449 and "untracked" the nodes with all three of these bits unset.
450 and "untracked" the nodes with all three of these bits unset.
450 Untracked nodes are typically for directories:
451 Untracked nodes are typically for directories:
451 they hold child nodes and form the tree structure.
452 they hold child nodes and form the tree structure.
452 Additional untracked nodes may also exist.
453 Additional untracked nodes may also exist.
453 Although implementations should strive to clean up nodes
454 Although implementations should strive to clean up nodes
454 that are entirely unused, other untracked nodes may also exist.
455 that are entirely unused, other untracked nodes may also exist.
455 For example, a future version of Mercurial might in some cases
456 For example, a future version of Mercurial might in some cases
456 add nodes for untracked files or/and ignored files in the working directory
457 add nodes for untracked files or/and ignored files in the working directory
457 in order to optimize `hg status`
458 in order to optimize `hg status`
458 by enabling it to skip `readdir` in more cases.
459 by enabling it to skip `readdir` in more cases.
459
460
460 `HAS_MODE_AND_SIZE`
461 `HAS_MODE_AND_SIZE`
461 Must be unset for untracked nodes.
462 Must be unset for untracked nodes.
462 For files tracked anywhere, if this is set:
463 For files tracked anywhere, if this is set:
463 - The `size` field is the expected file size,
464 - The `size` field is the expected file size,
464 in bytes truncated to its lower 31 bits,
465 in bytes truncated to its lower 31 bits.
465 for the file to be clean.
466 - The expected execute permission for the file’s owner
466 - The expected execute permission for the file’s owner
467 is given by `MODE_EXEC_PERM`
467 is given by `MODE_EXEC_PERM`
468 - The expected file type is given by `MODE_IS_SYMLINK`:
468 - The expected file type is given by `MODE_IS_SYMLINK`:
469 a symbolic link if set, or a normal file if unset.
469 a symbolic link if set, or a normal file if unset.
470 If this is unset the expected size, permission, and file type are unknown.
470 If this is unset the expected size, permission, and file type are unknown.
471 The `size` field is unused (set to zero).
471 The `size` field is unused (set to zero).
472
472
473 `HAS_FILE_MTIME`
473 `HAS_FILE_MTIME`
474 Must be unset for untracked nodes.
474 Must be unset for untracked nodes.
475 If this and `HAS_DIRECTORY_MTIME` are both unset,
475 If this and `HAS_DIRECTORY_MTIME` are both unset,
476 the `mtime` field is unused (set to zero).
476 the `mtime` field is unused (set to zero).
477 If this is set, `mtime` is the modification time
477 If this is set, `mtime` is the expected modification time.
478 expected for the file to be considered clean.
479
478
480 `HAS_DIRECTORY_MTIME`
479 `HAS_DIRECTORY_MTIME`
481 Must be unset for files tracked anywhere.
480 Must be unset for files tracked anywhere.
482 If this and `HAS_FILE_MTIME` are both unset,
481 If this and `HAS_FILE_MTIME` are both unset,
483 the `mtime` field is unused (set to zero).
482 the `mtime` field is unused (set to zero).
484 If this is set, at some point,
483 If this is set, at some point,
485 this path in the working directory was observed:
484 this path in the working directory was observed:
486
485
487 - To be a directory
486 - To be a directory
488 - With the modification time given in `mtime`
487 - With the modification time given in `mtime`
489 - That time was already strictly in the past when observed,
488 - That time was already strictly in the past when observed,
490 meaning that later changes cannot happen in the same clock tick
489 meaning that later changes cannot happen in the same clock tick
491 and must cause a different modification time
490 and must cause a different modification time
492 (unless the system clock jumps back and we get unlucky,
491 (unless the system clock jumps back and we get unlucky,
493 which is not impossible but deemed unlikely enough).
492 which is not impossible but deemed unlikely enough).
494 - All direct children of this directory
493 - All direct children of this directory
495 (as returned by `std::fs::read_dir`)
494 (as returned by `std::fs::read_dir`)
496 either have a corresponding dirstate node,
495 either have a corresponding dirstate node,
497 or are ignored by ignore patterns whose hash is in tree metadata.
496 or are ignored by ignore patterns whose hash is in tree metadata.
498
497
499 This means that if `std::fs::symlink_metadata` later reports
498 This means that if `std::fs::symlink_metadata` later reports
500 the same modification time
499 the same modification time
501 and ignored patterns haven’t changed,
500 and ignored patterns haven’t changed,
502 a run of status that is not listing ignored files
501 a run of status that is not listing ignored files
503 can skip calling `std::fs::read_dir` again for this directory,
502 can skip calling `std::fs::read_dir` again for this directory,
504 and iterate child dirstate nodes instead.
503 and iterate child dirstate nodes instead.
505
504
506 `MODE_EXEC_PERM`
505 `MODE_EXEC_PERM`
507 Must be unset if `HAS_MODE_AND_SIZE` is unset.
506 Must be unset if `HAS_MODE_AND_SIZE` is unset.
508 If `HAS_MODE_AND_SIZE` is set,
507 If `HAS_MODE_AND_SIZE` is set,
509 this indicates whether the file’s owner is expected
508 this indicates whether the file’s owner is expected
510 to have execute permission.
509 to have execute permission.
511
510
512 `MODE_IS_SYMLINK`
511 `MODE_IS_SYMLINK`
513 Must be unset if `HAS_MODE_AND_SIZE` is unset.
512 Must be unset if `HAS_MODE_AND_SIZE` is unset.
514 If `HAS_MODE_AND_SIZE` is set,
513 If `HAS_MODE_AND_SIZE` is set,
515 this indicates whether the file is expected to be a symlink
514 this indicates whether the file is expected to be a symlink
516 as opposed to a normal file.
515 as opposed to a normal file.
516
517 `EXPECTED_STATE_IS_MODIFIED`
518 Must be unset for untracked nodes.
519 For:
520 - a file tracked anywhere
521 - that has expected metadata (`HAS_MODE_AND_SIZE` and `HAS_FILE_MTIME`)
522 - if that metadata matches
523 metadata found in the working directory with `stat`
524 This bit indicates the status of the file.
525 If set, the status is modified. If unset, it is clean.
526
527 In cases where `hg status` needs to read the contents of a file
528 because metadata is ambiguous, this bit lets it record the result
529 if the result is modified so that a future run of `hg status`
530 does not need to do the same again.
531 It is valid to never set this bit,
532 and consider expected metadata ambiguous if it is set.
@@ -1,790 +1,799 b''
1 # parsers.py - Python implementation of parsers.c
1 # parsers.py - Python implementation of parsers.c
2 #
2 #
3 # Copyright 2009 Olivia Mackall <olivia@selenic.com> and others
3 # Copyright 2009 Olivia Mackall <olivia@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import stat
10 import stat
11 import struct
11 import struct
12 import zlib
12 import zlib
13
13
14 from ..node import (
14 from ..node import (
15 nullrev,
15 nullrev,
16 sha1nodeconstants,
16 sha1nodeconstants,
17 )
17 )
18 from ..thirdparty import attr
18 from ..thirdparty import attr
19 from .. import (
19 from .. import (
20 error,
20 error,
21 pycompat,
21 pycompat,
22 revlogutils,
22 revlogutils,
23 util,
23 util,
24 )
24 )
25
25
26 from ..revlogutils import nodemap as nodemaputil
26 from ..revlogutils import nodemap as nodemaputil
27 from ..revlogutils import constants as revlog_constants
27 from ..revlogutils import constants as revlog_constants
28
28
29 stringio = pycompat.bytesio
29 stringio = pycompat.bytesio
30
30
31
31
32 _pack = struct.pack
32 _pack = struct.pack
33 _unpack = struct.unpack
33 _unpack = struct.unpack
34 _compress = zlib.compress
34 _compress = zlib.compress
35 _decompress = zlib.decompress
35 _decompress = zlib.decompress
36
36
37
37
38 # a special value used internally for `size` if the file comes from the other parent
38 # a special value used internally for `size` if the file comes from the other parent
39 FROM_P2 = -2
39 FROM_P2 = -2
40
40
41 # a special value used internally for `size` if the file is modified/merged/added
41 # a special value used internally for `size` if the file is modified/merged/added
42 NONNORMAL = -1
42 NONNORMAL = -1
43
43
44 # a special value used internally for `time` if the time is ambiguous
44 # a special value used internally for `time` if the time is ambiguous
45 AMBIGUOUS_TIME = -1
45 AMBIGUOUS_TIME = -1
46
46
47 # Bits of the `flags` field inside a node in the file format
47 # Bits of the `flags` field inside a node in the file format
48 DIRSTATE_V2_WDIR_TRACKED = 1 << 0
48 DIRSTATE_V2_WDIR_TRACKED = 1 << 0
49 DIRSTATE_V2_P1_TRACKED = 1 << 1
49 DIRSTATE_V2_P1_TRACKED = 1 << 1
50 DIRSTATE_V2_P2_INFO = 1 << 2
50 DIRSTATE_V2_P2_INFO = 1 << 2
51 DIRSTATE_V2_HAS_MODE_AND_SIZE = 1 << 3
51 DIRSTATE_V2_HAS_MODE_AND_SIZE = 1 << 3
52 DIRSTATE_V2_HAS_FILE_MTIME = 1 << 4
52 DIRSTATE_V2_HAS_FILE_MTIME = 1 << 4
53 _DIRSTATE_V2_HAS_DIRCTORY_MTIME = 1 << 5 # Unused when Rust is not available
53 _DIRSTATE_V2_HAS_DIRCTORY_MTIME = 1 << 5 # Unused when Rust is not available
54 DIRSTATE_V2_MODE_EXEC_PERM = 1 << 6
54 DIRSTATE_V2_MODE_EXEC_PERM = 1 << 6
55 DIRSTATE_V2_MODE_IS_SYMLINK = 1 << 7
55 DIRSTATE_V2_MODE_IS_SYMLINK = 1 << 7
56 DIRSTATE_V2_EXPECTED_STATE_IS_MODIFIED = 1 << 8
56
57
57
58
58 @attr.s(slots=True, init=False)
59 @attr.s(slots=True, init=False)
59 class DirstateItem(object):
60 class DirstateItem(object):
60 """represent a dirstate entry
61 """represent a dirstate entry
61
62
62 It hold multiple attributes
63 It hold multiple attributes
63
64
64 # about file tracking
65 # about file tracking
65 - wc_tracked: is the file tracked by the working copy
66 - wc_tracked: is the file tracked by the working copy
66 - p1_tracked: is the file tracked in working copy first parent
67 - p1_tracked: is the file tracked in working copy first parent
67 - p2_info: the file has been involved in some merge operation. Either
68 - p2_info: the file has been involved in some merge operation. Either
68 because it was actually merged, or because the p2 version was
69 because it was actually merged, or because the p2 version was
69 ahead, or because some rename moved it there. In either case
70 ahead, or because some rename moved it there. In either case
70 `hg status` will want it displayed as modified.
71 `hg status` will want it displayed as modified.
71
72
72 # about the file state expected from p1 manifest:
73 # about the file state expected from p1 manifest:
73 - mode: the file mode in p1
74 - mode: the file mode in p1
74 - size: the file size in p1
75 - size: the file size in p1
75
76
76 These value can be set to None, which mean we don't have a meaningful value
77 These value can be set to None, which mean we don't have a meaningful value
77 to compare with. Either because we don't really care about them as there
78 to compare with. Either because we don't really care about them as there
78 `status` is known without having to look at the disk or because we don't
79 `status` is known without having to look at the disk or because we don't
79 know these right now and a full comparison will be needed to find out if
80 know these right now and a full comparison will be needed to find out if
80 the file is clean.
81 the file is clean.
81
82
82 # about the file state on disk last time we saw it:
83 # about the file state on disk last time we saw it:
83 - mtime: the last known clean mtime for the file.
84 - mtime: the last known clean mtime for the file.
84
85
85 This value can be set to None if no cachable state exist. Either because we
86 This value can be set to None if no cachable state exist. Either because we
86 do not care (see previous section) or because we could not cache something
87 do not care (see previous section) or because we could not cache something
87 yet.
88 yet.
88 """
89 """
89
90
90 _wc_tracked = attr.ib()
91 _wc_tracked = attr.ib()
91 _p1_tracked = attr.ib()
92 _p1_tracked = attr.ib()
92 _p2_info = attr.ib()
93 _p2_info = attr.ib()
93 _mode = attr.ib()
94 _mode = attr.ib()
94 _size = attr.ib()
95 _size = attr.ib()
95 _mtime = attr.ib()
96 _mtime = attr.ib()
96
97
97 def __init__(
98 def __init__(
98 self,
99 self,
99 wc_tracked=False,
100 wc_tracked=False,
100 p1_tracked=False,
101 p1_tracked=False,
101 p2_info=False,
102 p2_info=False,
102 has_meaningful_data=True,
103 has_meaningful_data=True,
103 has_meaningful_mtime=True,
104 has_meaningful_mtime=True,
104 parentfiledata=None,
105 parentfiledata=None,
105 ):
106 ):
106 self._wc_tracked = wc_tracked
107 self._wc_tracked = wc_tracked
107 self._p1_tracked = p1_tracked
108 self._p1_tracked = p1_tracked
108 self._p2_info = p2_info
109 self._p2_info = p2_info
109
110
110 self._mode = None
111 self._mode = None
111 self._size = None
112 self._size = None
112 self._mtime = None
113 self._mtime = None
113 if parentfiledata is None:
114 if parentfiledata is None:
114 has_meaningful_mtime = False
115 has_meaningful_mtime = False
115 has_meaningful_data = False
116 has_meaningful_data = False
116 if has_meaningful_data:
117 if has_meaningful_data:
117 self._mode = parentfiledata[0]
118 self._mode = parentfiledata[0]
118 self._size = parentfiledata[1]
119 self._size = parentfiledata[1]
119 if has_meaningful_mtime:
120 if has_meaningful_mtime:
120 self._mtime = parentfiledata[2]
121 self._mtime = parentfiledata[2]
121
122
122 @classmethod
123 @classmethod
123 def from_v2_data(cls, flags, size, mtime):
124 def from_v2_data(cls, flags, size, mtime):
124 """Build a new DirstateItem object from V2 data"""
125 """Build a new DirstateItem object from V2 data"""
125 has_mode_size = bool(flags & DIRSTATE_V2_HAS_MODE_AND_SIZE)
126 has_mode_size = bool(flags & DIRSTATE_V2_HAS_MODE_AND_SIZE)
127 has_meaningful_mtime = bool(flags & DIRSTATE_V2_HAS_FILE_MTIME)
126 mode = None
128 mode = None
129
130 if flags & +DIRSTATE_V2_EXPECTED_STATE_IS_MODIFIED:
131 # we do not have support for this flag in the code yet,
132 # force a lookup for this file.
133 has_mode_size = False
134 has_meaningful_mtime = False
135
127 if has_mode_size:
136 if has_mode_size:
128 assert stat.S_IXUSR == 0o100
137 assert stat.S_IXUSR == 0o100
129 if flags & DIRSTATE_V2_MODE_EXEC_PERM:
138 if flags & DIRSTATE_V2_MODE_EXEC_PERM:
130 mode = 0o755
139 mode = 0o755
131 else:
140 else:
132 mode = 0o644
141 mode = 0o644
133 if flags & DIRSTATE_V2_MODE_IS_SYMLINK:
142 if flags & DIRSTATE_V2_MODE_IS_SYMLINK:
134 mode |= stat.S_IFLNK
143 mode |= stat.S_IFLNK
135 else:
144 else:
136 mode |= stat.S_IFREG
145 mode |= stat.S_IFREG
137 return cls(
146 return cls(
138 wc_tracked=bool(flags & DIRSTATE_V2_WDIR_TRACKED),
147 wc_tracked=bool(flags & DIRSTATE_V2_WDIR_TRACKED),
139 p1_tracked=bool(flags & DIRSTATE_V2_P1_TRACKED),
148 p1_tracked=bool(flags & DIRSTATE_V2_P1_TRACKED),
140 p2_info=bool(flags & DIRSTATE_V2_P2_INFO),
149 p2_info=bool(flags & DIRSTATE_V2_P2_INFO),
141 has_meaningful_data=has_mode_size,
150 has_meaningful_data=has_mode_size,
142 has_meaningful_mtime=bool(flags & DIRSTATE_V2_HAS_FILE_MTIME),
151 has_meaningful_mtime=has_meaningful_mtime,
143 parentfiledata=(mode, size, mtime),
152 parentfiledata=(mode, size, mtime),
144 )
153 )
145
154
146 @classmethod
155 @classmethod
147 def from_v1_data(cls, state, mode, size, mtime):
156 def from_v1_data(cls, state, mode, size, mtime):
148 """Build a new DirstateItem object from V1 data
157 """Build a new DirstateItem object from V1 data
149
158
150 Since the dirstate-v1 format is frozen, the signature of this function
159 Since the dirstate-v1 format is frozen, the signature of this function
151 is not expected to change, unlike the __init__ one.
160 is not expected to change, unlike the __init__ one.
152 """
161 """
153 if state == b'm':
162 if state == b'm':
154 return cls(wc_tracked=True, p1_tracked=True, p2_info=True)
163 return cls(wc_tracked=True, p1_tracked=True, p2_info=True)
155 elif state == b'a':
164 elif state == b'a':
156 return cls(wc_tracked=True)
165 return cls(wc_tracked=True)
157 elif state == b'r':
166 elif state == b'r':
158 if size == NONNORMAL:
167 if size == NONNORMAL:
159 p1_tracked = True
168 p1_tracked = True
160 p2_info = True
169 p2_info = True
161 elif size == FROM_P2:
170 elif size == FROM_P2:
162 p1_tracked = False
171 p1_tracked = False
163 p2_info = True
172 p2_info = True
164 else:
173 else:
165 p1_tracked = True
174 p1_tracked = True
166 p2_info = False
175 p2_info = False
167 return cls(p1_tracked=p1_tracked, p2_info=p2_info)
176 return cls(p1_tracked=p1_tracked, p2_info=p2_info)
168 elif state == b'n':
177 elif state == b'n':
169 if size == FROM_P2:
178 if size == FROM_P2:
170 return cls(wc_tracked=True, p2_info=True)
179 return cls(wc_tracked=True, p2_info=True)
171 elif size == NONNORMAL:
180 elif size == NONNORMAL:
172 return cls(wc_tracked=True, p1_tracked=True)
181 return cls(wc_tracked=True, p1_tracked=True)
173 elif mtime == AMBIGUOUS_TIME:
182 elif mtime == AMBIGUOUS_TIME:
174 return cls(
183 return cls(
175 wc_tracked=True,
184 wc_tracked=True,
176 p1_tracked=True,
185 p1_tracked=True,
177 has_meaningful_mtime=False,
186 has_meaningful_mtime=False,
178 parentfiledata=(mode, size, 42),
187 parentfiledata=(mode, size, 42),
179 )
188 )
180 else:
189 else:
181 return cls(
190 return cls(
182 wc_tracked=True,
191 wc_tracked=True,
183 p1_tracked=True,
192 p1_tracked=True,
184 parentfiledata=(mode, size, mtime),
193 parentfiledata=(mode, size, mtime),
185 )
194 )
186 else:
195 else:
187 raise RuntimeError(b'unknown state: %s' % state)
196 raise RuntimeError(b'unknown state: %s' % state)
188
197
189 def set_possibly_dirty(self):
198 def set_possibly_dirty(self):
190 """Mark a file as "possibly dirty"
199 """Mark a file as "possibly dirty"
191
200
192 This means the next status call will have to actually check its content
201 This means the next status call will have to actually check its content
193 to make sure it is correct.
202 to make sure it is correct.
194 """
203 """
195 self._mtime = None
204 self._mtime = None
196
205
197 def set_clean(self, mode, size, mtime):
206 def set_clean(self, mode, size, mtime):
198 """mark a file as "clean" cancelling potential "possibly dirty call"
207 """mark a file as "clean" cancelling potential "possibly dirty call"
199
208
200 Note: this function is a descendant of `dirstate.normal` and is
209 Note: this function is a descendant of `dirstate.normal` and is
201 currently expected to be call on "normal" entry only. There are not
210 currently expected to be call on "normal" entry only. There are not
202 reason for this to not change in the future as long as the ccode is
211 reason for this to not change in the future as long as the ccode is
203 updated to preserve the proper state of the non-normal files.
212 updated to preserve the proper state of the non-normal files.
204 """
213 """
205 self._wc_tracked = True
214 self._wc_tracked = True
206 self._p1_tracked = True
215 self._p1_tracked = True
207 self._mode = mode
216 self._mode = mode
208 self._size = size
217 self._size = size
209 self._mtime = mtime
218 self._mtime = mtime
210
219
211 def set_tracked(self):
220 def set_tracked(self):
212 """mark a file as tracked in the working copy
221 """mark a file as tracked in the working copy
213
222
214 This will ultimately be called by command like `hg add`.
223 This will ultimately be called by command like `hg add`.
215 """
224 """
216 self._wc_tracked = True
225 self._wc_tracked = True
217 # `set_tracked` is replacing various `normallookup` call. So we mark
226 # `set_tracked` is replacing various `normallookup` call. So we mark
218 # the files as needing lookup
227 # the files as needing lookup
219 #
228 #
220 # Consider dropping this in the future in favor of something less broad.
229 # Consider dropping this in the future in favor of something less broad.
221 self._mtime = None
230 self._mtime = None
222
231
223 def set_untracked(self):
232 def set_untracked(self):
224 """mark a file as untracked in the working copy
233 """mark a file as untracked in the working copy
225
234
226 This will ultimately be called by command like `hg remove`.
235 This will ultimately be called by command like `hg remove`.
227 """
236 """
228 self._wc_tracked = False
237 self._wc_tracked = False
229 self._mode = None
238 self._mode = None
230 self._size = None
239 self._size = None
231 self._mtime = None
240 self._mtime = None
232
241
233 def drop_merge_data(self):
242 def drop_merge_data(self):
234 """remove all "merge-only" from a DirstateItem
243 """remove all "merge-only" from a DirstateItem
235
244
236 This is to be call by the dirstatemap code when the second parent is dropped
245 This is to be call by the dirstatemap code when the second parent is dropped
237 """
246 """
238 if self._p2_info:
247 if self._p2_info:
239 self._p2_info = False
248 self._p2_info = False
240 self._mode = None
249 self._mode = None
241 self._size = None
250 self._size = None
242 self._mtime = None
251 self._mtime = None
243
252
244 @property
253 @property
245 def mode(self):
254 def mode(self):
246 return self.v1_mode()
255 return self.v1_mode()
247
256
248 @property
257 @property
249 def size(self):
258 def size(self):
250 return self.v1_size()
259 return self.v1_size()
251
260
252 @property
261 @property
253 def mtime(self):
262 def mtime(self):
254 return self.v1_mtime()
263 return self.v1_mtime()
255
264
256 @property
265 @property
257 def state(self):
266 def state(self):
258 """
267 """
259 States are:
268 States are:
260 n normal
269 n normal
261 m needs merging
270 m needs merging
262 r marked for removal
271 r marked for removal
263 a marked for addition
272 a marked for addition
264
273
265 XXX This "state" is a bit obscure and mostly a direct expression of the
274 XXX This "state" is a bit obscure and mostly a direct expression of the
266 dirstatev1 format. It would make sense to ultimately deprecate it in
275 dirstatev1 format. It would make sense to ultimately deprecate it in
267 favor of the more "semantic" attributes.
276 favor of the more "semantic" attributes.
268 """
277 """
269 if not self.any_tracked:
278 if not self.any_tracked:
270 return b'?'
279 return b'?'
271 return self.v1_state()
280 return self.v1_state()
272
281
273 @property
282 @property
274 def tracked(self):
283 def tracked(self):
275 """True if the file is tracked in the working copy"""
284 """True if the file is tracked in the working copy"""
276 return self._wc_tracked
285 return self._wc_tracked
277
286
278 @property
287 @property
279 def any_tracked(self):
288 def any_tracked(self):
280 """True if the file is tracked anywhere (wc or parents)"""
289 """True if the file is tracked anywhere (wc or parents)"""
281 return self._wc_tracked or self._p1_tracked or self._p2_info
290 return self._wc_tracked or self._p1_tracked or self._p2_info
282
291
283 @property
292 @property
284 def added(self):
293 def added(self):
285 """True if the file has been added"""
294 """True if the file has been added"""
286 return self._wc_tracked and not (self._p1_tracked or self._p2_info)
295 return self._wc_tracked and not (self._p1_tracked or self._p2_info)
287
296
288 @property
297 @property
289 def maybe_clean(self):
298 def maybe_clean(self):
290 """True if the file has a chance to be in the "clean" state"""
299 """True if the file has a chance to be in the "clean" state"""
291 if not self._wc_tracked:
300 if not self._wc_tracked:
292 return False
301 return False
293 elif not self._p1_tracked:
302 elif not self._p1_tracked:
294 return False
303 return False
295 elif self._p2_info:
304 elif self._p2_info:
296 return False
305 return False
297 return True
306 return True
298
307
299 @property
308 @property
300 def p1_tracked(self):
309 def p1_tracked(self):
301 """True if the file is tracked in the first parent manifest"""
310 """True if the file is tracked in the first parent manifest"""
302 return self._p1_tracked
311 return self._p1_tracked
303
312
304 @property
313 @property
305 def p2_info(self):
314 def p2_info(self):
306 """True if the file needed to merge or apply any input from p2
315 """True if the file needed to merge or apply any input from p2
307
316
308 See the class documentation for details.
317 See the class documentation for details.
309 """
318 """
310 return self._wc_tracked and self._p2_info
319 return self._wc_tracked and self._p2_info
311
320
312 @property
321 @property
313 def removed(self):
322 def removed(self):
314 """True if the file has been removed"""
323 """True if the file has been removed"""
315 return not self._wc_tracked and (self._p1_tracked or self._p2_info)
324 return not self._wc_tracked and (self._p1_tracked or self._p2_info)
316
325
317 def v2_data(self):
326 def v2_data(self):
318 """Returns (flags, size, mtime) for v2 serialization"""
327 """Returns (flags, size, mtime) for v2 serialization"""
319 flags = 0
328 flags = 0
320 if self._wc_tracked:
329 if self._wc_tracked:
321 flags |= DIRSTATE_V2_WDIR_TRACKED
330 flags |= DIRSTATE_V2_WDIR_TRACKED
322 if self._p1_tracked:
331 if self._p1_tracked:
323 flags |= DIRSTATE_V2_P1_TRACKED
332 flags |= DIRSTATE_V2_P1_TRACKED
324 if self._p2_info:
333 if self._p2_info:
325 flags |= DIRSTATE_V2_P2_INFO
334 flags |= DIRSTATE_V2_P2_INFO
326 if self._mode is not None and self._size is not None:
335 if self._mode is not None and self._size is not None:
327 flags |= DIRSTATE_V2_HAS_MODE_AND_SIZE
336 flags |= DIRSTATE_V2_HAS_MODE_AND_SIZE
328 if self.mode & stat.S_IXUSR:
337 if self.mode & stat.S_IXUSR:
329 flags |= DIRSTATE_V2_MODE_EXEC_PERM
338 flags |= DIRSTATE_V2_MODE_EXEC_PERM
330 if stat.S_ISLNK(self.mode):
339 if stat.S_ISLNK(self.mode):
331 flags |= DIRSTATE_V2_MODE_IS_SYMLINK
340 flags |= DIRSTATE_V2_MODE_IS_SYMLINK
332 if self._mtime is not None:
341 if self._mtime is not None:
333 flags |= DIRSTATE_V2_HAS_FILE_MTIME
342 flags |= DIRSTATE_V2_HAS_FILE_MTIME
334 return (flags, self._size or 0, self._mtime or 0)
343 return (flags, self._size or 0, self._mtime or 0)
335
344
336 def v1_state(self):
345 def v1_state(self):
337 """return a "state" suitable for v1 serialization"""
346 """return a "state" suitable for v1 serialization"""
338 if not self.any_tracked:
347 if not self.any_tracked:
339 # the object has no state to record, this is -currently-
348 # the object has no state to record, this is -currently-
340 # unsupported
349 # unsupported
341 raise RuntimeError('untracked item')
350 raise RuntimeError('untracked item')
342 elif self.removed:
351 elif self.removed:
343 return b'r'
352 return b'r'
344 elif self._p1_tracked and self._p2_info:
353 elif self._p1_tracked and self._p2_info:
345 return b'm'
354 return b'm'
346 elif self.added:
355 elif self.added:
347 return b'a'
356 return b'a'
348 else:
357 else:
349 return b'n'
358 return b'n'
350
359
351 def v1_mode(self):
360 def v1_mode(self):
352 """return a "mode" suitable for v1 serialization"""
361 """return a "mode" suitable for v1 serialization"""
353 return self._mode if self._mode is not None else 0
362 return self._mode if self._mode is not None else 0
354
363
355 def v1_size(self):
364 def v1_size(self):
356 """return a "size" suitable for v1 serialization"""
365 """return a "size" suitable for v1 serialization"""
357 if not self.any_tracked:
366 if not self.any_tracked:
358 # the object has no state to record, this is -currently-
367 # the object has no state to record, this is -currently-
359 # unsupported
368 # unsupported
360 raise RuntimeError('untracked item')
369 raise RuntimeError('untracked item')
361 elif self.removed and self._p1_tracked and self._p2_info:
370 elif self.removed and self._p1_tracked and self._p2_info:
362 return NONNORMAL
371 return NONNORMAL
363 elif self._p2_info:
372 elif self._p2_info:
364 return FROM_P2
373 return FROM_P2
365 elif self.removed:
374 elif self.removed:
366 return 0
375 return 0
367 elif self.added:
376 elif self.added:
368 return NONNORMAL
377 return NONNORMAL
369 elif self._size is None:
378 elif self._size is None:
370 return NONNORMAL
379 return NONNORMAL
371 else:
380 else:
372 return self._size
381 return self._size
373
382
374 def v1_mtime(self):
383 def v1_mtime(self):
375 """return a "mtime" suitable for v1 serialization"""
384 """return a "mtime" suitable for v1 serialization"""
376 if not self.any_tracked:
385 if not self.any_tracked:
377 # the object has no state to record, this is -currently-
386 # the object has no state to record, this is -currently-
378 # unsupported
387 # unsupported
379 raise RuntimeError('untracked item')
388 raise RuntimeError('untracked item')
380 elif self.removed:
389 elif self.removed:
381 return 0
390 return 0
382 elif self._mtime is None:
391 elif self._mtime is None:
383 return AMBIGUOUS_TIME
392 return AMBIGUOUS_TIME
384 elif self._p2_info:
393 elif self._p2_info:
385 return AMBIGUOUS_TIME
394 return AMBIGUOUS_TIME
386 elif not self._p1_tracked:
395 elif not self._p1_tracked:
387 return AMBIGUOUS_TIME
396 return AMBIGUOUS_TIME
388 else:
397 else:
389 return self._mtime
398 return self._mtime
390
399
391 def need_delay(self, now):
400 def need_delay(self, now):
392 """True if the stored mtime would be ambiguous with the current time"""
401 """True if the stored mtime would be ambiguous with the current time"""
393 return self.v1_state() == b'n' and self.v1_mtime() == now
402 return self.v1_state() == b'n' and self.v1_mtime() == now
394
403
395
404
396 def gettype(q):
405 def gettype(q):
397 return int(q & 0xFFFF)
406 return int(q & 0xFFFF)
398
407
399
408
400 class BaseIndexObject(object):
409 class BaseIndexObject(object):
401 # Can I be passed to an algorithm implemented in Rust?
410 # Can I be passed to an algorithm implemented in Rust?
402 rust_ext_compat = 0
411 rust_ext_compat = 0
403 # Format of an index entry according to Python's `struct` language
412 # Format of an index entry according to Python's `struct` language
404 index_format = revlog_constants.INDEX_ENTRY_V1
413 index_format = revlog_constants.INDEX_ENTRY_V1
405 # Size of a C unsigned long long int, platform independent
414 # Size of a C unsigned long long int, platform independent
406 big_int_size = struct.calcsize(b'>Q')
415 big_int_size = struct.calcsize(b'>Q')
407 # Size of a C long int, platform independent
416 # Size of a C long int, platform independent
408 int_size = struct.calcsize(b'>i')
417 int_size = struct.calcsize(b'>i')
409 # An empty index entry, used as a default value to be overridden, or nullrev
418 # An empty index entry, used as a default value to be overridden, or nullrev
410 null_item = (
419 null_item = (
411 0,
420 0,
412 0,
421 0,
413 0,
422 0,
414 -1,
423 -1,
415 -1,
424 -1,
416 -1,
425 -1,
417 -1,
426 -1,
418 sha1nodeconstants.nullid,
427 sha1nodeconstants.nullid,
419 0,
428 0,
420 0,
429 0,
421 revlog_constants.COMP_MODE_INLINE,
430 revlog_constants.COMP_MODE_INLINE,
422 revlog_constants.COMP_MODE_INLINE,
431 revlog_constants.COMP_MODE_INLINE,
423 )
432 )
424
433
425 @util.propertycache
434 @util.propertycache
426 def entry_size(self):
435 def entry_size(self):
427 return self.index_format.size
436 return self.index_format.size
428
437
429 @property
438 @property
430 def nodemap(self):
439 def nodemap(self):
431 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
440 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
432 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
441 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
433 return self._nodemap
442 return self._nodemap
434
443
435 @util.propertycache
444 @util.propertycache
436 def _nodemap(self):
445 def _nodemap(self):
437 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
446 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
438 for r in range(0, len(self)):
447 for r in range(0, len(self)):
439 n = self[r][7]
448 n = self[r][7]
440 nodemap[n] = r
449 nodemap[n] = r
441 return nodemap
450 return nodemap
442
451
443 def has_node(self, node):
452 def has_node(self, node):
444 """return True if the node exists in the index"""
453 """return True if the node exists in the index"""
445 return node in self._nodemap
454 return node in self._nodemap
446
455
447 def rev(self, node):
456 def rev(self, node):
448 """return a revision for a node
457 """return a revision for a node
449
458
450 If the node is unknown, raise a RevlogError"""
459 If the node is unknown, raise a RevlogError"""
451 return self._nodemap[node]
460 return self._nodemap[node]
452
461
453 def get_rev(self, node):
462 def get_rev(self, node):
454 """return a revision for a node
463 """return a revision for a node
455
464
456 If the node is unknown, return None"""
465 If the node is unknown, return None"""
457 return self._nodemap.get(node)
466 return self._nodemap.get(node)
458
467
459 def _stripnodes(self, start):
468 def _stripnodes(self, start):
460 if '_nodemap' in vars(self):
469 if '_nodemap' in vars(self):
461 for r in range(start, len(self)):
470 for r in range(start, len(self)):
462 n = self[r][7]
471 n = self[r][7]
463 del self._nodemap[n]
472 del self._nodemap[n]
464
473
465 def clearcaches(self):
474 def clearcaches(self):
466 self.__dict__.pop('_nodemap', None)
475 self.__dict__.pop('_nodemap', None)
467
476
468 def __len__(self):
477 def __len__(self):
469 return self._lgt + len(self._extra)
478 return self._lgt + len(self._extra)
470
479
471 def append(self, tup):
480 def append(self, tup):
472 if '_nodemap' in vars(self):
481 if '_nodemap' in vars(self):
473 self._nodemap[tup[7]] = len(self)
482 self._nodemap[tup[7]] = len(self)
474 data = self._pack_entry(len(self), tup)
483 data = self._pack_entry(len(self), tup)
475 self._extra.append(data)
484 self._extra.append(data)
476
485
477 def _pack_entry(self, rev, entry):
486 def _pack_entry(self, rev, entry):
478 assert entry[8] == 0
487 assert entry[8] == 0
479 assert entry[9] == 0
488 assert entry[9] == 0
480 return self.index_format.pack(*entry[:8])
489 return self.index_format.pack(*entry[:8])
481
490
482 def _check_index(self, i):
491 def _check_index(self, i):
483 if not isinstance(i, int):
492 if not isinstance(i, int):
484 raise TypeError(b"expecting int indexes")
493 raise TypeError(b"expecting int indexes")
485 if i < 0 or i >= len(self):
494 if i < 0 or i >= len(self):
486 raise IndexError
495 raise IndexError
487
496
488 def __getitem__(self, i):
497 def __getitem__(self, i):
489 if i == -1:
498 if i == -1:
490 return self.null_item
499 return self.null_item
491 self._check_index(i)
500 self._check_index(i)
492 if i >= self._lgt:
501 if i >= self._lgt:
493 data = self._extra[i - self._lgt]
502 data = self._extra[i - self._lgt]
494 else:
503 else:
495 index = self._calculate_index(i)
504 index = self._calculate_index(i)
496 data = self._data[index : index + self.entry_size]
505 data = self._data[index : index + self.entry_size]
497 r = self._unpack_entry(i, data)
506 r = self._unpack_entry(i, data)
498 if self._lgt and i == 0:
507 if self._lgt and i == 0:
499 offset = revlogutils.offset_type(0, gettype(r[0]))
508 offset = revlogutils.offset_type(0, gettype(r[0]))
500 r = (offset,) + r[1:]
509 r = (offset,) + r[1:]
501 return r
510 return r
502
511
503 def _unpack_entry(self, rev, data):
512 def _unpack_entry(self, rev, data):
504 r = self.index_format.unpack(data)
513 r = self.index_format.unpack(data)
505 r = r + (
514 r = r + (
506 0,
515 0,
507 0,
516 0,
508 revlog_constants.COMP_MODE_INLINE,
517 revlog_constants.COMP_MODE_INLINE,
509 revlog_constants.COMP_MODE_INLINE,
518 revlog_constants.COMP_MODE_INLINE,
510 )
519 )
511 return r
520 return r
512
521
513 def pack_header(self, header):
522 def pack_header(self, header):
514 """pack header information as binary"""
523 """pack header information as binary"""
515 v_fmt = revlog_constants.INDEX_HEADER
524 v_fmt = revlog_constants.INDEX_HEADER
516 return v_fmt.pack(header)
525 return v_fmt.pack(header)
517
526
518 def entry_binary(self, rev):
527 def entry_binary(self, rev):
519 """return the raw binary string representing a revision"""
528 """return the raw binary string representing a revision"""
520 entry = self[rev]
529 entry = self[rev]
521 p = revlog_constants.INDEX_ENTRY_V1.pack(*entry[:8])
530 p = revlog_constants.INDEX_ENTRY_V1.pack(*entry[:8])
522 if rev == 0:
531 if rev == 0:
523 p = p[revlog_constants.INDEX_HEADER.size :]
532 p = p[revlog_constants.INDEX_HEADER.size :]
524 return p
533 return p
525
534
526
535
527 class IndexObject(BaseIndexObject):
536 class IndexObject(BaseIndexObject):
528 def __init__(self, data):
537 def __init__(self, data):
529 assert len(data) % self.entry_size == 0, (
538 assert len(data) % self.entry_size == 0, (
530 len(data),
539 len(data),
531 self.entry_size,
540 self.entry_size,
532 len(data) % self.entry_size,
541 len(data) % self.entry_size,
533 )
542 )
534 self._data = data
543 self._data = data
535 self._lgt = len(data) // self.entry_size
544 self._lgt = len(data) // self.entry_size
536 self._extra = []
545 self._extra = []
537
546
538 def _calculate_index(self, i):
547 def _calculate_index(self, i):
539 return i * self.entry_size
548 return i * self.entry_size
540
549
541 def __delitem__(self, i):
550 def __delitem__(self, i):
542 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
551 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
543 raise ValueError(b"deleting slices only supports a:-1 with step 1")
552 raise ValueError(b"deleting slices only supports a:-1 with step 1")
544 i = i.start
553 i = i.start
545 self._check_index(i)
554 self._check_index(i)
546 self._stripnodes(i)
555 self._stripnodes(i)
547 if i < self._lgt:
556 if i < self._lgt:
548 self._data = self._data[: i * self.entry_size]
557 self._data = self._data[: i * self.entry_size]
549 self._lgt = i
558 self._lgt = i
550 self._extra = []
559 self._extra = []
551 else:
560 else:
552 self._extra = self._extra[: i - self._lgt]
561 self._extra = self._extra[: i - self._lgt]
553
562
554
563
555 class PersistentNodeMapIndexObject(IndexObject):
564 class PersistentNodeMapIndexObject(IndexObject):
556 """a Debug oriented class to test persistent nodemap
565 """a Debug oriented class to test persistent nodemap
557
566
558 We need a simple python object to test API and higher level behavior. See
567 We need a simple python object to test API and higher level behavior. See
559 the Rust implementation for more serious usage. This should be used only
568 the Rust implementation for more serious usage. This should be used only
560 through the dedicated `devel.persistent-nodemap` config.
569 through the dedicated `devel.persistent-nodemap` config.
561 """
570 """
562
571
563 def nodemap_data_all(self):
572 def nodemap_data_all(self):
564 """Return bytes containing a full serialization of a nodemap
573 """Return bytes containing a full serialization of a nodemap
565
574
566 The nodemap should be valid for the full set of revisions in the
575 The nodemap should be valid for the full set of revisions in the
567 index."""
576 index."""
568 return nodemaputil.persistent_data(self)
577 return nodemaputil.persistent_data(self)
569
578
570 def nodemap_data_incremental(self):
579 def nodemap_data_incremental(self):
571 """Return bytes containing an incremental update to persistent nodemap
580 """Return bytes containing an incremental update to persistent nodemap
572
581
573 This contains the data for an append-only update of the data provided
582 This contains the data for an append-only update of the data provided
574 in the last call to `update_nodemap_data`.
583 in the last call to `update_nodemap_data`.
575 """
584 """
576 if self._nm_root is None:
585 if self._nm_root is None:
577 return None
586 return None
578 docket = self._nm_docket
587 docket = self._nm_docket
579 changed, data = nodemaputil.update_persistent_data(
588 changed, data = nodemaputil.update_persistent_data(
580 self, self._nm_root, self._nm_max_idx, self._nm_docket.tip_rev
589 self, self._nm_root, self._nm_max_idx, self._nm_docket.tip_rev
581 )
590 )
582
591
583 self._nm_root = self._nm_max_idx = self._nm_docket = None
592 self._nm_root = self._nm_max_idx = self._nm_docket = None
584 return docket, changed, data
593 return docket, changed, data
585
594
586 def update_nodemap_data(self, docket, nm_data):
595 def update_nodemap_data(self, docket, nm_data):
587 """provide full block of persisted binary data for a nodemap
596 """provide full block of persisted binary data for a nodemap
588
597
589 The data are expected to come from disk. See `nodemap_data_all` for a
598 The data are expected to come from disk. See `nodemap_data_all` for a
590 producer of such data."""
599 producer of such data."""
591 if nm_data is not None:
600 if nm_data is not None:
592 self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
601 self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
593 if self._nm_root:
602 if self._nm_root:
594 self._nm_docket = docket
603 self._nm_docket = docket
595 else:
604 else:
596 self._nm_root = self._nm_max_idx = self._nm_docket = None
605 self._nm_root = self._nm_max_idx = self._nm_docket = None
597
606
598
607
599 class InlinedIndexObject(BaseIndexObject):
608 class InlinedIndexObject(BaseIndexObject):
600 def __init__(self, data, inline=0):
609 def __init__(self, data, inline=0):
601 self._data = data
610 self._data = data
602 self._lgt = self._inline_scan(None)
611 self._lgt = self._inline_scan(None)
603 self._inline_scan(self._lgt)
612 self._inline_scan(self._lgt)
604 self._extra = []
613 self._extra = []
605
614
606 def _inline_scan(self, lgt):
615 def _inline_scan(self, lgt):
607 off = 0
616 off = 0
608 if lgt is not None:
617 if lgt is not None:
609 self._offsets = [0] * lgt
618 self._offsets = [0] * lgt
610 count = 0
619 count = 0
611 while off <= len(self._data) - self.entry_size:
620 while off <= len(self._data) - self.entry_size:
612 start = off + self.big_int_size
621 start = off + self.big_int_size
613 (s,) = struct.unpack(
622 (s,) = struct.unpack(
614 b'>i',
623 b'>i',
615 self._data[start : start + self.int_size],
624 self._data[start : start + self.int_size],
616 )
625 )
617 if lgt is not None:
626 if lgt is not None:
618 self._offsets[count] = off
627 self._offsets[count] = off
619 count += 1
628 count += 1
620 off += self.entry_size + s
629 off += self.entry_size + s
621 if off != len(self._data):
630 if off != len(self._data):
622 raise ValueError(b"corrupted data")
631 raise ValueError(b"corrupted data")
623 return count
632 return count
624
633
625 def __delitem__(self, i):
634 def __delitem__(self, i):
626 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
635 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
627 raise ValueError(b"deleting slices only supports a:-1 with step 1")
636 raise ValueError(b"deleting slices only supports a:-1 with step 1")
628 i = i.start
637 i = i.start
629 self._check_index(i)
638 self._check_index(i)
630 self._stripnodes(i)
639 self._stripnodes(i)
631 if i < self._lgt:
640 if i < self._lgt:
632 self._offsets = self._offsets[:i]
641 self._offsets = self._offsets[:i]
633 self._lgt = i
642 self._lgt = i
634 self._extra = []
643 self._extra = []
635 else:
644 else:
636 self._extra = self._extra[: i - self._lgt]
645 self._extra = self._extra[: i - self._lgt]
637
646
638 def _calculate_index(self, i):
647 def _calculate_index(self, i):
639 return self._offsets[i]
648 return self._offsets[i]
640
649
641
650
642 def parse_index2(data, inline, revlogv2=False):
651 def parse_index2(data, inline, revlogv2=False):
643 if not inline:
652 if not inline:
644 cls = IndexObject2 if revlogv2 else IndexObject
653 cls = IndexObject2 if revlogv2 else IndexObject
645 return cls(data), None
654 return cls(data), None
646 cls = InlinedIndexObject
655 cls = InlinedIndexObject
647 return cls(data, inline), (0, data)
656 return cls(data, inline), (0, data)
648
657
649
658
650 def parse_index_cl_v2(data):
659 def parse_index_cl_v2(data):
651 return IndexChangelogV2(data), None
660 return IndexChangelogV2(data), None
652
661
653
662
654 class IndexObject2(IndexObject):
663 class IndexObject2(IndexObject):
655 index_format = revlog_constants.INDEX_ENTRY_V2
664 index_format = revlog_constants.INDEX_ENTRY_V2
656
665
657 def replace_sidedata_info(
666 def replace_sidedata_info(
658 self,
667 self,
659 rev,
668 rev,
660 sidedata_offset,
669 sidedata_offset,
661 sidedata_length,
670 sidedata_length,
662 offset_flags,
671 offset_flags,
663 compression_mode,
672 compression_mode,
664 ):
673 ):
665 """
674 """
666 Replace an existing index entry's sidedata offset and length with new
675 Replace an existing index entry's sidedata offset and length with new
667 ones.
676 ones.
668 This cannot be used outside of the context of sidedata rewriting,
677 This cannot be used outside of the context of sidedata rewriting,
669 inside the transaction that creates the revision `rev`.
678 inside the transaction that creates the revision `rev`.
670 """
679 """
671 if rev < 0:
680 if rev < 0:
672 raise KeyError
681 raise KeyError
673 self._check_index(rev)
682 self._check_index(rev)
674 if rev < self._lgt:
683 if rev < self._lgt:
675 msg = b"cannot rewrite entries outside of this transaction"
684 msg = b"cannot rewrite entries outside of this transaction"
676 raise KeyError(msg)
685 raise KeyError(msg)
677 else:
686 else:
678 entry = list(self[rev])
687 entry = list(self[rev])
679 entry[0] = offset_flags
688 entry[0] = offset_flags
680 entry[8] = sidedata_offset
689 entry[8] = sidedata_offset
681 entry[9] = sidedata_length
690 entry[9] = sidedata_length
682 entry[11] = compression_mode
691 entry[11] = compression_mode
683 entry = tuple(entry)
692 entry = tuple(entry)
684 new = self._pack_entry(rev, entry)
693 new = self._pack_entry(rev, entry)
685 self._extra[rev - self._lgt] = new
694 self._extra[rev - self._lgt] = new
686
695
687 def _unpack_entry(self, rev, data):
696 def _unpack_entry(self, rev, data):
688 data = self.index_format.unpack(data)
697 data = self.index_format.unpack(data)
689 entry = data[:10]
698 entry = data[:10]
690 data_comp = data[10] & 3
699 data_comp = data[10] & 3
691 sidedata_comp = (data[10] & (3 << 2)) >> 2
700 sidedata_comp = (data[10] & (3 << 2)) >> 2
692 return entry + (data_comp, sidedata_comp)
701 return entry + (data_comp, sidedata_comp)
693
702
694 def _pack_entry(self, rev, entry):
703 def _pack_entry(self, rev, entry):
695 data = entry[:10]
704 data = entry[:10]
696 data_comp = entry[10] & 3
705 data_comp = entry[10] & 3
697 sidedata_comp = (entry[11] & 3) << 2
706 sidedata_comp = (entry[11] & 3) << 2
698 data += (data_comp | sidedata_comp,)
707 data += (data_comp | sidedata_comp,)
699
708
700 return self.index_format.pack(*data)
709 return self.index_format.pack(*data)
701
710
702 def entry_binary(self, rev):
711 def entry_binary(self, rev):
703 """return the raw binary string representing a revision"""
712 """return the raw binary string representing a revision"""
704 entry = self[rev]
713 entry = self[rev]
705 return self._pack_entry(rev, entry)
714 return self._pack_entry(rev, entry)
706
715
707 def pack_header(self, header):
716 def pack_header(self, header):
708 """pack header information as binary"""
717 """pack header information as binary"""
709 msg = 'version header should go in the docket, not the index: %d'
718 msg = 'version header should go in the docket, not the index: %d'
710 msg %= header
719 msg %= header
711 raise error.ProgrammingError(msg)
720 raise error.ProgrammingError(msg)
712
721
713
722
714 class IndexChangelogV2(IndexObject2):
723 class IndexChangelogV2(IndexObject2):
715 index_format = revlog_constants.INDEX_ENTRY_CL_V2
724 index_format = revlog_constants.INDEX_ENTRY_CL_V2
716
725
717 def _unpack_entry(self, rev, data, r=True):
726 def _unpack_entry(self, rev, data, r=True):
718 items = self.index_format.unpack(data)
727 items = self.index_format.unpack(data)
719 entry = items[:3] + (rev, rev) + items[3:8]
728 entry = items[:3] + (rev, rev) + items[3:8]
720 data_comp = items[8] & 3
729 data_comp = items[8] & 3
721 sidedata_comp = (items[8] >> 2) & 3
730 sidedata_comp = (items[8] >> 2) & 3
722 return entry + (data_comp, sidedata_comp)
731 return entry + (data_comp, sidedata_comp)
723
732
724 def _pack_entry(self, rev, entry):
733 def _pack_entry(self, rev, entry):
725 assert entry[3] == rev, entry[3]
734 assert entry[3] == rev, entry[3]
726 assert entry[4] == rev, entry[4]
735 assert entry[4] == rev, entry[4]
727 data = entry[:3] + entry[5:10]
736 data = entry[:3] + entry[5:10]
728 data_comp = entry[10] & 3
737 data_comp = entry[10] & 3
729 sidedata_comp = (entry[11] & 3) << 2
738 sidedata_comp = (entry[11] & 3) << 2
730 data += (data_comp | sidedata_comp,)
739 data += (data_comp | sidedata_comp,)
731 return self.index_format.pack(*data)
740 return self.index_format.pack(*data)
732
741
733
742
734 def parse_index_devel_nodemap(data, inline):
743 def parse_index_devel_nodemap(data, inline):
735 """like parse_index2, but always return a PersistentNodeMapIndexObject"""
744 """like parse_index2, but always return a PersistentNodeMapIndexObject"""
736 return PersistentNodeMapIndexObject(data), None
745 return PersistentNodeMapIndexObject(data), None
737
746
738
747
739 def parse_dirstate(dmap, copymap, st):
748 def parse_dirstate(dmap, copymap, st):
740 parents = [st[:20], st[20:40]]
749 parents = [st[:20], st[20:40]]
741 # dereference fields so they will be local in loop
750 # dereference fields so they will be local in loop
742 format = b">cllll"
751 format = b">cllll"
743 e_size = struct.calcsize(format)
752 e_size = struct.calcsize(format)
744 pos1 = 40
753 pos1 = 40
745 l = len(st)
754 l = len(st)
746
755
747 # the inner loop
756 # the inner loop
748 while pos1 < l:
757 while pos1 < l:
749 pos2 = pos1 + e_size
758 pos2 = pos1 + e_size
750 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
759 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
751 pos1 = pos2 + e[4]
760 pos1 = pos2 + e[4]
752 f = st[pos2:pos1]
761 f = st[pos2:pos1]
753 if b'\0' in f:
762 if b'\0' in f:
754 f, c = f.split(b'\0')
763 f, c = f.split(b'\0')
755 copymap[f] = c
764 copymap[f] = c
756 dmap[f] = DirstateItem.from_v1_data(*e[:4])
765 dmap[f] = DirstateItem.from_v1_data(*e[:4])
757 return parents
766 return parents
758
767
759
768
760 def pack_dirstate(dmap, copymap, pl, now):
769 def pack_dirstate(dmap, copymap, pl, now):
761 now = int(now)
770 now = int(now)
762 cs = stringio()
771 cs = stringio()
763 write = cs.write
772 write = cs.write
764 write(b"".join(pl))
773 write(b"".join(pl))
765 for f, e in pycompat.iteritems(dmap):
774 for f, e in pycompat.iteritems(dmap):
766 if e.need_delay(now):
775 if e.need_delay(now):
767 # The file was last modified "simultaneously" with the current
776 # The file was last modified "simultaneously" with the current
768 # write to dirstate (i.e. within the same second for file-
777 # write to dirstate (i.e. within the same second for file-
769 # systems with a granularity of 1 sec). This commonly happens
778 # systems with a granularity of 1 sec). This commonly happens
770 # for at least a couple of files on 'update'.
779 # for at least a couple of files on 'update'.
771 # The user could change the file without changing its size
780 # The user could change the file without changing its size
772 # within the same second. Invalidate the file's mtime in
781 # within the same second. Invalidate the file's mtime in
773 # dirstate, forcing future 'status' calls to compare the
782 # dirstate, forcing future 'status' calls to compare the
774 # contents of the file if the size is the same. This prevents
783 # contents of the file if the size is the same. This prevents
775 # mistakenly treating such files as clean.
784 # mistakenly treating such files as clean.
776 e.set_possibly_dirty()
785 e.set_possibly_dirty()
777
786
778 if f in copymap:
787 if f in copymap:
779 f = b"%s\0%s" % (f, copymap[f])
788 f = b"%s\0%s" % (f, copymap[f])
780 e = _pack(
789 e = _pack(
781 b">cllll",
790 b">cllll",
782 e.v1_state(),
791 e.v1_state(),
783 e.v1_mode(),
792 e.v1_mode(),
784 e.v1_size(),
793 e.v1_size(),
785 e.v1_mtime(),
794 e.v1_mtime(),
786 len(f),
795 len(f),
787 )
796 )
788 write(e)
797 write(e)
789 write(f)
798 write(f)
790 return cs.getvalue()
799 return cs.getvalue()
@@ -1,726 +1,731 b''
1 //! The "version 2" disk representation of the dirstate
1 //! The "version 2" disk representation of the dirstate
2 //!
2 //!
3 //! See `mercurial/helptext/internals/dirstate-v2.txt`
3 //! See `mercurial/helptext/internals/dirstate-v2.txt`
4
4
5 use crate::dirstate::TruncatedTimestamp;
5 use crate::dirstate::TruncatedTimestamp;
6 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
6 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
7 use crate::dirstate_tree::path_with_basename::WithBasename;
7 use crate::dirstate_tree::path_with_basename::WithBasename;
8 use crate::errors::HgError;
8 use crate::errors::HgError;
9 use crate::utils::hg_path::HgPath;
9 use crate::utils::hg_path::HgPath;
10 use crate::DirstateEntry;
10 use crate::DirstateEntry;
11 use crate::DirstateError;
11 use crate::DirstateError;
12 use crate::DirstateParents;
12 use crate::DirstateParents;
13 use bitflags::bitflags;
13 use bitflags::bitflags;
14 use bytes_cast::unaligned::{U16Be, U32Be};
14 use bytes_cast::unaligned::{U16Be, U32Be};
15 use bytes_cast::BytesCast;
15 use bytes_cast::BytesCast;
16 use format_bytes::format_bytes;
16 use format_bytes::format_bytes;
17 use std::borrow::Cow;
17 use std::borrow::Cow;
18 use std::convert::{TryFrom, TryInto};
18 use std::convert::{TryFrom, TryInto};
19
19
20 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
20 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
21 /// This is a redundant sanity check more than an actual "magic number" since
21 /// This is a redundant sanity check more than an actual "magic number" since
22 /// `.hg/requires` already governs which format should be used.
22 /// `.hg/requires` already governs which format should be used.
23 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
23 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
24
24
25 /// Keep space for 256-bit hashes
25 /// Keep space for 256-bit hashes
26 const STORED_NODE_ID_BYTES: usize = 32;
26 const STORED_NODE_ID_BYTES: usize = 32;
27
27
28 /// … even though only 160 bits are used for now, with SHA-1
28 /// … even though only 160 bits are used for now, with SHA-1
29 const USED_NODE_ID_BYTES: usize = 20;
29 const USED_NODE_ID_BYTES: usize = 20;
30
30
31 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
31 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
32 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
32 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
33
33
34 /// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
34 /// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
35 const TREE_METADATA_SIZE: usize = 44;
35 const TREE_METADATA_SIZE: usize = 44;
36 const NODE_SIZE: usize = 44;
36 const NODE_SIZE: usize = 44;
37
37
38 /// Make sure that size-affecting changes are made knowingly
38 /// Make sure that size-affecting changes are made knowingly
39 #[allow(unused)]
39 #[allow(unused)]
40 fn static_assert_size_of() {
40 fn static_assert_size_of() {
41 let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
41 let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
42 let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
42 let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
43 let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
43 let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
44 }
44 }
45
45
46 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
46 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
47 #[derive(BytesCast)]
47 #[derive(BytesCast)]
48 #[repr(C)]
48 #[repr(C)]
49 struct DocketHeader {
49 struct DocketHeader {
50 marker: [u8; V2_FORMAT_MARKER.len()],
50 marker: [u8; V2_FORMAT_MARKER.len()],
51 parent_1: [u8; STORED_NODE_ID_BYTES],
51 parent_1: [u8; STORED_NODE_ID_BYTES],
52 parent_2: [u8; STORED_NODE_ID_BYTES],
52 parent_2: [u8; STORED_NODE_ID_BYTES],
53
53
54 metadata: TreeMetadata,
54 metadata: TreeMetadata,
55
55
56 /// Counted in bytes
56 /// Counted in bytes
57 data_size: Size,
57 data_size: Size,
58
58
59 uuid_size: u8,
59 uuid_size: u8,
60 }
60 }
61
61
62 pub struct Docket<'on_disk> {
62 pub struct Docket<'on_disk> {
63 header: &'on_disk DocketHeader,
63 header: &'on_disk DocketHeader,
64 uuid: &'on_disk [u8],
64 uuid: &'on_disk [u8],
65 }
65 }
66
66
67 /// Fields are documented in the *Tree metadata in the docket file*
67 /// Fields are documented in the *Tree metadata in the docket file*
68 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
68 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
69 #[derive(BytesCast)]
69 #[derive(BytesCast)]
70 #[repr(C)]
70 #[repr(C)]
71 struct TreeMetadata {
71 struct TreeMetadata {
72 root_nodes: ChildNodes,
72 root_nodes: ChildNodes,
73 nodes_with_entry_count: Size,
73 nodes_with_entry_count: Size,
74 nodes_with_copy_source_count: Size,
74 nodes_with_copy_source_count: Size,
75 unreachable_bytes: Size,
75 unreachable_bytes: Size,
76 unused: [u8; 4],
76 unused: [u8; 4],
77
77
78 /// See *Optional hash of ignore patterns* section of
78 /// See *Optional hash of ignore patterns* section of
79 /// `mercurial/helptext/internals/dirstate-v2.txt`
79 /// `mercurial/helptext/internals/dirstate-v2.txt`
80 ignore_patterns_hash: IgnorePatternsHash,
80 ignore_patterns_hash: IgnorePatternsHash,
81 }
81 }
82
82
83 /// Fields are documented in the *The data file format*
83 /// Fields are documented in the *The data file format*
84 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
84 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
85 #[derive(BytesCast)]
85 #[derive(BytesCast)]
86 #[repr(C)]
86 #[repr(C)]
87 pub(super) struct Node {
87 pub(super) struct Node {
88 full_path: PathSlice,
88 full_path: PathSlice,
89
89
90 /// In bytes from `self.full_path.start`
90 /// In bytes from `self.full_path.start`
91 base_name_start: PathSize,
91 base_name_start: PathSize,
92
92
93 copy_source: OptPathSlice,
93 copy_source: OptPathSlice,
94 children: ChildNodes,
94 children: ChildNodes,
95 pub(super) descendants_with_entry_count: Size,
95 pub(super) descendants_with_entry_count: Size,
96 pub(super) tracked_descendants_count: Size,
96 pub(super) tracked_descendants_count: Size,
97 flags: U16Be,
97 flags: U16Be,
98 size: U32Be,
98 size: U32Be,
99 mtime: PackedTruncatedTimestamp,
99 mtime: PackedTruncatedTimestamp,
100 }
100 }
101
101
102 bitflags! {
102 bitflags! {
103 #[repr(C)]
103 #[repr(C)]
104 struct Flags: u16 {
104 struct Flags: u16 {
105 const WDIR_TRACKED = 1 << 0;
105 const WDIR_TRACKED = 1 << 0;
106 const P1_TRACKED = 1 << 1;
106 const P1_TRACKED = 1 << 1;
107 const P2_INFO = 1 << 2;
107 const P2_INFO = 1 << 2;
108 const HAS_MODE_AND_SIZE = 1 << 3;
108 const HAS_MODE_AND_SIZE = 1 << 3;
109 const HAS_FILE_MTIME = 1 << 4;
109 const HAS_FILE_MTIME = 1 << 4;
110 const HAS_DIRECTORY_MTIME = 1 << 5;
110 const HAS_DIRECTORY_MTIME = 1 << 5;
111 const MODE_EXEC_PERM = 1 << 6;
111 const MODE_EXEC_PERM = 1 << 6;
112 const MODE_IS_SYMLINK = 1 << 7;
112 const MODE_IS_SYMLINK = 1 << 7;
113 const EXPECTED_STATE_IS_MODIFIED = 1 << 8;
113 }
114 }
114 }
115 }
115
116
116 /// Duration since the Unix epoch
117 /// Duration since the Unix epoch
117 #[derive(BytesCast, Copy, Clone)]
118 #[derive(BytesCast, Copy, Clone)]
118 #[repr(C)]
119 #[repr(C)]
119 struct PackedTruncatedTimestamp {
120 struct PackedTruncatedTimestamp {
120 truncated_seconds: U32Be,
121 truncated_seconds: U32Be,
121 nanoseconds: U32Be,
122 nanoseconds: U32Be,
122 }
123 }
123
124
124 /// Counted in bytes from the start of the file
125 /// Counted in bytes from the start of the file
125 ///
126 ///
126 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
127 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
127 type Offset = U32Be;
128 type Offset = U32Be;
128
129
129 /// Counted in number of items
130 /// Counted in number of items
130 ///
131 ///
131 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
132 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
132 type Size = U32Be;
133 type Size = U32Be;
133
134
134 /// Counted in bytes
135 /// Counted in bytes
135 ///
136 ///
136 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
137 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
137 type PathSize = U16Be;
138 type PathSize = U16Be;
138
139
139 /// A contiguous sequence of `len` times `Node`, representing the child nodes
140 /// A contiguous sequence of `len` times `Node`, representing the child nodes
140 /// of either some other node or of the repository root.
141 /// of either some other node or of the repository root.
141 ///
142 ///
142 /// Always sorted by ascending `full_path`, to allow binary search.
143 /// Always sorted by ascending `full_path`, to allow binary search.
143 /// Since nodes with the same parent nodes also have the same parent path,
144 /// Since nodes with the same parent nodes also have the same parent path,
144 /// only the `base_name`s need to be compared during binary search.
145 /// only the `base_name`s need to be compared during binary search.
145 #[derive(BytesCast, Copy, Clone)]
146 #[derive(BytesCast, Copy, Clone)]
146 #[repr(C)]
147 #[repr(C)]
147 struct ChildNodes {
148 struct ChildNodes {
148 start: Offset,
149 start: Offset,
149 len: Size,
150 len: Size,
150 }
151 }
151
152
152 /// A `HgPath` of `len` bytes
153 /// A `HgPath` of `len` bytes
153 #[derive(BytesCast, Copy, Clone)]
154 #[derive(BytesCast, Copy, Clone)]
154 #[repr(C)]
155 #[repr(C)]
155 struct PathSlice {
156 struct PathSlice {
156 start: Offset,
157 start: Offset,
157 len: PathSize,
158 len: PathSize,
158 }
159 }
159
160
160 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
161 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
161 type OptPathSlice = PathSlice;
162 type OptPathSlice = PathSlice;
162
163
163 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
164 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
164 ///
165 ///
165 /// This should only happen if Mercurial is buggy or a repository is corrupted.
166 /// This should only happen if Mercurial is buggy or a repository is corrupted.
166 #[derive(Debug)]
167 #[derive(Debug)]
167 pub struct DirstateV2ParseError;
168 pub struct DirstateV2ParseError;
168
169
169 impl From<DirstateV2ParseError> for HgError {
170 impl From<DirstateV2ParseError> for HgError {
170 fn from(_: DirstateV2ParseError) -> Self {
171 fn from(_: DirstateV2ParseError) -> Self {
171 HgError::corrupted("dirstate-v2 parse error")
172 HgError::corrupted("dirstate-v2 parse error")
172 }
173 }
173 }
174 }
174
175
175 impl From<DirstateV2ParseError> for crate::DirstateError {
176 impl From<DirstateV2ParseError> for crate::DirstateError {
176 fn from(error: DirstateV2ParseError) -> Self {
177 fn from(error: DirstateV2ParseError) -> Self {
177 HgError::from(error).into()
178 HgError::from(error).into()
178 }
179 }
179 }
180 }
180
181
181 impl<'on_disk> Docket<'on_disk> {
182 impl<'on_disk> Docket<'on_disk> {
182 pub fn parents(&self) -> DirstateParents {
183 pub fn parents(&self) -> DirstateParents {
183 use crate::Node;
184 use crate::Node;
184 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
185 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
185 .unwrap()
186 .unwrap()
186 .clone();
187 .clone();
187 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
188 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
188 .unwrap()
189 .unwrap()
189 .clone();
190 .clone();
190 DirstateParents { p1, p2 }
191 DirstateParents { p1, p2 }
191 }
192 }
192
193
193 pub fn tree_metadata(&self) -> &[u8] {
194 pub fn tree_metadata(&self) -> &[u8] {
194 self.header.metadata.as_bytes()
195 self.header.metadata.as_bytes()
195 }
196 }
196
197
197 pub fn data_size(&self) -> usize {
198 pub fn data_size(&self) -> usize {
198 // This `unwrap` could only panic on a 16-bit CPU
199 // This `unwrap` could only panic on a 16-bit CPU
199 self.header.data_size.get().try_into().unwrap()
200 self.header.data_size.get().try_into().unwrap()
200 }
201 }
201
202
202 pub fn data_filename(&self) -> String {
203 pub fn data_filename(&self) -> String {
203 String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
204 String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
204 }
205 }
205 }
206 }
206
207
207 pub fn read_docket(
208 pub fn read_docket(
208 on_disk: &[u8],
209 on_disk: &[u8],
209 ) -> Result<Docket<'_>, DirstateV2ParseError> {
210 ) -> Result<Docket<'_>, DirstateV2ParseError> {
210 let (header, uuid) =
211 let (header, uuid) =
211 DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
212 DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
212 let uuid_size = header.uuid_size as usize;
213 let uuid_size = header.uuid_size as usize;
213 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
214 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
214 Ok(Docket { header, uuid })
215 Ok(Docket { header, uuid })
215 } else {
216 } else {
216 Err(DirstateV2ParseError)
217 Err(DirstateV2ParseError)
217 }
218 }
218 }
219 }
219
220
220 pub(super) fn read<'on_disk>(
221 pub(super) fn read<'on_disk>(
221 on_disk: &'on_disk [u8],
222 on_disk: &'on_disk [u8],
222 metadata: &[u8],
223 metadata: &[u8],
223 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
224 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
224 if on_disk.is_empty() {
225 if on_disk.is_empty() {
225 return Ok(DirstateMap::empty(on_disk));
226 return Ok(DirstateMap::empty(on_disk));
226 }
227 }
227 let (meta, _) = TreeMetadata::from_bytes(metadata)
228 let (meta, _) = TreeMetadata::from_bytes(metadata)
228 .map_err(|_| DirstateV2ParseError)?;
229 .map_err(|_| DirstateV2ParseError)?;
229 let dirstate_map = DirstateMap {
230 let dirstate_map = DirstateMap {
230 on_disk,
231 on_disk,
231 root: dirstate_map::ChildNodes::OnDisk(read_nodes(
232 root: dirstate_map::ChildNodes::OnDisk(read_nodes(
232 on_disk,
233 on_disk,
233 meta.root_nodes,
234 meta.root_nodes,
234 )?),
235 )?),
235 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
236 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
236 nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
237 nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
237 ignore_patterns_hash: meta.ignore_patterns_hash,
238 ignore_patterns_hash: meta.ignore_patterns_hash,
238 unreachable_bytes: meta.unreachable_bytes.get(),
239 unreachable_bytes: meta.unreachable_bytes.get(),
239 };
240 };
240 Ok(dirstate_map)
241 Ok(dirstate_map)
241 }
242 }
242
243
243 impl Node {
244 impl Node {
244 pub(super) fn full_path<'on_disk>(
245 pub(super) fn full_path<'on_disk>(
245 &self,
246 &self,
246 on_disk: &'on_disk [u8],
247 on_disk: &'on_disk [u8],
247 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
248 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
248 read_hg_path(on_disk, self.full_path)
249 read_hg_path(on_disk, self.full_path)
249 }
250 }
250
251
251 pub(super) fn base_name_start<'on_disk>(
252 pub(super) fn base_name_start<'on_disk>(
252 &self,
253 &self,
253 ) -> Result<usize, DirstateV2ParseError> {
254 ) -> Result<usize, DirstateV2ParseError> {
254 let start = self.base_name_start.get();
255 let start = self.base_name_start.get();
255 if start < self.full_path.len.get() {
256 if start < self.full_path.len.get() {
256 let start = usize::try_from(start)
257 let start = usize::try_from(start)
257 // u16 -> usize, cannot fail since `usize` is at least 16 bits wide
258 // u16 -> usize, cannot fail since `usize` is at least 16 bits wide
258 .expect("dirstate-v2 base_name_start out of bounds");
259 .expect("dirstate-v2 base_name_start out of bounds");
259 Ok(start)
260 Ok(start)
260 } else {
261 } else {
261 Err(DirstateV2ParseError)
262 Err(DirstateV2ParseError)
262 }
263 }
263 }
264 }
264
265
265 pub(super) fn base_name<'on_disk>(
266 pub(super) fn base_name<'on_disk>(
266 &self,
267 &self,
267 on_disk: &'on_disk [u8],
268 on_disk: &'on_disk [u8],
268 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
269 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
269 let full_path = self.full_path(on_disk)?;
270 let full_path = self.full_path(on_disk)?;
270 let base_name_start = self.base_name_start()?;
271 let base_name_start = self.base_name_start()?;
271 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
272 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
272 }
273 }
273
274
274 pub(super) fn path<'on_disk>(
275 pub(super) fn path<'on_disk>(
275 &self,
276 &self,
276 on_disk: &'on_disk [u8],
277 on_disk: &'on_disk [u8],
277 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
278 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
278 Ok(WithBasename::from_raw_parts(
279 Ok(WithBasename::from_raw_parts(
279 Cow::Borrowed(self.full_path(on_disk)?),
280 Cow::Borrowed(self.full_path(on_disk)?),
280 self.base_name_start()?,
281 self.base_name_start()?,
281 ))
282 ))
282 }
283 }
283
284
284 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
285 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
285 self.copy_source.start.get() != 0
286 self.copy_source.start.get() != 0
286 }
287 }
287
288
288 pub(super) fn copy_source<'on_disk>(
289 pub(super) fn copy_source<'on_disk>(
289 &self,
290 &self,
290 on_disk: &'on_disk [u8],
291 on_disk: &'on_disk [u8],
291 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
292 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
292 Ok(if self.has_copy_source() {
293 Ok(if self.has_copy_source() {
293 Some(read_hg_path(on_disk, self.copy_source)?)
294 Some(read_hg_path(on_disk, self.copy_source)?)
294 } else {
295 } else {
295 None
296 None
296 })
297 })
297 }
298 }
298
299
299 fn flags(&self) -> Flags {
300 fn flags(&self) -> Flags {
300 Flags::from_bits_truncate(self.flags.get())
301 Flags::from_bits_truncate(self.flags.get())
301 }
302 }
302
303
303 fn has_entry(&self) -> bool {
304 fn has_entry(&self) -> bool {
304 self.flags().intersects(
305 self.flags().intersects(
305 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
306 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
306 )
307 )
307 }
308 }
308
309
309 pub(super) fn node_data(
310 pub(super) fn node_data(
310 &self,
311 &self,
311 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
312 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
312 if self.has_entry() {
313 if self.has_entry() {
313 Ok(dirstate_map::NodeData::Entry(self.assume_entry()))
314 Ok(dirstate_map::NodeData::Entry(self.assume_entry()))
314 } else if let Some(mtime) = self.cached_directory_mtime()? {
315 } else if let Some(mtime) = self.cached_directory_mtime()? {
315 Ok(dirstate_map::NodeData::CachedDirectory { mtime })
316 Ok(dirstate_map::NodeData::CachedDirectory { mtime })
316 } else {
317 } else {
317 Ok(dirstate_map::NodeData::None)
318 Ok(dirstate_map::NodeData::None)
318 }
319 }
319 }
320 }
320
321
321 pub(super) fn cached_directory_mtime(
322 pub(super) fn cached_directory_mtime(
322 &self,
323 &self,
323 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
324 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
324 if self.flags().contains(Flags::HAS_DIRECTORY_MTIME) {
325 if self.flags().contains(Flags::HAS_DIRECTORY_MTIME) {
325 if self.flags().contains(Flags::HAS_FILE_MTIME) {
326 if self.flags().contains(Flags::HAS_FILE_MTIME) {
326 Err(DirstateV2ParseError)
327 Err(DirstateV2ParseError)
327 } else {
328 } else {
328 Ok(Some(self.mtime.try_into()?))
329 Ok(Some(self.mtime.try_into()?))
329 }
330 }
330 } else {
331 } else {
331 Ok(None)
332 Ok(None)
332 }
333 }
333 }
334 }
334
335
335 fn synthesize_unix_mode(&self) -> u32 {
336 fn synthesize_unix_mode(&self) -> u32 {
336 let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
337 let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
337 libc::S_IFLNK
338 libc::S_IFLNK
338 } else {
339 } else {
339 libc::S_IFREG
340 libc::S_IFREG
340 };
341 };
341 let permisions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
342 let permisions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
342 0o755
343 0o755
343 } else {
344 } else {
344 0o644
345 0o644
345 };
346 };
346 file_type | permisions
347 file_type | permisions
347 }
348 }
348
349
349 fn assume_entry(&self) -> DirstateEntry {
350 fn assume_entry(&self) -> DirstateEntry {
350 // TODO: convert through raw bits instead?
351 // TODO: convert through raw bits instead?
351 let wdir_tracked = self.flags().contains(Flags::WDIR_TRACKED);
352 let wdir_tracked = self.flags().contains(Flags::WDIR_TRACKED);
352 let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
353 let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
353 let p2_info = self.flags().contains(Flags::P2_INFO);
354 let p2_info = self.flags().contains(Flags::P2_INFO);
354 let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE) {
355 let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
356 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
357 {
355 Some((self.synthesize_unix_mode(), self.size.into()))
358 Some((self.synthesize_unix_mode(), self.size.into()))
356 } else {
359 } else {
357 None
360 None
358 };
361 };
359 let mtime = if self.flags().contains(Flags::HAS_FILE_MTIME) {
362 let mtime = if self.flags().contains(Flags::HAS_FILE_MTIME)
363 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
364 {
360 Some(self.mtime.truncated_seconds.into())
365 Some(self.mtime.truncated_seconds.into())
361 } else {
366 } else {
362 None
367 None
363 };
368 };
364 DirstateEntry::from_v2_data(
369 DirstateEntry::from_v2_data(
365 wdir_tracked,
370 wdir_tracked,
366 p1_tracked,
371 p1_tracked,
367 p2_info,
372 p2_info,
368 mode_size,
373 mode_size,
369 mtime,
374 mtime,
370 )
375 )
371 }
376 }
372
377
373 pub(super) fn entry(
378 pub(super) fn entry(
374 &self,
379 &self,
375 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
380 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
376 if self.has_entry() {
381 if self.has_entry() {
377 Ok(Some(self.assume_entry()))
382 Ok(Some(self.assume_entry()))
378 } else {
383 } else {
379 Ok(None)
384 Ok(None)
380 }
385 }
381 }
386 }
382
387
383 pub(super) fn children<'on_disk>(
388 pub(super) fn children<'on_disk>(
384 &self,
389 &self,
385 on_disk: &'on_disk [u8],
390 on_disk: &'on_disk [u8],
386 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
391 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
387 read_nodes(on_disk, self.children)
392 read_nodes(on_disk, self.children)
388 }
393 }
389
394
390 pub(super) fn to_in_memory_node<'on_disk>(
395 pub(super) fn to_in_memory_node<'on_disk>(
391 &self,
396 &self,
392 on_disk: &'on_disk [u8],
397 on_disk: &'on_disk [u8],
393 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
398 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
394 Ok(dirstate_map::Node {
399 Ok(dirstate_map::Node {
395 children: dirstate_map::ChildNodes::OnDisk(
400 children: dirstate_map::ChildNodes::OnDisk(
396 self.children(on_disk)?,
401 self.children(on_disk)?,
397 ),
402 ),
398 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
403 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
399 data: self.node_data()?,
404 data: self.node_data()?,
400 descendants_with_entry_count: self
405 descendants_with_entry_count: self
401 .descendants_with_entry_count
406 .descendants_with_entry_count
402 .get(),
407 .get(),
403 tracked_descendants_count: self.tracked_descendants_count.get(),
408 tracked_descendants_count: self.tracked_descendants_count.get(),
404 })
409 })
405 }
410 }
406
411
407 fn from_dirstate_entry(
412 fn from_dirstate_entry(
408 entry: &DirstateEntry,
413 entry: &DirstateEntry,
409 ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
414 ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
410 let (wdir_tracked, p1_tracked, p2_info, mode_size_opt, mtime_opt) =
415 let (wdir_tracked, p1_tracked, p2_info, mode_size_opt, mtime_opt) =
411 entry.v2_data();
416 entry.v2_data();
412 // TODO: convert through raw flag bits instead?
417 // TODO: convert through raw flag bits instead?
413 let mut flags = Flags::empty();
418 let mut flags = Flags::empty();
414 flags.set(Flags::WDIR_TRACKED, wdir_tracked);
419 flags.set(Flags::WDIR_TRACKED, wdir_tracked);
415 flags.set(Flags::P1_TRACKED, p1_tracked);
420 flags.set(Flags::P1_TRACKED, p1_tracked);
416 flags.set(Flags::P2_INFO, p2_info);
421 flags.set(Flags::P2_INFO, p2_info);
417 let size = if let Some((m, s)) = mode_size_opt {
422 let size = if let Some((m, s)) = mode_size_opt {
418 let exec_perm = m & libc::S_IXUSR != 0;
423 let exec_perm = m & libc::S_IXUSR != 0;
419 let is_symlink = m & libc::S_IFMT == libc::S_IFLNK;
424 let is_symlink = m & libc::S_IFMT == libc::S_IFLNK;
420 flags.set(Flags::MODE_EXEC_PERM, exec_perm);
425 flags.set(Flags::MODE_EXEC_PERM, exec_perm);
421 flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
426 flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
422 flags.insert(Flags::HAS_MODE_AND_SIZE);
427 flags.insert(Flags::HAS_MODE_AND_SIZE);
423 s.into()
428 s.into()
424 } else {
429 } else {
425 0.into()
430 0.into()
426 };
431 };
427 let mtime = if let Some(m) = mtime_opt {
432 let mtime = if let Some(m) = mtime_opt {
428 flags.insert(Flags::HAS_FILE_MTIME);
433 flags.insert(Flags::HAS_FILE_MTIME);
429 PackedTruncatedTimestamp {
434 PackedTruncatedTimestamp {
430 truncated_seconds: m.into(),
435 truncated_seconds: m.into(),
431 nanoseconds: 0.into(),
436 nanoseconds: 0.into(),
432 }
437 }
433 } else {
438 } else {
434 PackedTruncatedTimestamp::null()
439 PackedTruncatedTimestamp::null()
435 };
440 };
436 (flags, size, mtime)
441 (flags, size, mtime)
437 }
442 }
438 }
443 }
439
444
440 fn read_hg_path(
445 fn read_hg_path(
441 on_disk: &[u8],
446 on_disk: &[u8],
442 slice: PathSlice,
447 slice: PathSlice,
443 ) -> Result<&HgPath, DirstateV2ParseError> {
448 ) -> Result<&HgPath, DirstateV2ParseError> {
444 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
449 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
445 }
450 }
446
451
447 fn read_nodes(
452 fn read_nodes(
448 on_disk: &[u8],
453 on_disk: &[u8],
449 slice: ChildNodes,
454 slice: ChildNodes,
450 ) -> Result<&[Node], DirstateV2ParseError> {
455 ) -> Result<&[Node], DirstateV2ParseError> {
451 read_slice(on_disk, slice.start, slice.len.get())
456 read_slice(on_disk, slice.start, slice.len.get())
452 }
457 }
453
458
454 fn read_slice<T, Len>(
459 fn read_slice<T, Len>(
455 on_disk: &[u8],
460 on_disk: &[u8],
456 start: Offset,
461 start: Offset,
457 len: Len,
462 len: Len,
458 ) -> Result<&[T], DirstateV2ParseError>
463 ) -> Result<&[T], DirstateV2ParseError>
459 where
464 where
460 T: BytesCast,
465 T: BytesCast,
461 Len: TryInto<usize>,
466 Len: TryInto<usize>,
462 {
467 {
463 // Either `usize::MAX` would result in "out of bounds" error since a single
468 // Either `usize::MAX` would result in "out of bounds" error since a single
464 // `&[u8]` cannot occupy the entire address space.
469 // `&[u8]` cannot occupy the entire address space.
465 let start = start.get().try_into().unwrap_or(std::usize::MAX);
470 let start = start.get().try_into().unwrap_or(std::usize::MAX);
466 let len = len.try_into().unwrap_or(std::usize::MAX);
471 let len = len.try_into().unwrap_or(std::usize::MAX);
467 on_disk
472 on_disk
468 .get(start..)
473 .get(start..)
469 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
474 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
470 .map(|(slice, _rest)| slice)
475 .map(|(slice, _rest)| slice)
471 .ok_or_else(|| DirstateV2ParseError)
476 .ok_or_else(|| DirstateV2ParseError)
472 }
477 }
473
478
474 pub(crate) fn for_each_tracked_path<'on_disk>(
479 pub(crate) fn for_each_tracked_path<'on_disk>(
475 on_disk: &'on_disk [u8],
480 on_disk: &'on_disk [u8],
476 metadata: &[u8],
481 metadata: &[u8],
477 mut f: impl FnMut(&'on_disk HgPath),
482 mut f: impl FnMut(&'on_disk HgPath),
478 ) -> Result<(), DirstateV2ParseError> {
483 ) -> Result<(), DirstateV2ParseError> {
479 let (meta, _) = TreeMetadata::from_bytes(metadata)
484 let (meta, _) = TreeMetadata::from_bytes(metadata)
480 .map_err(|_| DirstateV2ParseError)?;
485 .map_err(|_| DirstateV2ParseError)?;
481 fn recur<'on_disk>(
486 fn recur<'on_disk>(
482 on_disk: &'on_disk [u8],
487 on_disk: &'on_disk [u8],
483 nodes: ChildNodes,
488 nodes: ChildNodes,
484 f: &mut impl FnMut(&'on_disk HgPath),
489 f: &mut impl FnMut(&'on_disk HgPath),
485 ) -> Result<(), DirstateV2ParseError> {
490 ) -> Result<(), DirstateV2ParseError> {
486 for node in read_nodes(on_disk, nodes)? {
491 for node in read_nodes(on_disk, nodes)? {
487 if let Some(entry) = node.entry()? {
492 if let Some(entry) = node.entry()? {
488 if entry.state().is_tracked() {
493 if entry.state().is_tracked() {
489 f(node.full_path(on_disk)?)
494 f(node.full_path(on_disk)?)
490 }
495 }
491 }
496 }
492 recur(on_disk, node.children, f)?
497 recur(on_disk, node.children, f)?
493 }
498 }
494 Ok(())
499 Ok(())
495 }
500 }
496 recur(on_disk, meta.root_nodes, &mut f)
501 recur(on_disk, meta.root_nodes, &mut f)
497 }
502 }
498
503
499 /// Returns new data and metadata, together with whether that data should be
504 /// Returns new data and metadata, together with whether that data should be
500 /// appended to the existing data file whose content is at
505 /// appended to the existing data file whose content is at
501 /// `dirstate_map.on_disk` (true), instead of written to a new data file
506 /// `dirstate_map.on_disk` (true), instead of written to a new data file
502 /// (false).
507 /// (false).
503 pub(super) fn write(
508 pub(super) fn write(
504 dirstate_map: &mut DirstateMap,
509 dirstate_map: &mut DirstateMap,
505 can_append: bool,
510 can_append: bool,
506 ) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
511 ) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
507 let append = can_append && dirstate_map.write_should_append();
512 let append = can_append && dirstate_map.write_should_append();
508
513
509 // This ignores the space for paths, and for nodes without an entry.
514 // This ignores the space for paths, and for nodes without an entry.
510 // TODO: better estimate? Skip the `Vec` and write to a file directly?
515 // TODO: better estimate? Skip the `Vec` and write to a file directly?
511 let size_guess = std::mem::size_of::<Node>()
516 let size_guess = std::mem::size_of::<Node>()
512 * dirstate_map.nodes_with_entry_count as usize;
517 * dirstate_map.nodes_with_entry_count as usize;
513
518
514 let mut writer = Writer {
519 let mut writer = Writer {
515 dirstate_map,
520 dirstate_map,
516 append,
521 append,
517 out: Vec::with_capacity(size_guess),
522 out: Vec::with_capacity(size_guess),
518 };
523 };
519
524
520 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
525 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
521
526
522 let meta = TreeMetadata {
527 let meta = TreeMetadata {
523 root_nodes,
528 root_nodes,
524 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
529 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
525 nodes_with_copy_source_count: dirstate_map
530 nodes_with_copy_source_count: dirstate_map
526 .nodes_with_copy_source_count
531 .nodes_with_copy_source_count
527 .into(),
532 .into(),
528 unreachable_bytes: dirstate_map.unreachable_bytes.into(),
533 unreachable_bytes: dirstate_map.unreachable_bytes.into(),
529 unused: [0; 4],
534 unused: [0; 4],
530 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
535 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
531 };
536 };
532 Ok((writer.out, meta.as_bytes().to_vec(), append))
537 Ok((writer.out, meta.as_bytes().to_vec(), append))
533 }
538 }
534
539
535 struct Writer<'dmap, 'on_disk> {
540 struct Writer<'dmap, 'on_disk> {
536 dirstate_map: &'dmap DirstateMap<'on_disk>,
541 dirstate_map: &'dmap DirstateMap<'on_disk>,
537 append: bool,
542 append: bool,
538 out: Vec<u8>,
543 out: Vec<u8>,
539 }
544 }
540
545
541 impl Writer<'_, '_> {
546 impl Writer<'_, '_> {
542 fn write_nodes(
547 fn write_nodes(
543 &mut self,
548 &mut self,
544 nodes: dirstate_map::ChildNodesRef,
549 nodes: dirstate_map::ChildNodesRef,
545 ) -> Result<ChildNodes, DirstateError> {
550 ) -> Result<ChildNodes, DirstateError> {
546 // Reuse already-written nodes if possible
551 // Reuse already-written nodes if possible
547 if self.append {
552 if self.append {
548 if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
553 if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
549 let start = self.on_disk_offset_of(nodes_slice).expect(
554 let start = self.on_disk_offset_of(nodes_slice).expect(
550 "dirstate-v2 OnDisk nodes not found within on_disk",
555 "dirstate-v2 OnDisk nodes not found within on_disk",
551 );
556 );
552 let len = child_nodes_len_from_usize(nodes_slice.len());
557 let len = child_nodes_len_from_usize(nodes_slice.len());
553 return Ok(ChildNodes { start, len });
558 return Ok(ChildNodes { start, len });
554 }
559 }
555 }
560 }
556
561
557 // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
562 // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
558 // undefined iteration order. Sort to enable binary search in the
563 // undefined iteration order. Sort to enable binary search in the
559 // written file.
564 // written file.
560 let nodes = nodes.sorted();
565 let nodes = nodes.sorted();
561 let nodes_len = nodes.len();
566 let nodes_len = nodes.len();
562
567
563 // First accumulate serialized nodes in a `Vec`
568 // First accumulate serialized nodes in a `Vec`
564 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
569 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
565 for node in nodes {
570 for node in nodes {
566 let children =
571 let children =
567 self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
572 self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
568 let full_path = node.full_path(self.dirstate_map.on_disk)?;
573 let full_path = node.full_path(self.dirstate_map.on_disk)?;
569 let full_path = self.write_path(full_path.as_bytes());
574 let full_path = self.write_path(full_path.as_bytes());
570 let copy_source = if let Some(source) =
575 let copy_source = if let Some(source) =
571 node.copy_source(self.dirstate_map.on_disk)?
576 node.copy_source(self.dirstate_map.on_disk)?
572 {
577 {
573 self.write_path(source.as_bytes())
578 self.write_path(source.as_bytes())
574 } else {
579 } else {
575 PathSlice {
580 PathSlice {
576 start: 0.into(),
581 start: 0.into(),
577 len: 0.into(),
582 len: 0.into(),
578 }
583 }
579 };
584 };
580 on_disk_nodes.push(match node {
585 on_disk_nodes.push(match node {
581 NodeRef::InMemory(path, node) => {
586 NodeRef::InMemory(path, node) => {
582 let (flags, size, mtime) = match &node.data {
587 let (flags, size, mtime) = match &node.data {
583 dirstate_map::NodeData::Entry(entry) => {
588 dirstate_map::NodeData::Entry(entry) => {
584 Node::from_dirstate_entry(entry)
589 Node::from_dirstate_entry(entry)
585 }
590 }
586 dirstate_map::NodeData::CachedDirectory { mtime } => (
591 dirstate_map::NodeData::CachedDirectory { mtime } => (
587 Flags::HAS_DIRECTORY_MTIME,
592 Flags::HAS_DIRECTORY_MTIME,
588 0.into(),
593 0.into(),
589 (*mtime).into(),
594 (*mtime).into(),
590 ),
595 ),
591 dirstate_map::NodeData::None => (
596 dirstate_map::NodeData::None => (
592 Flags::empty(),
597 Flags::empty(),
593 0.into(),
598 0.into(),
594 PackedTruncatedTimestamp::null(),
599 PackedTruncatedTimestamp::null(),
595 ),
600 ),
596 };
601 };
597 Node {
602 Node {
598 children,
603 children,
599 copy_source,
604 copy_source,
600 full_path,
605 full_path,
601 base_name_start: u16::try_from(path.base_name_start())
606 base_name_start: u16::try_from(path.base_name_start())
602 // Could only panic for paths over 64 KiB
607 // Could only panic for paths over 64 KiB
603 .expect("dirstate-v2 path length overflow")
608 .expect("dirstate-v2 path length overflow")
604 .into(),
609 .into(),
605 descendants_with_entry_count: node
610 descendants_with_entry_count: node
606 .descendants_with_entry_count
611 .descendants_with_entry_count
607 .into(),
612 .into(),
608 tracked_descendants_count: node
613 tracked_descendants_count: node
609 .tracked_descendants_count
614 .tracked_descendants_count
610 .into(),
615 .into(),
611 flags: flags.bits().into(),
616 flags: flags.bits().into(),
612 size,
617 size,
613 mtime,
618 mtime,
614 }
619 }
615 }
620 }
616 NodeRef::OnDisk(node) => Node {
621 NodeRef::OnDisk(node) => Node {
617 children,
622 children,
618 copy_source,
623 copy_source,
619 full_path,
624 full_path,
620 ..*node
625 ..*node
621 },
626 },
622 })
627 })
623 }
628 }
624 // … so we can write them contiguously, after writing everything else
629 // … so we can write them contiguously, after writing everything else
625 // they refer to.
630 // they refer to.
626 let start = self.current_offset();
631 let start = self.current_offset();
627 let len = child_nodes_len_from_usize(nodes_len);
632 let len = child_nodes_len_from_usize(nodes_len);
628 self.out.extend(on_disk_nodes.as_bytes());
633 self.out.extend(on_disk_nodes.as_bytes());
629 Ok(ChildNodes { start, len })
634 Ok(ChildNodes { start, len })
630 }
635 }
631
636
632 /// If the given slice of items is within `on_disk`, returns its offset
637 /// If the given slice of items is within `on_disk`, returns its offset
633 /// from the start of `on_disk`.
638 /// from the start of `on_disk`.
634 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
639 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
635 where
640 where
636 T: BytesCast,
641 T: BytesCast,
637 {
642 {
638 fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
643 fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
639 let start = slice.as_ptr() as usize;
644 let start = slice.as_ptr() as usize;
640 let end = start + slice.len();
645 let end = start + slice.len();
641 start..=end
646 start..=end
642 }
647 }
643 let slice_addresses = address_range(slice.as_bytes());
648 let slice_addresses = address_range(slice.as_bytes());
644 let on_disk_addresses = address_range(self.dirstate_map.on_disk);
649 let on_disk_addresses = address_range(self.dirstate_map.on_disk);
645 if on_disk_addresses.contains(slice_addresses.start())
650 if on_disk_addresses.contains(slice_addresses.start())
646 && on_disk_addresses.contains(slice_addresses.end())
651 && on_disk_addresses.contains(slice_addresses.end())
647 {
652 {
648 let offset = slice_addresses.start() - on_disk_addresses.start();
653 let offset = slice_addresses.start() - on_disk_addresses.start();
649 Some(offset_from_usize(offset))
654 Some(offset_from_usize(offset))
650 } else {
655 } else {
651 None
656 None
652 }
657 }
653 }
658 }
654
659
655 fn current_offset(&mut self) -> Offset {
660 fn current_offset(&mut self) -> Offset {
656 let mut offset = self.out.len();
661 let mut offset = self.out.len();
657 if self.append {
662 if self.append {
658 offset += self.dirstate_map.on_disk.len()
663 offset += self.dirstate_map.on_disk.len()
659 }
664 }
660 offset_from_usize(offset)
665 offset_from_usize(offset)
661 }
666 }
662
667
663 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
668 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
664 let len = path_len_from_usize(slice.len());
669 let len = path_len_from_usize(slice.len());
665 // Reuse an already-written path if possible
670 // Reuse an already-written path if possible
666 if self.append {
671 if self.append {
667 if let Some(start) = self.on_disk_offset_of(slice) {
672 if let Some(start) = self.on_disk_offset_of(slice) {
668 return PathSlice { start, len };
673 return PathSlice { start, len };
669 }
674 }
670 }
675 }
671 let start = self.current_offset();
676 let start = self.current_offset();
672 self.out.extend(slice.as_bytes());
677 self.out.extend(slice.as_bytes());
673 PathSlice { start, len }
678 PathSlice { start, len }
674 }
679 }
675 }
680 }
676
681
677 fn offset_from_usize(x: usize) -> Offset {
682 fn offset_from_usize(x: usize) -> Offset {
678 u32::try_from(x)
683 u32::try_from(x)
679 // Could only panic for a dirstate file larger than 4 GiB
684 // Could only panic for a dirstate file larger than 4 GiB
680 .expect("dirstate-v2 offset overflow")
685 .expect("dirstate-v2 offset overflow")
681 .into()
686 .into()
682 }
687 }
683
688
684 fn child_nodes_len_from_usize(x: usize) -> Size {
689 fn child_nodes_len_from_usize(x: usize) -> Size {
685 u32::try_from(x)
690 u32::try_from(x)
686 // Could only panic with over 4 billion nodes
691 // Could only panic with over 4 billion nodes
687 .expect("dirstate-v2 slice length overflow")
692 .expect("dirstate-v2 slice length overflow")
688 .into()
693 .into()
689 }
694 }
690
695
691 fn path_len_from_usize(x: usize) -> PathSize {
696 fn path_len_from_usize(x: usize) -> PathSize {
692 u16::try_from(x)
697 u16::try_from(x)
693 // Could only panic for paths over 64 KiB
698 // Could only panic for paths over 64 KiB
694 .expect("dirstate-v2 path length overflow")
699 .expect("dirstate-v2 path length overflow")
695 .into()
700 .into()
696 }
701 }
697
702
698 impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
703 impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
699 fn from(timestamp: TruncatedTimestamp) -> Self {
704 fn from(timestamp: TruncatedTimestamp) -> Self {
700 Self {
705 Self {
701 truncated_seconds: timestamp.truncated_seconds().into(),
706 truncated_seconds: timestamp.truncated_seconds().into(),
702 nanoseconds: timestamp.nanoseconds().into(),
707 nanoseconds: timestamp.nanoseconds().into(),
703 }
708 }
704 }
709 }
705 }
710 }
706
711
707 impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
712 impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
708 type Error = DirstateV2ParseError;
713 type Error = DirstateV2ParseError;
709
714
710 fn try_from(
715 fn try_from(
711 timestamp: PackedTruncatedTimestamp,
716 timestamp: PackedTruncatedTimestamp,
712 ) -> Result<Self, Self::Error> {
717 ) -> Result<Self, Self::Error> {
713 Self::from_already_truncated(
718 Self::from_already_truncated(
714 timestamp.truncated_seconds.get(),
719 timestamp.truncated_seconds.get(),
715 timestamp.nanoseconds.get(),
720 timestamp.nanoseconds.get(),
716 )
721 )
717 }
722 }
718 }
723 }
719 impl PackedTruncatedTimestamp {
724 impl PackedTruncatedTimestamp {
720 fn null() -> Self {
725 fn null() -> Self {
721 Self {
726 Self {
722 truncated_seconds: 0.into(),
727 truncated_seconds: 0.into(),
723 nanoseconds: 0.into(),
728 nanoseconds: 0.into(),
724 }
729 }
725 }
730 }
726 }
731 }
General Comments 0
You need to be logged in to leave comments. Login now