##// END OF EJS Templates
dirstate-v2: Separate HAS_FILE_MTIME and HAS_DIRECTORY_MTIME flags...
Simon Sapin -
r49046:f7fd629f default
parent child Browse files
Show More
@@ -1,1174 +1,1174 b''
1 /*
1 /*
2 parsers.c - efficient content parsing
2 parsers.c - efficient content parsing
3
3
4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8 */
8 */
9
9
10 #define PY_SSIZE_T_CLEAN
10 #define PY_SSIZE_T_CLEAN
11 #include <Python.h>
11 #include <Python.h>
12 #include <ctype.h>
12 #include <ctype.h>
13 #include <stddef.h>
13 #include <stddef.h>
14 #include <string.h>
14 #include <string.h>
15
15
16 #include "bitmanipulation.h"
16 #include "bitmanipulation.h"
17 #include "charencode.h"
17 #include "charencode.h"
18 #include "util.h"
18 #include "util.h"
19
19
20 #ifdef IS_PY3K
20 #ifdef IS_PY3K
21 /* The mapping of Python types is meant to be temporary to get Python
21 /* The mapping of Python types is meant to be temporary to get Python
22 * 3 to compile. We should remove this once Python 3 support is fully
22 * 3 to compile. We should remove this once Python 3 support is fully
23 * supported and proper types are used in the extensions themselves. */
23 * supported and proper types are used in the extensions themselves. */
24 #define PyInt_Check PyLong_Check
24 #define PyInt_Check PyLong_Check
25 #define PyInt_FromLong PyLong_FromLong
25 #define PyInt_FromLong PyLong_FromLong
26 #define PyInt_FromSsize_t PyLong_FromSsize_t
26 #define PyInt_FromSsize_t PyLong_FromSsize_t
27 #define PyInt_AsLong PyLong_AsLong
27 #define PyInt_AsLong PyLong_AsLong
28 #endif
28 #endif
29
29
30 static const char *const versionerrortext = "Python minor version mismatch";
30 static const char *const versionerrortext = "Python minor version mismatch";
31
31
32 static const int dirstate_v1_from_p2 = -2;
32 static const int dirstate_v1_from_p2 = -2;
33 static const int dirstate_v1_nonnormal = -1;
33 static const int dirstate_v1_nonnormal = -1;
34 static const int ambiguous_time = -1;
34 static const int ambiguous_time = -1;
35
35
36 static PyObject *dict_new_presized(PyObject *self, PyObject *args)
36 static PyObject *dict_new_presized(PyObject *self, PyObject *args)
37 {
37 {
38 Py_ssize_t expected_size;
38 Py_ssize_t expected_size;
39
39
40 if (!PyArg_ParseTuple(args, "n:make_presized_dict", &expected_size)) {
40 if (!PyArg_ParseTuple(args, "n:make_presized_dict", &expected_size)) {
41 return NULL;
41 return NULL;
42 }
42 }
43
43
44 return _dict_new_presized(expected_size);
44 return _dict_new_presized(expected_size);
45 }
45 }
46
46
47 static PyObject *dirstate_item_new(PyTypeObject *subtype, PyObject *args,
47 static PyObject *dirstate_item_new(PyTypeObject *subtype, PyObject *args,
48 PyObject *kwds)
48 PyObject *kwds)
49 {
49 {
50 /* We do all the initialization here and not a tp_init function because
50 /* We do all the initialization here and not a tp_init function because
51 * dirstate_item is immutable. */
51 * dirstate_item is immutable. */
52 dirstateItemObject *t;
52 dirstateItemObject *t;
53 int wc_tracked;
53 int wc_tracked;
54 int p1_tracked;
54 int p1_tracked;
55 int p2_info;
55 int p2_info;
56 int has_meaningful_data;
56 int has_meaningful_data;
57 int has_meaningful_mtime;
57 int has_meaningful_mtime;
58 int mode;
58 int mode;
59 int size;
59 int size;
60 int mtime;
60 int mtime;
61 PyObject *parentfiledata;
61 PyObject *parentfiledata;
62 static char *keywords_name[] = {
62 static char *keywords_name[] = {
63 "wc_tracked",
63 "wc_tracked",
64 "p1_tracked",
64 "p1_tracked",
65 "p2_info",
65 "p2_info",
66 "has_meaningful_data",
66 "has_meaningful_data",
67 "has_meaningful_mtime",
67 "has_meaningful_mtime",
68 "parentfiledata",
68 "parentfiledata",
69 NULL,
69 NULL,
70 };
70 };
71 wc_tracked = 0;
71 wc_tracked = 0;
72 p1_tracked = 0;
72 p1_tracked = 0;
73 p2_info = 0;
73 p2_info = 0;
74 has_meaningful_mtime = 1;
74 has_meaningful_mtime = 1;
75 has_meaningful_data = 1;
75 has_meaningful_data = 1;
76 parentfiledata = Py_None;
76 parentfiledata = Py_None;
77 if (!PyArg_ParseTupleAndKeywords(
77 if (!PyArg_ParseTupleAndKeywords(
78 args, kwds, "|iiiiiO", keywords_name, &wc_tracked, &p1_tracked,
78 args, kwds, "|iiiiiO", keywords_name, &wc_tracked, &p1_tracked,
79 &p2_info, &has_meaningful_data, &has_meaningful_mtime,
79 &p2_info, &has_meaningful_data, &has_meaningful_mtime,
80 &parentfiledata)) {
80 &parentfiledata)) {
81 return NULL;
81 return NULL;
82 }
82 }
83 t = (dirstateItemObject *)subtype->tp_alloc(subtype, 1);
83 t = (dirstateItemObject *)subtype->tp_alloc(subtype, 1);
84 if (!t) {
84 if (!t) {
85 return NULL;
85 return NULL;
86 }
86 }
87
87
88 t->flags = 0;
88 t->flags = 0;
89 if (wc_tracked) {
89 if (wc_tracked) {
90 t->flags |= dirstate_flag_wc_tracked;
90 t->flags |= dirstate_flag_wc_tracked;
91 }
91 }
92 if (p1_tracked) {
92 if (p1_tracked) {
93 t->flags |= dirstate_flag_p1_tracked;
93 t->flags |= dirstate_flag_p1_tracked;
94 }
94 }
95 if (p2_info) {
95 if (p2_info) {
96 t->flags |= dirstate_flag_p2_info;
96 t->flags |= dirstate_flag_p2_info;
97 }
97 }
98
98
99 if (parentfiledata != Py_None) {
99 if (parentfiledata != Py_None) {
100 if (!PyTuple_CheckExact(parentfiledata)) {
100 if (!PyTuple_CheckExact(parentfiledata)) {
101 PyErr_SetString(
101 PyErr_SetString(
102 PyExc_TypeError,
102 PyExc_TypeError,
103 "parentfiledata should be a Tuple or None");
103 "parentfiledata should be a Tuple or None");
104 return NULL;
104 return NULL;
105 }
105 }
106 mode = (int)PyLong_AsLong(PyTuple_GetItem(parentfiledata, 0));
106 mode = (int)PyLong_AsLong(PyTuple_GetItem(parentfiledata, 0));
107 size = (int)PyLong_AsLong(PyTuple_GetItem(parentfiledata, 1));
107 size = (int)PyLong_AsLong(PyTuple_GetItem(parentfiledata, 1));
108 mtime = (int)PyLong_AsLong(PyTuple_GetItem(parentfiledata, 2));
108 mtime = (int)PyLong_AsLong(PyTuple_GetItem(parentfiledata, 2));
109 } else {
109 } else {
110 has_meaningful_data = 0;
110 has_meaningful_data = 0;
111 has_meaningful_mtime = 0;
111 has_meaningful_mtime = 0;
112 }
112 }
113 if (has_meaningful_data) {
113 if (has_meaningful_data) {
114 t->flags |= dirstate_flag_has_meaningful_data;
114 t->flags |= dirstate_flag_has_meaningful_data;
115 t->mode = mode;
115 t->mode = mode;
116 t->size = size;
116 t->size = size;
117 } else {
117 } else {
118 t->mode = 0;
118 t->mode = 0;
119 t->size = 0;
119 t->size = 0;
120 }
120 }
121 if (has_meaningful_mtime) {
121 if (has_meaningful_mtime) {
122 t->flags |= dirstate_flag_has_meaningful_mtime;
122 t->flags |= dirstate_flag_has_file_mtime;
123 t->mtime = mtime;
123 t->mtime = mtime;
124 } else {
124 } else {
125 t->mtime = 0;
125 t->mtime = 0;
126 }
126 }
127 return (PyObject *)t;
127 return (PyObject *)t;
128 }
128 }
129
129
130 static void dirstate_item_dealloc(PyObject *o)
130 static void dirstate_item_dealloc(PyObject *o)
131 {
131 {
132 PyObject_Del(o);
132 PyObject_Del(o);
133 }
133 }
134
134
135 static inline bool dirstate_item_c_tracked(dirstateItemObject *self)
135 static inline bool dirstate_item_c_tracked(dirstateItemObject *self)
136 {
136 {
137 return (self->flags & dirstate_flag_wc_tracked);
137 return (self->flags & dirstate_flag_wc_tracked);
138 }
138 }
139
139
140 static inline bool dirstate_item_c_any_tracked(dirstateItemObject *self)
140 static inline bool dirstate_item_c_any_tracked(dirstateItemObject *self)
141 {
141 {
142 const unsigned char mask = dirstate_flag_wc_tracked |
142 const unsigned char mask = dirstate_flag_wc_tracked |
143 dirstate_flag_p1_tracked |
143 dirstate_flag_p1_tracked |
144 dirstate_flag_p2_info;
144 dirstate_flag_p2_info;
145 return (self->flags & mask);
145 return (self->flags & mask);
146 }
146 }
147
147
148 static inline bool dirstate_item_c_added(dirstateItemObject *self)
148 static inline bool dirstate_item_c_added(dirstateItemObject *self)
149 {
149 {
150 const unsigned char mask =
150 const unsigned char mask =
151 (dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
151 (dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
152 dirstate_flag_p2_info);
152 dirstate_flag_p2_info);
153 const unsigned char target = dirstate_flag_wc_tracked;
153 const unsigned char target = dirstate_flag_wc_tracked;
154 return (self->flags & mask) == target;
154 return (self->flags & mask) == target;
155 }
155 }
156
156
157 static inline bool dirstate_item_c_removed(dirstateItemObject *self)
157 static inline bool dirstate_item_c_removed(dirstateItemObject *self)
158 {
158 {
159 if (self->flags & dirstate_flag_wc_tracked) {
159 if (self->flags & dirstate_flag_wc_tracked) {
160 return false;
160 return false;
161 }
161 }
162 return (self->flags &
162 return (self->flags &
163 (dirstate_flag_p1_tracked | dirstate_flag_p2_info));
163 (dirstate_flag_p1_tracked | dirstate_flag_p2_info));
164 }
164 }
165
165
166 static inline bool dirstate_item_c_merged(dirstateItemObject *self)
166 static inline bool dirstate_item_c_merged(dirstateItemObject *self)
167 {
167 {
168 return ((self->flags & dirstate_flag_wc_tracked) &&
168 return ((self->flags & dirstate_flag_wc_tracked) &&
169 (self->flags & dirstate_flag_p1_tracked) &&
169 (self->flags & dirstate_flag_p1_tracked) &&
170 (self->flags & dirstate_flag_p2_info));
170 (self->flags & dirstate_flag_p2_info));
171 }
171 }
172
172
173 static inline bool dirstate_item_c_from_p2(dirstateItemObject *self)
173 static inline bool dirstate_item_c_from_p2(dirstateItemObject *self)
174 {
174 {
175 return ((self->flags & dirstate_flag_wc_tracked) &&
175 return ((self->flags & dirstate_flag_wc_tracked) &&
176 !(self->flags & dirstate_flag_p1_tracked) &&
176 !(self->flags & dirstate_flag_p1_tracked) &&
177 (self->flags & dirstate_flag_p2_info));
177 (self->flags & dirstate_flag_p2_info));
178 }
178 }
179
179
180 static inline char dirstate_item_c_v1_state(dirstateItemObject *self)
180 static inline char dirstate_item_c_v1_state(dirstateItemObject *self)
181 {
181 {
182 if (dirstate_item_c_removed(self)) {
182 if (dirstate_item_c_removed(self)) {
183 return 'r';
183 return 'r';
184 } else if (dirstate_item_c_merged(self)) {
184 } else if (dirstate_item_c_merged(self)) {
185 return 'm';
185 return 'm';
186 } else if (dirstate_item_c_added(self)) {
186 } else if (dirstate_item_c_added(self)) {
187 return 'a';
187 return 'a';
188 } else {
188 } else {
189 return 'n';
189 return 'n';
190 }
190 }
191 }
191 }
192
192
193 static inline int dirstate_item_c_v1_mode(dirstateItemObject *self)
193 static inline int dirstate_item_c_v1_mode(dirstateItemObject *self)
194 {
194 {
195 if (self->flags & dirstate_flag_has_meaningful_data) {
195 if (self->flags & dirstate_flag_has_meaningful_data) {
196 return self->mode;
196 return self->mode;
197 } else {
197 } else {
198 return 0;
198 return 0;
199 }
199 }
200 }
200 }
201
201
202 static inline int dirstate_item_c_v1_size(dirstateItemObject *self)
202 static inline int dirstate_item_c_v1_size(dirstateItemObject *self)
203 {
203 {
204 if (!(self->flags & dirstate_flag_wc_tracked) &&
204 if (!(self->flags & dirstate_flag_wc_tracked) &&
205 (self->flags & dirstate_flag_p2_info)) {
205 (self->flags & dirstate_flag_p2_info)) {
206 if (self->flags & dirstate_flag_p1_tracked) {
206 if (self->flags & dirstate_flag_p1_tracked) {
207 return dirstate_v1_nonnormal;
207 return dirstate_v1_nonnormal;
208 } else {
208 } else {
209 return dirstate_v1_from_p2;
209 return dirstate_v1_from_p2;
210 }
210 }
211 } else if (dirstate_item_c_removed(self)) {
211 } else if (dirstate_item_c_removed(self)) {
212 return 0;
212 return 0;
213 } else if (self->flags & dirstate_flag_p2_info) {
213 } else if (self->flags & dirstate_flag_p2_info) {
214 return dirstate_v1_from_p2;
214 return dirstate_v1_from_p2;
215 } else if (dirstate_item_c_added(self)) {
215 } else if (dirstate_item_c_added(self)) {
216 return dirstate_v1_nonnormal;
216 return dirstate_v1_nonnormal;
217 } else if (self->flags & dirstate_flag_has_meaningful_data) {
217 } else if (self->flags & dirstate_flag_has_meaningful_data) {
218 return self->size;
218 return self->size;
219 } else {
219 } else {
220 return dirstate_v1_nonnormal;
220 return dirstate_v1_nonnormal;
221 }
221 }
222 }
222 }
223
223
224 static inline int dirstate_item_c_v1_mtime(dirstateItemObject *self)
224 static inline int dirstate_item_c_v1_mtime(dirstateItemObject *self)
225 {
225 {
226 if (dirstate_item_c_removed(self)) {
226 if (dirstate_item_c_removed(self)) {
227 return 0;
227 return 0;
228 } else if (!(self->flags & dirstate_flag_has_meaningful_mtime) ||
228 } else if (!(self->flags & dirstate_flag_has_file_mtime) ||
229 !(self->flags & dirstate_flag_p1_tracked) ||
229 !(self->flags & dirstate_flag_p1_tracked) ||
230 !(self->flags & dirstate_flag_wc_tracked) ||
230 !(self->flags & dirstate_flag_wc_tracked) ||
231 (self->flags & dirstate_flag_p2_info)) {
231 (self->flags & dirstate_flag_p2_info)) {
232 return ambiguous_time;
232 return ambiguous_time;
233 } else {
233 } else {
234 return self->mtime;
234 return self->mtime;
235 }
235 }
236 }
236 }
237
237
238 static PyObject *dirstate_item_v2_data(dirstateItemObject *self)
238 static PyObject *dirstate_item_v2_data(dirstateItemObject *self)
239 {
239 {
240 unsigned char flags = self->flags;
240 unsigned char flags = self->flags;
241 int mode = dirstate_item_c_v1_mode(self);
241 int mode = dirstate_item_c_v1_mode(self);
242 if ((mode & S_IXUSR) != 0) {
242 if ((mode & S_IXUSR) != 0) {
243 flags |= dirstate_flag_mode_exec_perm;
243 flags |= dirstate_flag_mode_exec_perm;
244 } else {
244 } else {
245 flags &= ~dirstate_flag_mode_exec_perm;
245 flags &= ~dirstate_flag_mode_exec_perm;
246 }
246 }
247 if (S_ISLNK(mode)) {
247 if (S_ISLNK(mode)) {
248 flags |= dirstate_flag_mode_is_symlink;
248 flags |= dirstate_flag_mode_is_symlink;
249 } else {
249 } else {
250 flags &= ~dirstate_flag_mode_is_symlink;
250 flags &= ~dirstate_flag_mode_is_symlink;
251 }
251 }
252 return Py_BuildValue("Bii", flags, self->size, self->mtime);
252 return Py_BuildValue("Bii", flags, self->size, self->mtime);
253 };
253 };
254
254
255 static PyObject *dirstate_item_v1_state(dirstateItemObject *self)
255 static PyObject *dirstate_item_v1_state(dirstateItemObject *self)
256 {
256 {
257 char state = dirstate_item_c_v1_state(self);
257 char state = dirstate_item_c_v1_state(self);
258 return PyBytes_FromStringAndSize(&state, 1);
258 return PyBytes_FromStringAndSize(&state, 1);
259 };
259 };
260
260
261 static PyObject *dirstate_item_v1_mode(dirstateItemObject *self)
261 static PyObject *dirstate_item_v1_mode(dirstateItemObject *self)
262 {
262 {
263 return PyInt_FromLong(dirstate_item_c_v1_mode(self));
263 return PyInt_FromLong(dirstate_item_c_v1_mode(self));
264 };
264 };
265
265
266 static PyObject *dirstate_item_v1_size(dirstateItemObject *self)
266 static PyObject *dirstate_item_v1_size(dirstateItemObject *self)
267 {
267 {
268 return PyInt_FromLong(dirstate_item_c_v1_size(self));
268 return PyInt_FromLong(dirstate_item_c_v1_size(self));
269 };
269 };
270
270
271 static PyObject *dirstate_item_v1_mtime(dirstateItemObject *self)
271 static PyObject *dirstate_item_v1_mtime(dirstateItemObject *self)
272 {
272 {
273 return PyInt_FromLong(dirstate_item_c_v1_mtime(self));
273 return PyInt_FromLong(dirstate_item_c_v1_mtime(self));
274 };
274 };
275
275
276 static PyObject *dirstate_item_need_delay(dirstateItemObject *self,
276 static PyObject *dirstate_item_need_delay(dirstateItemObject *self,
277 PyObject *value)
277 PyObject *value)
278 {
278 {
279 long now;
279 long now;
280 if (!pylong_to_long(value, &now)) {
280 if (!pylong_to_long(value, &now)) {
281 return NULL;
281 return NULL;
282 }
282 }
283 if (dirstate_item_c_v1_state(self) == 'n' &&
283 if (dirstate_item_c_v1_state(self) == 'n' &&
284 dirstate_item_c_v1_mtime(self) == now) {
284 dirstate_item_c_v1_mtime(self) == now) {
285 Py_RETURN_TRUE;
285 Py_RETURN_TRUE;
286 } else {
286 } else {
287 Py_RETURN_FALSE;
287 Py_RETURN_FALSE;
288 }
288 }
289 };
289 };
290
290
291 /* This will never change since it's bound to V1
291 /* This will never change since it's bound to V1
292 */
292 */
293 static inline dirstateItemObject *
293 static inline dirstateItemObject *
294 dirstate_item_from_v1_data(char state, int mode, int size, int mtime)
294 dirstate_item_from_v1_data(char state, int mode, int size, int mtime)
295 {
295 {
296 dirstateItemObject *t =
296 dirstateItemObject *t =
297 PyObject_New(dirstateItemObject, &dirstateItemType);
297 PyObject_New(dirstateItemObject, &dirstateItemType);
298 if (!t) {
298 if (!t) {
299 return NULL;
299 return NULL;
300 }
300 }
301 t->flags = 0;
301 t->flags = 0;
302 t->mode = 0;
302 t->mode = 0;
303 t->size = 0;
303 t->size = 0;
304 t->mtime = 0;
304 t->mtime = 0;
305
305
306 if (state == 'm') {
306 if (state == 'm') {
307 t->flags = (dirstate_flag_wc_tracked |
307 t->flags = (dirstate_flag_wc_tracked |
308 dirstate_flag_p1_tracked | dirstate_flag_p2_info);
308 dirstate_flag_p1_tracked | dirstate_flag_p2_info);
309 } else if (state == 'a') {
309 } else if (state == 'a') {
310 t->flags = dirstate_flag_wc_tracked;
310 t->flags = dirstate_flag_wc_tracked;
311 } else if (state == 'r') {
311 } else if (state == 'r') {
312 if (size == dirstate_v1_nonnormal) {
312 if (size == dirstate_v1_nonnormal) {
313 t->flags =
313 t->flags =
314 dirstate_flag_p1_tracked | dirstate_flag_p2_info;
314 dirstate_flag_p1_tracked | dirstate_flag_p2_info;
315 } else if (size == dirstate_v1_from_p2) {
315 } else if (size == dirstate_v1_from_p2) {
316 t->flags = dirstate_flag_p2_info;
316 t->flags = dirstate_flag_p2_info;
317 } else {
317 } else {
318 t->flags = dirstate_flag_p1_tracked;
318 t->flags = dirstate_flag_p1_tracked;
319 }
319 }
320 } else if (state == 'n') {
320 } else if (state == 'n') {
321 if (size == dirstate_v1_from_p2) {
321 if (size == dirstate_v1_from_p2) {
322 t->flags =
322 t->flags =
323 dirstate_flag_wc_tracked | dirstate_flag_p2_info;
323 dirstate_flag_wc_tracked | dirstate_flag_p2_info;
324 } else if (size == dirstate_v1_nonnormal) {
324 } else if (size == dirstate_v1_nonnormal) {
325 t->flags =
325 t->flags =
326 dirstate_flag_wc_tracked | dirstate_flag_p1_tracked;
326 dirstate_flag_wc_tracked | dirstate_flag_p1_tracked;
327 } else if (mtime == ambiguous_time) {
327 } else if (mtime == ambiguous_time) {
328 t->flags = (dirstate_flag_wc_tracked |
328 t->flags = (dirstate_flag_wc_tracked |
329 dirstate_flag_p1_tracked |
329 dirstate_flag_p1_tracked |
330 dirstate_flag_has_meaningful_data);
330 dirstate_flag_has_meaningful_data);
331 t->mode = mode;
331 t->mode = mode;
332 t->size = size;
332 t->size = size;
333 } else {
333 } else {
334 t->flags = (dirstate_flag_wc_tracked |
334 t->flags = (dirstate_flag_wc_tracked |
335 dirstate_flag_p1_tracked |
335 dirstate_flag_p1_tracked |
336 dirstate_flag_has_meaningful_data |
336 dirstate_flag_has_meaningful_data |
337 dirstate_flag_has_meaningful_mtime);
337 dirstate_flag_has_file_mtime);
338 t->mode = mode;
338 t->mode = mode;
339 t->size = size;
339 t->size = size;
340 t->mtime = mtime;
340 t->mtime = mtime;
341 }
341 }
342 } else {
342 } else {
343 PyErr_Format(PyExc_RuntimeError,
343 PyErr_Format(PyExc_RuntimeError,
344 "unknown state: `%c` (%d, %d, %d)", state, mode,
344 "unknown state: `%c` (%d, %d, %d)", state, mode,
345 size, mtime, NULL);
345 size, mtime, NULL);
346 Py_DECREF(t);
346 Py_DECREF(t);
347 return NULL;
347 return NULL;
348 }
348 }
349
349
350 return t;
350 return t;
351 }
351 }
352
352
353 /* This will never change since it's bound to V1, unlike `dirstate_item_new` */
353 /* This will never change since it's bound to V1, unlike `dirstate_item_new` */
354 static PyObject *dirstate_item_from_v1_meth(PyTypeObject *subtype,
354 static PyObject *dirstate_item_from_v1_meth(PyTypeObject *subtype,
355 PyObject *args)
355 PyObject *args)
356 {
356 {
357 /* We do all the initialization here and not a tp_init function because
357 /* We do all the initialization here and not a tp_init function because
358 * dirstate_item is immutable. */
358 * dirstate_item is immutable. */
359 char state;
359 char state;
360 int size, mode, mtime;
360 int size, mode, mtime;
361 if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime)) {
361 if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime)) {
362 return NULL;
362 return NULL;
363 }
363 }
364 return (PyObject *)dirstate_item_from_v1_data(state, mode, size, mtime);
364 return (PyObject *)dirstate_item_from_v1_data(state, mode, size, mtime);
365 };
365 };
366
366
367 static PyObject *dirstate_item_from_v2_meth(PyTypeObject *subtype,
367 static PyObject *dirstate_item_from_v2_meth(PyTypeObject *subtype,
368 PyObject *args)
368 PyObject *args)
369 {
369 {
370 dirstateItemObject *t =
370 dirstateItemObject *t =
371 PyObject_New(dirstateItemObject, &dirstateItemType);
371 PyObject_New(dirstateItemObject, &dirstateItemType);
372 if (!t) {
372 if (!t) {
373 return NULL;
373 return NULL;
374 }
374 }
375 if (!PyArg_ParseTuple(args, "bii", &t->flags, &t->size, &t->mtime)) {
375 if (!PyArg_ParseTuple(args, "bii", &t->flags, &t->size, &t->mtime)) {
376 return NULL;
376 return NULL;
377 }
377 }
378 t->mode = 0;
378 t->mode = 0;
379 if (t->flags & dirstate_flag_has_meaningful_data) {
379 if (t->flags & dirstate_flag_has_meaningful_data) {
380 if (t->flags & dirstate_flag_mode_exec_perm) {
380 if (t->flags & dirstate_flag_mode_exec_perm) {
381 t->mode = 0755;
381 t->mode = 0755;
382 } else {
382 } else {
383 t->mode = 0644;
383 t->mode = 0644;
384 }
384 }
385 if (t->flags & dirstate_flag_mode_is_symlink) {
385 if (t->flags & dirstate_flag_mode_is_symlink) {
386 t->mode |= S_IFLNK;
386 t->mode |= S_IFLNK;
387 } else {
387 } else {
388 t->mode |= S_IFREG;
388 t->mode |= S_IFREG;
389 }
389 }
390 }
390 }
391 return (PyObject *)t;
391 return (PyObject *)t;
392 };
392 };
393
393
394 /* This means the next status call will have to actually check its content
394 /* This means the next status call will have to actually check its content
395 to make sure it is correct. */
395 to make sure it is correct. */
396 static PyObject *dirstate_item_set_possibly_dirty(dirstateItemObject *self)
396 static PyObject *dirstate_item_set_possibly_dirty(dirstateItemObject *self)
397 {
397 {
398 self->flags &= ~dirstate_flag_has_meaningful_mtime;
398 self->flags &= ~dirstate_flag_has_file_mtime;
399 Py_RETURN_NONE;
399 Py_RETURN_NONE;
400 }
400 }
401
401
402 /* See docstring of the python implementation for details */
402 /* See docstring of the python implementation for details */
403 static PyObject *dirstate_item_set_clean(dirstateItemObject *self,
403 static PyObject *dirstate_item_set_clean(dirstateItemObject *self,
404 PyObject *args)
404 PyObject *args)
405 {
405 {
406 int size, mode, mtime;
406 int size, mode, mtime;
407 if (!PyArg_ParseTuple(args, "iii", &mode, &size, &mtime)) {
407 if (!PyArg_ParseTuple(args, "iii", &mode, &size, &mtime)) {
408 return NULL;
408 return NULL;
409 }
409 }
410 self->flags = dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
410 self->flags = dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
411 dirstate_flag_has_meaningful_data |
411 dirstate_flag_has_meaningful_data |
412 dirstate_flag_has_meaningful_mtime;
412 dirstate_flag_has_file_mtime;
413 self->mode = mode;
413 self->mode = mode;
414 self->size = size;
414 self->size = size;
415 self->mtime = mtime;
415 self->mtime = mtime;
416 Py_RETURN_NONE;
416 Py_RETURN_NONE;
417 }
417 }
418
418
419 static PyObject *dirstate_item_set_tracked(dirstateItemObject *self)
419 static PyObject *dirstate_item_set_tracked(dirstateItemObject *self)
420 {
420 {
421 self->flags |= dirstate_flag_wc_tracked;
421 self->flags |= dirstate_flag_wc_tracked;
422 self->flags &= ~dirstate_flag_has_meaningful_mtime;
422 self->flags &= ~dirstate_flag_has_file_mtime;
423 Py_RETURN_NONE;
423 Py_RETURN_NONE;
424 }
424 }
425
425
426 static PyObject *dirstate_item_set_untracked(dirstateItemObject *self)
426 static PyObject *dirstate_item_set_untracked(dirstateItemObject *self)
427 {
427 {
428 self->flags &= ~dirstate_flag_wc_tracked;
428 self->flags &= ~dirstate_flag_wc_tracked;
429 self->mode = 0;
429 self->mode = 0;
430 self->mtime = 0;
430 self->mtime = 0;
431 self->size = 0;
431 self->size = 0;
432 Py_RETURN_NONE;
432 Py_RETURN_NONE;
433 }
433 }
434
434
435 static PyObject *dirstate_item_drop_merge_data(dirstateItemObject *self)
435 static PyObject *dirstate_item_drop_merge_data(dirstateItemObject *self)
436 {
436 {
437 if (self->flags & dirstate_flag_p2_info) {
437 if (self->flags & dirstate_flag_p2_info) {
438 self->flags &= ~(dirstate_flag_p2_info |
438 self->flags &= ~(dirstate_flag_p2_info |
439 dirstate_flag_has_meaningful_data |
439 dirstate_flag_has_meaningful_data |
440 dirstate_flag_has_meaningful_mtime);
440 dirstate_flag_has_file_mtime);
441 self->mode = 0;
441 self->mode = 0;
442 self->mtime = 0;
442 self->mtime = 0;
443 self->size = 0;
443 self->size = 0;
444 }
444 }
445 Py_RETURN_NONE;
445 Py_RETURN_NONE;
446 }
446 }
447 static PyMethodDef dirstate_item_methods[] = {
447 static PyMethodDef dirstate_item_methods[] = {
448 {"v2_data", (PyCFunction)dirstate_item_v2_data, METH_NOARGS,
448 {"v2_data", (PyCFunction)dirstate_item_v2_data, METH_NOARGS,
449 "return data suitable for v2 serialization"},
449 "return data suitable for v2 serialization"},
450 {"v1_state", (PyCFunction)dirstate_item_v1_state, METH_NOARGS,
450 {"v1_state", (PyCFunction)dirstate_item_v1_state, METH_NOARGS,
451 "return a \"state\" suitable for v1 serialization"},
451 "return a \"state\" suitable for v1 serialization"},
452 {"v1_mode", (PyCFunction)dirstate_item_v1_mode, METH_NOARGS,
452 {"v1_mode", (PyCFunction)dirstate_item_v1_mode, METH_NOARGS,
453 "return a \"mode\" suitable for v1 serialization"},
453 "return a \"mode\" suitable for v1 serialization"},
454 {"v1_size", (PyCFunction)dirstate_item_v1_size, METH_NOARGS,
454 {"v1_size", (PyCFunction)dirstate_item_v1_size, METH_NOARGS,
455 "return a \"size\" suitable for v1 serialization"},
455 "return a \"size\" suitable for v1 serialization"},
456 {"v1_mtime", (PyCFunction)dirstate_item_v1_mtime, METH_NOARGS,
456 {"v1_mtime", (PyCFunction)dirstate_item_v1_mtime, METH_NOARGS,
457 "return a \"mtime\" suitable for v1 serialization"},
457 "return a \"mtime\" suitable for v1 serialization"},
458 {"need_delay", (PyCFunction)dirstate_item_need_delay, METH_O,
458 {"need_delay", (PyCFunction)dirstate_item_need_delay, METH_O,
459 "True if the stored mtime would be ambiguous with the current time"},
459 "True if the stored mtime would be ambiguous with the current time"},
460 {"from_v1_data", (PyCFunction)dirstate_item_from_v1_meth,
460 {"from_v1_data", (PyCFunction)dirstate_item_from_v1_meth,
461 METH_VARARGS | METH_CLASS, "build a new DirstateItem object from V1 data"},
461 METH_VARARGS | METH_CLASS, "build a new DirstateItem object from V1 data"},
462 {"from_v2_data", (PyCFunction)dirstate_item_from_v2_meth,
462 {"from_v2_data", (PyCFunction)dirstate_item_from_v2_meth,
463 METH_VARARGS | METH_CLASS, "build a new DirstateItem object from V2 data"},
463 METH_VARARGS | METH_CLASS, "build a new DirstateItem object from V2 data"},
464 {"set_possibly_dirty", (PyCFunction)dirstate_item_set_possibly_dirty,
464 {"set_possibly_dirty", (PyCFunction)dirstate_item_set_possibly_dirty,
465 METH_NOARGS, "mark a file as \"possibly dirty\""},
465 METH_NOARGS, "mark a file as \"possibly dirty\""},
466 {"set_clean", (PyCFunction)dirstate_item_set_clean, METH_VARARGS,
466 {"set_clean", (PyCFunction)dirstate_item_set_clean, METH_VARARGS,
467 "mark a file as \"clean\""},
467 "mark a file as \"clean\""},
468 {"set_tracked", (PyCFunction)dirstate_item_set_tracked, METH_NOARGS,
468 {"set_tracked", (PyCFunction)dirstate_item_set_tracked, METH_NOARGS,
469 "mark a file as \"tracked\""},
469 "mark a file as \"tracked\""},
470 {"set_untracked", (PyCFunction)dirstate_item_set_untracked, METH_NOARGS,
470 {"set_untracked", (PyCFunction)dirstate_item_set_untracked, METH_NOARGS,
471 "mark a file as \"untracked\""},
471 "mark a file as \"untracked\""},
472 {"drop_merge_data", (PyCFunction)dirstate_item_drop_merge_data, METH_NOARGS,
472 {"drop_merge_data", (PyCFunction)dirstate_item_drop_merge_data, METH_NOARGS,
473 "remove all \"merge-only\" from a DirstateItem"},
473 "remove all \"merge-only\" from a DirstateItem"},
474 {NULL} /* Sentinel */
474 {NULL} /* Sentinel */
475 };
475 };
476
476
477 static PyObject *dirstate_item_get_mode(dirstateItemObject *self)
477 static PyObject *dirstate_item_get_mode(dirstateItemObject *self)
478 {
478 {
479 return PyInt_FromLong(dirstate_item_c_v1_mode(self));
479 return PyInt_FromLong(dirstate_item_c_v1_mode(self));
480 };
480 };
481
481
482 static PyObject *dirstate_item_get_size(dirstateItemObject *self)
482 static PyObject *dirstate_item_get_size(dirstateItemObject *self)
483 {
483 {
484 return PyInt_FromLong(dirstate_item_c_v1_size(self));
484 return PyInt_FromLong(dirstate_item_c_v1_size(self));
485 };
485 };
486
486
487 static PyObject *dirstate_item_get_mtime(dirstateItemObject *self)
487 static PyObject *dirstate_item_get_mtime(dirstateItemObject *self)
488 {
488 {
489 return PyInt_FromLong(dirstate_item_c_v1_mtime(self));
489 return PyInt_FromLong(dirstate_item_c_v1_mtime(self));
490 };
490 };
491
491
492 static PyObject *dirstate_item_get_state(dirstateItemObject *self)
492 static PyObject *dirstate_item_get_state(dirstateItemObject *self)
493 {
493 {
494 char state = dirstate_item_c_v1_state(self);
494 char state = dirstate_item_c_v1_state(self);
495 return PyBytes_FromStringAndSize(&state, 1);
495 return PyBytes_FromStringAndSize(&state, 1);
496 };
496 };
497
497
498 static PyObject *dirstate_item_get_tracked(dirstateItemObject *self)
498 static PyObject *dirstate_item_get_tracked(dirstateItemObject *self)
499 {
499 {
500 if (dirstate_item_c_tracked(self)) {
500 if (dirstate_item_c_tracked(self)) {
501 Py_RETURN_TRUE;
501 Py_RETURN_TRUE;
502 } else {
502 } else {
503 Py_RETURN_FALSE;
503 Py_RETURN_FALSE;
504 }
504 }
505 };
505 };
506 static PyObject *dirstate_item_get_p1_tracked(dirstateItemObject *self)
506 static PyObject *dirstate_item_get_p1_tracked(dirstateItemObject *self)
507 {
507 {
508 if (self->flags & dirstate_flag_p1_tracked) {
508 if (self->flags & dirstate_flag_p1_tracked) {
509 Py_RETURN_TRUE;
509 Py_RETURN_TRUE;
510 } else {
510 } else {
511 Py_RETURN_FALSE;
511 Py_RETURN_FALSE;
512 }
512 }
513 };
513 };
514
514
515 static PyObject *dirstate_item_get_added(dirstateItemObject *self)
515 static PyObject *dirstate_item_get_added(dirstateItemObject *self)
516 {
516 {
517 if (dirstate_item_c_added(self)) {
517 if (dirstate_item_c_added(self)) {
518 Py_RETURN_TRUE;
518 Py_RETURN_TRUE;
519 } else {
519 } else {
520 Py_RETURN_FALSE;
520 Py_RETURN_FALSE;
521 }
521 }
522 };
522 };
523
523
524 static PyObject *dirstate_item_get_p2_info(dirstateItemObject *self)
524 static PyObject *dirstate_item_get_p2_info(dirstateItemObject *self)
525 {
525 {
526 if (self->flags & dirstate_flag_wc_tracked &&
526 if (self->flags & dirstate_flag_wc_tracked &&
527 self->flags & dirstate_flag_p2_info) {
527 self->flags & dirstate_flag_p2_info) {
528 Py_RETURN_TRUE;
528 Py_RETURN_TRUE;
529 } else {
529 } else {
530 Py_RETURN_FALSE;
530 Py_RETURN_FALSE;
531 }
531 }
532 };
532 };
533
533
534 static PyObject *dirstate_item_get_merged(dirstateItemObject *self)
534 static PyObject *dirstate_item_get_merged(dirstateItemObject *self)
535 {
535 {
536 if (dirstate_item_c_merged(self)) {
536 if (dirstate_item_c_merged(self)) {
537 Py_RETURN_TRUE;
537 Py_RETURN_TRUE;
538 } else {
538 } else {
539 Py_RETURN_FALSE;
539 Py_RETURN_FALSE;
540 }
540 }
541 };
541 };
542
542
543 static PyObject *dirstate_item_get_from_p2(dirstateItemObject *self)
543 static PyObject *dirstate_item_get_from_p2(dirstateItemObject *self)
544 {
544 {
545 if (dirstate_item_c_from_p2(self)) {
545 if (dirstate_item_c_from_p2(self)) {
546 Py_RETURN_TRUE;
546 Py_RETURN_TRUE;
547 } else {
547 } else {
548 Py_RETURN_FALSE;
548 Py_RETURN_FALSE;
549 }
549 }
550 };
550 };
551
551
552 static PyObject *dirstate_item_get_maybe_clean(dirstateItemObject *self)
552 static PyObject *dirstate_item_get_maybe_clean(dirstateItemObject *self)
553 {
553 {
554 if (!(self->flags & dirstate_flag_wc_tracked)) {
554 if (!(self->flags & dirstate_flag_wc_tracked)) {
555 Py_RETURN_FALSE;
555 Py_RETURN_FALSE;
556 } else if (!(self->flags & dirstate_flag_p1_tracked)) {
556 } else if (!(self->flags & dirstate_flag_p1_tracked)) {
557 Py_RETURN_FALSE;
557 Py_RETURN_FALSE;
558 } else if (self->flags & dirstate_flag_p2_info) {
558 } else if (self->flags & dirstate_flag_p2_info) {
559 Py_RETURN_FALSE;
559 Py_RETURN_FALSE;
560 } else {
560 } else {
561 Py_RETURN_TRUE;
561 Py_RETURN_TRUE;
562 }
562 }
563 };
563 };
564
564
565 static PyObject *dirstate_item_get_any_tracked(dirstateItemObject *self)
565 static PyObject *dirstate_item_get_any_tracked(dirstateItemObject *self)
566 {
566 {
567 if (dirstate_item_c_any_tracked(self)) {
567 if (dirstate_item_c_any_tracked(self)) {
568 Py_RETURN_TRUE;
568 Py_RETURN_TRUE;
569 } else {
569 } else {
570 Py_RETURN_FALSE;
570 Py_RETURN_FALSE;
571 }
571 }
572 };
572 };
573
573
574 static PyObject *dirstate_item_get_removed(dirstateItemObject *self)
574 static PyObject *dirstate_item_get_removed(dirstateItemObject *self)
575 {
575 {
576 if (dirstate_item_c_removed(self)) {
576 if (dirstate_item_c_removed(self)) {
577 Py_RETURN_TRUE;
577 Py_RETURN_TRUE;
578 } else {
578 } else {
579 Py_RETURN_FALSE;
579 Py_RETURN_FALSE;
580 }
580 }
581 };
581 };
582
582
583 static PyGetSetDef dirstate_item_getset[] = {
583 static PyGetSetDef dirstate_item_getset[] = {
584 {"mode", (getter)dirstate_item_get_mode, NULL, "mode", NULL},
584 {"mode", (getter)dirstate_item_get_mode, NULL, "mode", NULL},
585 {"size", (getter)dirstate_item_get_size, NULL, "size", NULL},
585 {"size", (getter)dirstate_item_get_size, NULL, "size", NULL},
586 {"mtime", (getter)dirstate_item_get_mtime, NULL, "mtime", NULL},
586 {"mtime", (getter)dirstate_item_get_mtime, NULL, "mtime", NULL},
587 {"state", (getter)dirstate_item_get_state, NULL, "state", NULL},
587 {"state", (getter)dirstate_item_get_state, NULL, "state", NULL},
588 {"tracked", (getter)dirstate_item_get_tracked, NULL, "tracked", NULL},
588 {"tracked", (getter)dirstate_item_get_tracked, NULL, "tracked", NULL},
589 {"p1_tracked", (getter)dirstate_item_get_p1_tracked, NULL, "p1_tracked",
589 {"p1_tracked", (getter)dirstate_item_get_p1_tracked, NULL, "p1_tracked",
590 NULL},
590 NULL},
591 {"added", (getter)dirstate_item_get_added, NULL, "added", NULL},
591 {"added", (getter)dirstate_item_get_added, NULL, "added", NULL},
592 {"p2_info", (getter)dirstate_item_get_p2_info, NULL, "p2_info", NULL},
592 {"p2_info", (getter)dirstate_item_get_p2_info, NULL, "p2_info", NULL},
593 {"merged", (getter)dirstate_item_get_merged, NULL, "merged", NULL},
593 {"merged", (getter)dirstate_item_get_merged, NULL, "merged", NULL},
594 {"from_p2", (getter)dirstate_item_get_from_p2, NULL, "from_p2", NULL},
594 {"from_p2", (getter)dirstate_item_get_from_p2, NULL, "from_p2", NULL},
595 {"maybe_clean", (getter)dirstate_item_get_maybe_clean, NULL, "maybe_clean",
595 {"maybe_clean", (getter)dirstate_item_get_maybe_clean, NULL, "maybe_clean",
596 NULL},
596 NULL},
597 {"any_tracked", (getter)dirstate_item_get_any_tracked, NULL, "any_tracked",
597 {"any_tracked", (getter)dirstate_item_get_any_tracked, NULL, "any_tracked",
598 NULL},
598 NULL},
599 {"removed", (getter)dirstate_item_get_removed, NULL, "removed", NULL},
599 {"removed", (getter)dirstate_item_get_removed, NULL, "removed", NULL},
600 {NULL} /* Sentinel */
600 {NULL} /* Sentinel */
601 };
601 };
602
602
603 PyTypeObject dirstateItemType = {
603 PyTypeObject dirstateItemType = {
604 PyVarObject_HEAD_INIT(NULL, 0) /* header */
604 PyVarObject_HEAD_INIT(NULL, 0) /* header */
605 "dirstate_tuple", /* tp_name */
605 "dirstate_tuple", /* tp_name */
606 sizeof(dirstateItemObject), /* tp_basicsize */
606 sizeof(dirstateItemObject), /* tp_basicsize */
607 0, /* tp_itemsize */
607 0, /* tp_itemsize */
608 (destructor)dirstate_item_dealloc, /* tp_dealloc */
608 (destructor)dirstate_item_dealloc, /* tp_dealloc */
609 0, /* tp_print */
609 0, /* tp_print */
610 0, /* tp_getattr */
610 0, /* tp_getattr */
611 0, /* tp_setattr */
611 0, /* tp_setattr */
612 0, /* tp_compare */
612 0, /* tp_compare */
613 0, /* tp_repr */
613 0, /* tp_repr */
614 0, /* tp_as_number */
614 0, /* tp_as_number */
615 0, /* tp_as_sequence */
615 0, /* tp_as_sequence */
616 0, /* tp_as_mapping */
616 0, /* tp_as_mapping */
617 0, /* tp_hash */
617 0, /* tp_hash */
618 0, /* tp_call */
618 0, /* tp_call */
619 0, /* tp_str */
619 0, /* tp_str */
620 0, /* tp_getattro */
620 0, /* tp_getattro */
621 0, /* tp_setattro */
621 0, /* tp_setattro */
622 0, /* tp_as_buffer */
622 0, /* tp_as_buffer */
623 Py_TPFLAGS_DEFAULT, /* tp_flags */
623 Py_TPFLAGS_DEFAULT, /* tp_flags */
624 "dirstate tuple", /* tp_doc */
624 "dirstate tuple", /* tp_doc */
625 0, /* tp_traverse */
625 0, /* tp_traverse */
626 0, /* tp_clear */
626 0, /* tp_clear */
627 0, /* tp_richcompare */
627 0, /* tp_richcompare */
628 0, /* tp_weaklistoffset */
628 0, /* tp_weaklistoffset */
629 0, /* tp_iter */
629 0, /* tp_iter */
630 0, /* tp_iternext */
630 0, /* tp_iternext */
631 dirstate_item_methods, /* tp_methods */
631 dirstate_item_methods, /* tp_methods */
632 0, /* tp_members */
632 0, /* tp_members */
633 dirstate_item_getset, /* tp_getset */
633 dirstate_item_getset, /* tp_getset */
634 0, /* tp_base */
634 0, /* tp_base */
635 0, /* tp_dict */
635 0, /* tp_dict */
636 0, /* tp_descr_get */
636 0, /* tp_descr_get */
637 0, /* tp_descr_set */
637 0, /* tp_descr_set */
638 0, /* tp_dictoffset */
638 0, /* tp_dictoffset */
639 0, /* tp_init */
639 0, /* tp_init */
640 0, /* tp_alloc */
640 0, /* tp_alloc */
641 dirstate_item_new, /* tp_new */
641 dirstate_item_new, /* tp_new */
642 };
642 };
643
643
644 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
644 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
645 {
645 {
646 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
646 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
647 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
647 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
648 char state, *cur, *str, *cpos;
648 char state, *cur, *str, *cpos;
649 int mode, size, mtime;
649 int mode, size, mtime;
650 unsigned int flen, pos = 40;
650 unsigned int flen, pos = 40;
651 Py_ssize_t len = 40;
651 Py_ssize_t len = 40;
652 Py_ssize_t readlen;
652 Py_ssize_t readlen;
653
653
654 if (!PyArg_ParseTuple(
654 if (!PyArg_ParseTuple(
655 args, PY23("O!O!s#:parse_dirstate", "O!O!y#:parse_dirstate"),
655 args, PY23("O!O!s#:parse_dirstate", "O!O!y#:parse_dirstate"),
656 &PyDict_Type, &dmap, &PyDict_Type, &cmap, &str, &readlen)) {
656 &PyDict_Type, &dmap, &PyDict_Type, &cmap, &str, &readlen)) {
657 goto quit;
657 goto quit;
658 }
658 }
659
659
660 len = readlen;
660 len = readlen;
661
661
662 /* read parents */
662 /* read parents */
663 if (len < 40) {
663 if (len < 40) {
664 PyErr_SetString(PyExc_ValueError,
664 PyErr_SetString(PyExc_ValueError,
665 "too little data for parents");
665 "too little data for parents");
666 goto quit;
666 goto quit;
667 }
667 }
668
668
669 parents = Py_BuildValue(PY23("s#s#", "y#y#"), str, (Py_ssize_t)20,
669 parents = Py_BuildValue(PY23("s#s#", "y#y#"), str, (Py_ssize_t)20,
670 str + 20, (Py_ssize_t)20);
670 str + 20, (Py_ssize_t)20);
671 if (!parents) {
671 if (!parents) {
672 goto quit;
672 goto quit;
673 }
673 }
674
674
675 /* read filenames */
675 /* read filenames */
676 while (pos >= 40 && pos < len) {
676 while (pos >= 40 && pos < len) {
677 if (pos + 17 > len) {
677 if (pos + 17 > len) {
678 PyErr_SetString(PyExc_ValueError,
678 PyErr_SetString(PyExc_ValueError,
679 "overflow in dirstate");
679 "overflow in dirstate");
680 goto quit;
680 goto quit;
681 }
681 }
682 cur = str + pos;
682 cur = str + pos;
683 /* unpack header */
683 /* unpack header */
684 state = *cur;
684 state = *cur;
685 mode = getbe32(cur + 1);
685 mode = getbe32(cur + 1);
686 size = getbe32(cur + 5);
686 size = getbe32(cur + 5);
687 mtime = getbe32(cur + 9);
687 mtime = getbe32(cur + 9);
688 flen = getbe32(cur + 13);
688 flen = getbe32(cur + 13);
689 pos += 17;
689 pos += 17;
690 cur += 17;
690 cur += 17;
691 if (flen > len - pos) {
691 if (flen > len - pos) {
692 PyErr_SetString(PyExc_ValueError,
692 PyErr_SetString(PyExc_ValueError,
693 "overflow in dirstate");
693 "overflow in dirstate");
694 goto quit;
694 goto quit;
695 }
695 }
696
696
697 entry = (PyObject *)dirstate_item_from_v1_data(state, mode,
697 entry = (PyObject *)dirstate_item_from_v1_data(state, mode,
698 size, mtime);
698 size, mtime);
699 if (!entry)
699 if (!entry)
700 goto quit;
700 goto quit;
701 cpos = memchr(cur, 0, flen);
701 cpos = memchr(cur, 0, flen);
702 if (cpos) {
702 if (cpos) {
703 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
703 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
704 cname = PyBytes_FromStringAndSize(
704 cname = PyBytes_FromStringAndSize(
705 cpos + 1, flen - (cpos - cur) - 1);
705 cpos + 1, flen - (cpos - cur) - 1);
706 if (!fname || !cname ||
706 if (!fname || !cname ||
707 PyDict_SetItem(cmap, fname, cname) == -1 ||
707 PyDict_SetItem(cmap, fname, cname) == -1 ||
708 PyDict_SetItem(dmap, fname, entry) == -1) {
708 PyDict_SetItem(dmap, fname, entry) == -1) {
709 goto quit;
709 goto quit;
710 }
710 }
711 Py_DECREF(cname);
711 Py_DECREF(cname);
712 } else {
712 } else {
713 fname = PyBytes_FromStringAndSize(cur, flen);
713 fname = PyBytes_FromStringAndSize(cur, flen);
714 if (!fname ||
714 if (!fname ||
715 PyDict_SetItem(dmap, fname, entry) == -1) {
715 PyDict_SetItem(dmap, fname, entry) == -1) {
716 goto quit;
716 goto quit;
717 }
717 }
718 }
718 }
719 Py_DECREF(fname);
719 Py_DECREF(fname);
720 Py_DECREF(entry);
720 Py_DECREF(entry);
721 fname = cname = entry = NULL;
721 fname = cname = entry = NULL;
722 pos += flen;
722 pos += flen;
723 }
723 }
724
724
725 ret = parents;
725 ret = parents;
726 Py_INCREF(ret);
726 Py_INCREF(ret);
727 quit:
727 quit:
728 Py_XDECREF(fname);
728 Py_XDECREF(fname);
729 Py_XDECREF(cname);
729 Py_XDECREF(cname);
730 Py_XDECREF(entry);
730 Py_XDECREF(entry);
731 Py_XDECREF(parents);
731 Py_XDECREF(parents);
732 return ret;
732 return ret;
733 }
733 }
734
734
735 /*
735 /*
736 * Efficiently pack a dirstate object into its on-disk format.
736 * Efficiently pack a dirstate object into its on-disk format.
737 */
737 */
738 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
738 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
739 {
739 {
740 PyObject *packobj = NULL;
740 PyObject *packobj = NULL;
741 PyObject *map, *copymap, *pl, *mtime_unset = NULL;
741 PyObject *map, *copymap, *pl, *mtime_unset = NULL;
742 Py_ssize_t nbytes, pos, l;
742 Py_ssize_t nbytes, pos, l;
743 PyObject *k, *v = NULL, *pn;
743 PyObject *k, *v = NULL, *pn;
744 char *p, *s;
744 char *p, *s;
745 int now;
745 int now;
746
746
747 if (!PyArg_ParseTuple(args, "O!O!O!i:pack_dirstate", &PyDict_Type, &map,
747 if (!PyArg_ParseTuple(args, "O!O!O!i:pack_dirstate", &PyDict_Type, &map,
748 &PyDict_Type, &copymap, &PyTuple_Type, &pl,
748 &PyDict_Type, &copymap, &PyTuple_Type, &pl,
749 &now)) {
749 &now)) {
750 return NULL;
750 return NULL;
751 }
751 }
752
752
753 if (PyTuple_Size(pl) != 2) {
753 if (PyTuple_Size(pl) != 2) {
754 PyErr_SetString(PyExc_TypeError, "expected 2-element tuple");
754 PyErr_SetString(PyExc_TypeError, "expected 2-element tuple");
755 return NULL;
755 return NULL;
756 }
756 }
757
757
758 /* Figure out how much we need to allocate. */
758 /* Figure out how much we need to allocate. */
759 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
759 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
760 PyObject *c;
760 PyObject *c;
761 if (!PyBytes_Check(k)) {
761 if (!PyBytes_Check(k)) {
762 PyErr_SetString(PyExc_TypeError, "expected string key");
762 PyErr_SetString(PyExc_TypeError, "expected string key");
763 goto bail;
763 goto bail;
764 }
764 }
765 nbytes += PyBytes_GET_SIZE(k) + 17;
765 nbytes += PyBytes_GET_SIZE(k) + 17;
766 c = PyDict_GetItem(copymap, k);
766 c = PyDict_GetItem(copymap, k);
767 if (c) {
767 if (c) {
768 if (!PyBytes_Check(c)) {
768 if (!PyBytes_Check(c)) {
769 PyErr_SetString(PyExc_TypeError,
769 PyErr_SetString(PyExc_TypeError,
770 "expected string key");
770 "expected string key");
771 goto bail;
771 goto bail;
772 }
772 }
773 nbytes += PyBytes_GET_SIZE(c) + 1;
773 nbytes += PyBytes_GET_SIZE(c) + 1;
774 }
774 }
775 }
775 }
776
776
777 packobj = PyBytes_FromStringAndSize(NULL, nbytes);
777 packobj = PyBytes_FromStringAndSize(NULL, nbytes);
778 if (packobj == NULL) {
778 if (packobj == NULL) {
779 goto bail;
779 goto bail;
780 }
780 }
781
781
782 p = PyBytes_AS_STRING(packobj);
782 p = PyBytes_AS_STRING(packobj);
783
783
784 pn = PyTuple_GET_ITEM(pl, 0);
784 pn = PyTuple_GET_ITEM(pl, 0);
785 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
785 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
786 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
786 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
787 goto bail;
787 goto bail;
788 }
788 }
789 memcpy(p, s, l);
789 memcpy(p, s, l);
790 p += 20;
790 p += 20;
791 pn = PyTuple_GET_ITEM(pl, 1);
791 pn = PyTuple_GET_ITEM(pl, 1);
792 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
792 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
793 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
793 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
794 goto bail;
794 goto bail;
795 }
795 }
796 memcpy(p, s, l);
796 memcpy(p, s, l);
797 p += 20;
797 p += 20;
798
798
799 for (pos = 0; PyDict_Next(map, &pos, &k, &v);) {
799 for (pos = 0; PyDict_Next(map, &pos, &k, &v);) {
800 dirstateItemObject *tuple;
800 dirstateItemObject *tuple;
801 char state;
801 char state;
802 int mode, size, mtime;
802 int mode, size, mtime;
803 Py_ssize_t len, l;
803 Py_ssize_t len, l;
804 PyObject *o;
804 PyObject *o;
805 char *t;
805 char *t;
806
806
807 if (!dirstate_tuple_check(v)) {
807 if (!dirstate_tuple_check(v)) {
808 PyErr_SetString(PyExc_TypeError,
808 PyErr_SetString(PyExc_TypeError,
809 "expected a dirstate tuple");
809 "expected a dirstate tuple");
810 goto bail;
810 goto bail;
811 }
811 }
812 tuple = (dirstateItemObject *)v;
812 tuple = (dirstateItemObject *)v;
813
813
814 state = dirstate_item_c_v1_state(tuple);
814 state = dirstate_item_c_v1_state(tuple);
815 mode = dirstate_item_c_v1_mode(tuple);
815 mode = dirstate_item_c_v1_mode(tuple);
816 size = dirstate_item_c_v1_size(tuple);
816 size = dirstate_item_c_v1_size(tuple);
817 mtime = dirstate_item_c_v1_mtime(tuple);
817 mtime = dirstate_item_c_v1_mtime(tuple);
818 if (state == 'n' && mtime == now) {
818 if (state == 'n' && mtime == now) {
819 /* See pure/parsers.py:pack_dirstate for why we do
819 /* See pure/parsers.py:pack_dirstate for why we do
820 * this. */
820 * this. */
821 mtime = -1;
821 mtime = -1;
822 mtime_unset = (PyObject *)dirstate_item_from_v1_data(
822 mtime_unset = (PyObject *)dirstate_item_from_v1_data(
823 state, mode, size, mtime);
823 state, mode, size, mtime);
824 if (!mtime_unset) {
824 if (!mtime_unset) {
825 goto bail;
825 goto bail;
826 }
826 }
827 if (PyDict_SetItem(map, k, mtime_unset) == -1) {
827 if (PyDict_SetItem(map, k, mtime_unset) == -1) {
828 goto bail;
828 goto bail;
829 }
829 }
830 Py_DECREF(mtime_unset);
830 Py_DECREF(mtime_unset);
831 mtime_unset = NULL;
831 mtime_unset = NULL;
832 }
832 }
833 *p++ = state;
833 *p++ = state;
834 putbe32((uint32_t)mode, p);
834 putbe32((uint32_t)mode, p);
835 putbe32((uint32_t)size, p + 4);
835 putbe32((uint32_t)size, p + 4);
836 putbe32((uint32_t)mtime, p + 8);
836 putbe32((uint32_t)mtime, p + 8);
837 t = p + 12;
837 t = p + 12;
838 p += 16;
838 p += 16;
839 len = PyBytes_GET_SIZE(k);
839 len = PyBytes_GET_SIZE(k);
840 memcpy(p, PyBytes_AS_STRING(k), len);
840 memcpy(p, PyBytes_AS_STRING(k), len);
841 p += len;
841 p += len;
842 o = PyDict_GetItem(copymap, k);
842 o = PyDict_GetItem(copymap, k);
843 if (o) {
843 if (o) {
844 *p++ = '\0';
844 *p++ = '\0';
845 l = PyBytes_GET_SIZE(o);
845 l = PyBytes_GET_SIZE(o);
846 memcpy(p, PyBytes_AS_STRING(o), l);
846 memcpy(p, PyBytes_AS_STRING(o), l);
847 p += l;
847 p += l;
848 len += l + 1;
848 len += l + 1;
849 }
849 }
850 putbe32((uint32_t)len, t);
850 putbe32((uint32_t)len, t);
851 }
851 }
852
852
853 pos = p - PyBytes_AS_STRING(packobj);
853 pos = p - PyBytes_AS_STRING(packobj);
854 if (pos != nbytes) {
854 if (pos != nbytes) {
855 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
855 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
856 (long)pos, (long)nbytes);
856 (long)pos, (long)nbytes);
857 goto bail;
857 goto bail;
858 }
858 }
859
859
860 return packobj;
860 return packobj;
861 bail:
861 bail:
862 Py_XDECREF(mtime_unset);
862 Py_XDECREF(mtime_unset);
863 Py_XDECREF(packobj);
863 Py_XDECREF(packobj);
864 Py_XDECREF(v);
864 Py_XDECREF(v);
865 return NULL;
865 return NULL;
866 }
866 }
867
867
868 #define BUMPED_FIX 1
868 #define BUMPED_FIX 1
869 #define USING_SHA_256 2
869 #define USING_SHA_256 2
870 #define FM1_HEADER_SIZE (4 + 8 + 2 + 2 + 1 + 1 + 1)
870 #define FM1_HEADER_SIZE (4 + 8 + 2 + 2 + 1 + 1 + 1)
871
871
872 static PyObject *readshas(const char *source, unsigned char num,
872 static PyObject *readshas(const char *source, unsigned char num,
873 Py_ssize_t hashwidth)
873 Py_ssize_t hashwidth)
874 {
874 {
875 int i;
875 int i;
876 PyObject *list = PyTuple_New(num);
876 PyObject *list = PyTuple_New(num);
877 if (list == NULL) {
877 if (list == NULL) {
878 return NULL;
878 return NULL;
879 }
879 }
880 for (i = 0; i < num; i++) {
880 for (i = 0; i < num; i++) {
881 PyObject *hash = PyBytes_FromStringAndSize(source, hashwidth);
881 PyObject *hash = PyBytes_FromStringAndSize(source, hashwidth);
882 if (hash == NULL) {
882 if (hash == NULL) {
883 Py_DECREF(list);
883 Py_DECREF(list);
884 return NULL;
884 return NULL;
885 }
885 }
886 PyTuple_SET_ITEM(list, i, hash);
886 PyTuple_SET_ITEM(list, i, hash);
887 source += hashwidth;
887 source += hashwidth;
888 }
888 }
889 return list;
889 return list;
890 }
890 }
891
891
892 static PyObject *fm1readmarker(const char *databegin, const char *dataend,
892 static PyObject *fm1readmarker(const char *databegin, const char *dataend,
893 uint32_t *msize)
893 uint32_t *msize)
894 {
894 {
895 const char *data = databegin;
895 const char *data = databegin;
896 const char *meta;
896 const char *meta;
897
897
898 double mtime;
898 double mtime;
899 int16_t tz;
899 int16_t tz;
900 uint16_t flags;
900 uint16_t flags;
901 unsigned char nsuccs, nparents, nmetadata;
901 unsigned char nsuccs, nparents, nmetadata;
902 Py_ssize_t hashwidth = 20;
902 Py_ssize_t hashwidth = 20;
903
903
904 PyObject *prec = NULL, *parents = NULL, *succs = NULL;
904 PyObject *prec = NULL, *parents = NULL, *succs = NULL;
905 PyObject *metadata = NULL, *ret = NULL;
905 PyObject *metadata = NULL, *ret = NULL;
906 int i;
906 int i;
907
907
908 if (data + FM1_HEADER_SIZE > dataend) {
908 if (data + FM1_HEADER_SIZE > dataend) {
909 goto overflow;
909 goto overflow;
910 }
910 }
911
911
912 *msize = getbe32(data);
912 *msize = getbe32(data);
913 data += 4;
913 data += 4;
914 mtime = getbefloat64(data);
914 mtime = getbefloat64(data);
915 data += 8;
915 data += 8;
916 tz = getbeint16(data);
916 tz = getbeint16(data);
917 data += 2;
917 data += 2;
918 flags = getbeuint16(data);
918 flags = getbeuint16(data);
919 data += 2;
919 data += 2;
920
920
921 if (flags & USING_SHA_256) {
921 if (flags & USING_SHA_256) {
922 hashwidth = 32;
922 hashwidth = 32;
923 }
923 }
924
924
925 nsuccs = (unsigned char)(*data++);
925 nsuccs = (unsigned char)(*data++);
926 nparents = (unsigned char)(*data++);
926 nparents = (unsigned char)(*data++);
927 nmetadata = (unsigned char)(*data++);
927 nmetadata = (unsigned char)(*data++);
928
928
929 if (databegin + *msize > dataend) {
929 if (databegin + *msize > dataend) {
930 goto overflow;
930 goto overflow;
931 }
931 }
932 dataend = databegin + *msize; /* narrow down to marker size */
932 dataend = databegin + *msize; /* narrow down to marker size */
933
933
934 if (data + hashwidth > dataend) {
934 if (data + hashwidth > dataend) {
935 goto overflow;
935 goto overflow;
936 }
936 }
937 prec = PyBytes_FromStringAndSize(data, hashwidth);
937 prec = PyBytes_FromStringAndSize(data, hashwidth);
938 data += hashwidth;
938 data += hashwidth;
939 if (prec == NULL) {
939 if (prec == NULL) {
940 goto bail;
940 goto bail;
941 }
941 }
942
942
943 if (data + nsuccs * hashwidth > dataend) {
943 if (data + nsuccs * hashwidth > dataend) {
944 goto overflow;
944 goto overflow;
945 }
945 }
946 succs = readshas(data, nsuccs, hashwidth);
946 succs = readshas(data, nsuccs, hashwidth);
947 if (succs == NULL) {
947 if (succs == NULL) {
948 goto bail;
948 goto bail;
949 }
949 }
950 data += nsuccs * hashwidth;
950 data += nsuccs * hashwidth;
951
951
952 if (nparents == 1 || nparents == 2) {
952 if (nparents == 1 || nparents == 2) {
953 if (data + nparents * hashwidth > dataend) {
953 if (data + nparents * hashwidth > dataend) {
954 goto overflow;
954 goto overflow;
955 }
955 }
956 parents = readshas(data, nparents, hashwidth);
956 parents = readshas(data, nparents, hashwidth);
957 if (parents == NULL) {
957 if (parents == NULL) {
958 goto bail;
958 goto bail;
959 }
959 }
960 data += nparents * hashwidth;
960 data += nparents * hashwidth;
961 } else {
961 } else {
962 parents = Py_None;
962 parents = Py_None;
963 Py_INCREF(parents);
963 Py_INCREF(parents);
964 }
964 }
965
965
966 if (data + 2 * nmetadata > dataend) {
966 if (data + 2 * nmetadata > dataend) {
967 goto overflow;
967 goto overflow;
968 }
968 }
969 meta = data + (2 * nmetadata);
969 meta = data + (2 * nmetadata);
970 metadata = PyTuple_New(nmetadata);
970 metadata = PyTuple_New(nmetadata);
971 if (metadata == NULL) {
971 if (metadata == NULL) {
972 goto bail;
972 goto bail;
973 }
973 }
974 for (i = 0; i < nmetadata; i++) {
974 for (i = 0; i < nmetadata; i++) {
975 PyObject *tmp, *left = NULL, *right = NULL;
975 PyObject *tmp, *left = NULL, *right = NULL;
976 Py_ssize_t leftsize = (unsigned char)(*data++);
976 Py_ssize_t leftsize = (unsigned char)(*data++);
977 Py_ssize_t rightsize = (unsigned char)(*data++);
977 Py_ssize_t rightsize = (unsigned char)(*data++);
978 if (meta + leftsize + rightsize > dataend) {
978 if (meta + leftsize + rightsize > dataend) {
979 goto overflow;
979 goto overflow;
980 }
980 }
981 left = PyBytes_FromStringAndSize(meta, leftsize);
981 left = PyBytes_FromStringAndSize(meta, leftsize);
982 meta += leftsize;
982 meta += leftsize;
983 right = PyBytes_FromStringAndSize(meta, rightsize);
983 right = PyBytes_FromStringAndSize(meta, rightsize);
984 meta += rightsize;
984 meta += rightsize;
985 tmp = PyTuple_New(2);
985 tmp = PyTuple_New(2);
986 if (!left || !right || !tmp) {
986 if (!left || !right || !tmp) {
987 Py_XDECREF(left);
987 Py_XDECREF(left);
988 Py_XDECREF(right);
988 Py_XDECREF(right);
989 Py_XDECREF(tmp);
989 Py_XDECREF(tmp);
990 goto bail;
990 goto bail;
991 }
991 }
992 PyTuple_SET_ITEM(tmp, 0, left);
992 PyTuple_SET_ITEM(tmp, 0, left);
993 PyTuple_SET_ITEM(tmp, 1, right);
993 PyTuple_SET_ITEM(tmp, 1, right);
994 PyTuple_SET_ITEM(metadata, i, tmp);
994 PyTuple_SET_ITEM(metadata, i, tmp);
995 }
995 }
996 ret = Py_BuildValue("(OOHO(di)O)", prec, succs, flags, metadata, mtime,
996 ret = Py_BuildValue("(OOHO(di)O)", prec, succs, flags, metadata, mtime,
997 (int)tz * 60, parents);
997 (int)tz * 60, parents);
998 goto bail; /* return successfully */
998 goto bail; /* return successfully */
999
999
1000 overflow:
1000 overflow:
1001 PyErr_SetString(PyExc_ValueError, "overflow in obsstore");
1001 PyErr_SetString(PyExc_ValueError, "overflow in obsstore");
1002 bail:
1002 bail:
1003 Py_XDECREF(prec);
1003 Py_XDECREF(prec);
1004 Py_XDECREF(succs);
1004 Py_XDECREF(succs);
1005 Py_XDECREF(metadata);
1005 Py_XDECREF(metadata);
1006 Py_XDECREF(parents);
1006 Py_XDECREF(parents);
1007 return ret;
1007 return ret;
1008 }
1008 }
1009
1009
1010 static PyObject *fm1readmarkers(PyObject *self, PyObject *args)
1010 static PyObject *fm1readmarkers(PyObject *self, PyObject *args)
1011 {
1011 {
1012 const char *data, *dataend;
1012 const char *data, *dataend;
1013 Py_ssize_t datalen, offset, stop;
1013 Py_ssize_t datalen, offset, stop;
1014 PyObject *markers = NULL;
1014 PyObject *markers = NULL;
1015
1015
1016 if (!PyArg_ParseTuple(args, PY23("s#nn", "y#nn"), &data, &datalen,
1016 if (!PyArg_ParseTuple(args, PY23("s#nn", "y#nn"), &data, &datalen,
1017 &offset, &stop)) {
1017 &offset, &stop)) {
1018 return NULL;
1018 return NULL;
1019 }
1019 }
1020 if (offset < 0) {
1020 if (offset < 0) {
1021 PyErr_SetString(PyExc_ValueError,
1021 PyErr_SetString(PyExc_ValueError,
1022 "invalid negative offset in fm1readmarkers");
1022 "invalid negative offset in fm1readmarkers");
1023 return NULL;
1023 return NULL;
1024 }
1024 }
1025 if (stop > datalen) {
1025 if (stop > datalen) {
1026 PyErr_SetString(
1026 PyErr_SetString(
1027 PyExc_ValueError,
1027 PyExc_ValueError,
1028 "stop longer than data length in fm1readmarkers");
1028 "stop longer than data length in fm1readmarkers");
1029 return NULL;
1029 return NULL;
1030 }
1030 }
1031 dataend = data + datalen;
1031 dataend = data + datalen;
1032 data += offset;
1032 data += offset;
1033 markers = PyList_New(0);
1033 markers = PyList_New(0);
1034 if (!markers) {
1034 if (!markers) {
1035 return NULL;
1035 return NULL;
1036 }
1036 }
1037 while (offset < stop) {
1037 while (offset < stop) {
1038 uint32_t msize;
1038 uint32_t msize;
1039 int error;
1039 int error;
1040 PyObject *record = fm1readmarker(data, dataend, &msize);
1040 PyObject *record = fm1readmarker(data, dataend, &msize);
1041 if (!record) {
1041 if (!record) {
1042 goto bail;
1042 goto bail;
1043 }
1043 }
1044 error = PyList_Append(markers, record);
1044 error = PyList_Append(markers, record);
1045 Py_DECREF(record);
1045 Py_DECREF(record);
1046 if (error) {
1046 if (error) {
1047 goto bail;
1047 goto bail;
1048 }
1048 }
1049 data += msize;
1049 data += msize;
1050 offset += msize;
1050 offset += msize;
1051 }
1051 }
1052 return markers;
1052 return markers;
1053 bail:
1053 bail:
1054 Py_DECREF(markers);
1054 Py_DECREF(markers);
1055 return NULL;
1055 return NULL;
1056 }
1056 }
1057
1057
1058 static char parsers_doc[] = "Efficient content parsing.";
1058 static char parsers_doc[] = "Efficient content parsing.";
1059
1059
1060 PyObject *encodedir(PyObject *self, PyObject *args);
1060 PyObject *encodedir(PyObject *self, PyObject *args);
1061 PyObject *pathencode(PyObject *self, PyObject *args);
1061 PyObject *pathencode(PyObject *self, PyObject *args);
1062 PyObject *lowerencode(PyObject *self, PyObject *args);
1062 PyObject *lowerencode(PyObject *self, PyObject *args);
1063 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs);
1063 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs);
1064
1064
1065 static PyMethodDef methods[] = {
1065 static PyMethodDef methods[] = {
1066 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
1066 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
1067 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1067 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1068 {"parse_index2", (PyCFunction)parse_index2, METH_VARARGS | METH_KEYWORDS,
1068 {"parse_index2", (PyCFunction)parse_index2, METH_VARARGS | METH_KEYWORDS,
1069 "parse a revlog index\n"},
1069 "parse a revlog index\n"},
1070 {"isasciistr", isasciistr, METH_VARARGS, "check if an ASCII string\n"},
1070 {"isasciistr", isasciistr, METH_VARARGS, "check if an ASCII string\n"},
1071 {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
1071 {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
1072 {"asciiupper", asciiupper, METH_VARARGS, "uppercase an ASCII string\n"},
1072 {"asciiupper", asciiupper, METH_VARARGS, "uppercase an ASCII string\n"},
1073 {"dict_new_presized", dict_new_presized, METH_VARARGS,
1073 {"dict_new_presized", dict_new_presized, METH_VARARGS,
1074 "construct a dict with an expected size\n"},
1074 "construct a dict with an expected size\n"},
1075 {"make_file_foldmap", make_file_foldmap, METH_VARARGS,
1075 {"make_file_foldmap", make_file_foldmap, METH_VARARGS,
1076 "make file foldmap\n"},
1076 "make file foldmap\n"},
1077 {"jsonescapeu8fast", jsonescapeu8fast, METH_VARARGS,
1077 {"jsonescapeu8fast", jsonescapeu8fast, METH_VARARGS,
1078 "escape a UTF-8 byte string to JSON (fast path)\n"},
1078 "escape a UTF-8 byte string to JSON (fast path)\n"},
1079 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
1079 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
1080 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
1080 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
1081 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
1081 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
1082 {"fm1readmarkers", fm1readmarkers, METH_VARARGS,
1082 {"fm1readmarkers", fm1readmarkers, METH_VARARGS,
1083 "parse v1 obsolete markers\n"},
1083 "parse v1 obsolete markers\n"},
1084 {NULL, NULL}};
1084 {NULL, NULL}};
1085
1085
1086 void dirs_module_init(PyObject *mod);
1086 void dirs_module_init(PyObject *mod);
1087 void manifest_module_init(PyObject *mod);
1087 void manifest_module_init(PyObject *mod);
1088 void revlog_module_init(PyObject *mod);
1088 void revlog_module_init(PyObject *mod);
1089
1089
1090 static const int version = 20;
1090 static const int version = 20;
1091
1091
1092 static void module_init(PyObject *mod)
1092 static void module_init(PyObject *mod)
1093 {
1093 {
1094 PyModule_AddIntConstant(mod, "version", version);
1094 PyModule_AddIntConstant(mod, "version", version);
1095
1095
1096 /* This module constant has two purposes. First, it lets us unit test
1096 /* This module constant has two purposes. First, it lets us unit test
1097 * the ImportError raised without hard-coding any error text. This
1097 * the ImportError raised without hard-coding any error text. This
1098 * means we can change the text in the future without breaking tests,
1098 * means we can change the text in the future without breaking tests,
1099 * even across changesets without a recompile. Second, its presence
1099 * even across changesets without a recompile. Second, its presence
1100 * can be used to determine whether the version-checking logic is
1100 * can be used to determine whether the version-checking logic is
1101 * present, which also helps in testing across changesets without a
1101 * present, which also helps in testing across changesets without a
1102 * recompile. Note that this means the pure-Python version of parsers
1102 * recompile. Note that this means the pure-Python version of parsers
1103 * should not have this module constant. */
1103 * should not have this module constant. */
1104 PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
1104 PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
1105
1105
1106 dirs_module_init(mod);
1106 dirs_module_init(mod);
1107 manifest_module_init(mod);
1107 manifest_module_init(mod);
1108 revlog_module_init(mod);
1108 revlog_module_init(mod);
1109
1109
1110 if (PyType_Ready(&dirstateItemType) < 0) {
1110 if (PyType_Ready(&dirstateItemType) < 0) {
1111 return;
1111 return;
1112 }
1112 }
1113 Py_INCREF(&dirstateItemType);
1113 Py_INCREF(&dirstateItemType);
1114 PyModule_AddObject(mod, "DirstateItem", (PyObject *)&dirstateItemType);
1114 PyModule_AddObject(mod, "DirstateItem", (PyObject *)&dirstateItemType);
1115 }
1115 }
1116
1116
1117 static int check_python_version(void)
1117 static int check_python_version(void)
1118 {
1118 {
1119 PyObject *sys = PyImport_ImportModule("sys"), *ver;
1119 PyObject *sys = PyImport_ImportModule("sys"), *ver;
1120 long hexversion;
1120 long hexversion;
1121 if (!sys) {
1121 if (!sys) {
1122 return -1;
1122 return -1;
1123 }
1123 }
1124 ver = PyObject_GetAttrString(sys, "hexversion");
1124 ver = PyObject_GetAttrString(sys, "hexversion");
1125 Py_DECREF(sys);
1125 Py_DECREF(sys);
1126 if (!ver) {
1126 if (!ver) {
1127 return -1;
1127 return -1;
1128 }
1128 }
1129 hexversion = PyInt_AsLong(ver);
1129 hexversion = PyInt_AsLong(ver);
1130 Py_DECREF(ver);
1130 Py_DECREF(ver);
1131 /* sys.hexversion is a 32-bit number by default, so the -1 case
1131 /* sys.hexversion is a 32-bit number by default, so the -1 case
1132 * should only occur in unusual circumstances (e.g. if sys.hexversion
1132 * should only occur in unusual circumstances (e.g. if sys.hexversion
1133 * is manually set to an invalid value). */
1133 * is manually set to an invalid value). */
1134 if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
1134 if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
1135 PyErr_Format(PyExc_ImportError,
1135 PyErr_Format(PyExc_ImportError,
1136 "%s: The Mercurial extension "
1136 "%s: The Mercurial extension "
1137 "modules were compiled with Python " PY_VERSION
1137 "modules were compiled with Python " PY_VERSION
1138 ", but "
1138 ", but "
1139 "Mercurial is currently using Python with "
1139 "Mercurial is currently using Python with "
1140 "sys.hexversion=%ld: "
1140 "sys.hexversion=%ld: "
1141 "Python %s\n at: %s",
1141 "Python %s\n at: %s",
1142 versionerrortext, hexversion, Py_GetVersion(),
1142 versionerrortext, hexversion, Py_GetVersion(),
1143 Py_GetProgramFullPath());
1143 Py_GetProgramFullPath());
1144 return -1;
1144 return -1;
1145 }
1145 }
1146 return 0;
1146 return 0;
1147 }
1147 }
1148
1148
1149 #ifdef IS_PY3K
1149 #ifdef IS_PY3K
1150 static struct PyModuleDef parsers_module = {PyModuleDef_HEAD_INIT, "parsers",
1150 static struct PyModuleDef parsers_module = {PyModuleDef_HEAD_INIT, "parsers",
1151 parsers_doc, -1, methods};
1151 parsers_doc, -1, methods};
1152
1152
1153 PyMODINIT_FUNC PyInit_parsers(void)
1153 PyMODINIT_FUNC PyInit_parsers(void)
1154 {
1154 {
1155 PyObject *mod;
1155 PyObject *mod;
1156
1156
1157 if (check_python_version() == -1)
1157 if (check_python_version() == -1)
1158 return NULL;
1158 return NULL;
1159 mod = PyModule_Create(&parsers_module);
1159 mod = PyModule_Create(&parsers_module);
1160 module_init(mod);
1160 module_init(mod);
1161 return mod;
1161 return mod;
1162 }
1162 }
1163 #else
1163 #else
1164 PyMODINIT_FUNC initparsers(void)
1164 PyMODINIT_FUNC initparsers(void)
1165 {
1165 {
1166 PyObject *mod;
1166 PyObject *mod;
1167
1167
1168 if (check_python_version() == -1) {
1168 if (check_python_version() == -1) {
1169 return;
1169 return;
1170 }
1170 }
1171 mod = Py_InitModule3("parsers", methods, parsers_doc);
1171 mod = Py_InitModule3("parsers", methods, parsers_doc);
1172 module_init(mod);
1172 module_init(mod);
1173 }
1173 }
1174 #endif
1174 #endif
@@ -1,82 +1,83 b''
1 /*
1 /*
2 util.h - utility functions for interfacing with the various python APIs.
2 util.h - utility functions for interfacing with the various python APIs.
3
3
4 This software may be used and distributed according to the terms of
4 This software may be used and distributed according to the terms of
5 the GNU General Public License, incorporated herein by reference.
5 the GNU General Public License, incorporated herein by reference.
6 */
6 */
7
7
8 #ifndef _HG_UTIL_H_
8 #ifndef _HG_UTIL_H_
9 #define _HG_UTIL_H_
9 #define _HG_UTIL_H_
10
10
11 #include "compat.h"
11 #include "compat.h"
12
12
13 #if PY_MAJOR_VERSION >= 3
13 #if PY_MAJOR_VERSION >= 3
14 #define IS_PY3K
14 #define IS_PY3K
15 #endif
15 #endif
16
16
17 /* helper to switch things like string literal depending on Python version */
17 /* helper to switch things like string literal depending on Python version */
18 #ifdef IS_PY3K
18 #ifdef IS_PY3K
19 #define PY23(py2, py3) py3
19 #define PY23(py2, py3) py3
20 #else
20 #else
21 #define PY23(py2, py3) py2
21 #define PY23(py2, py3) py2
22 #endif
22 #endif
23
23
24 /* clang-format off */
24 /* clang-format off */
25 typedef struct {
25 typedef struct {
26 PyObject_HEAD
26 PyObject_HEAD
27 unsigned char flags;
27 unsigned char flags;
28 int mode;
28 int mode;
29 int size;
29 int size;
30 int mtime;
30 int mtime;
31 } dirstateItemObject;
31 } dirstateItemObject;
32 /* clang-format on */
32 /* clang-format on */
33
33
34 static const unsigned char dirstate_flag_wc_tracked = 1;
34 static const unsigned char dirstate_flag_wc_tracked = 1;
35 static const unsigned char dirstate_flag_p1_tracked = 1 << 1;
35 static const unsigned char dirstate_flag_p1_tracked = 1 << 1;
36 static const unsigned char dirstate_flag_p2_info = 1 << 2;
36 static const unsigned char dirstate_flag_p2_info = 1 << 2;
37 static const unsigned char dirstate_flag_has_meaningful_data = 1 << 3;
37 static const unsigned char dirstate_flag_has_meaningful_data = 1 << 3;
38 static const unsigned char dirstate_flag_has_meaningful_mtime = 1 << 4;
38 static const unsigned char dirstate_flag_has_file_mtime = 1 << 4;
39 static const unsigned char dirstate_flag_mode_exec_perm = 1 << 5;
39 static const unsigned char dirstate_flag_has_directory_mtime = 1 << 5;
40 static const unsigned char dirstate_flag_mode_is_symlink = 1 << 6;
40 static const unsigned char dirstate_flag_mode_exec_perm = 1 << 6;
41 static const unsigned char dirstate_flag_mode_is_symlink = 1 << 7;
41
42
42 extern PyTypeObject dirstateItemType;
43 extern PyTypeObject dirstateItemType;
43 #define dirstate_tuple_check(op) (Py_TYPE(op) == &dirstateItemType)
44 #define dirstate_tuple_check(op) (Py_TYPE(op) == &dirstateItemType)
44
45
45 #ifndef MIN
46 #ifndef MIN
46 #define MIN(a, b) (((a) < (b)) ? (a) : (b))
47 #define MIN(a, b) (((a) < (b)) ? (a) : (b))
47 #endif
48 #endif
48 /* VC9 doesn't include bool and lacks stdbool.h based on my searching */
49 /* VC9 doesn't include bool and lacks stdbool.h based on my searching */
49 #if defined(_MSC_VER) || __STDC_VERSION__ < 199901L
50 #if defined(_MSC_VER) || __STDC_VERSION__ < 199901L
50 #define true 1
51 #define true 1
51 #define false 0
52 #define false 0
52 typedef unsigned char bool;
53 typedef unsigned char bool;
53 #else
54 #else
54 #include <stdbool.h>
55 #include <stdbool.h>
55 #endif
56 #endif
56
57
57 static inline PyObject *_dict_new_presized(Py_ssize_t expected_size)
58 static inline PyObject *_dict_new_presized(Py_ssize_t expected_size)
58 {
59 {
59 /* _PyDict_NewPresized expects a minused parameter, but it actually
60 /* _PyDict_NewPresized expects a minused parameter, but it actually
60 creates a dictionary that's the nearest power of two bigger than the
61 creates a dictionary that's the nearest power of two bigger than the
61 parameter. For example, with the initial minused = 1000, the
62 parameter. For example, with the initial minused = 1000, the
62 dictionary created has size 1024. Of course in a lot of cases that
63 dictionary created has size 1024. Of course in a lot of cases that
63 can be greater than the maximum load factor Python's dict object
64 can be greater than the maximum load factor Python's dict object
64 expects (= 2/3), so as soon as we cross the threshold we'll resize
65 expects (= 2/3), so as soon as we cross the threshold we'll resize
65 anyway. So create a dictionary that's at least 3/2 the size. */
66 anyway. So create a dictionary that's at least 3/2 the size. */
66 return _PyDict_NewPresized(((1 + expected_size) / 2) * 3);
67 return _PyDict_NewPresized(((1 + expected_size) / 2) * 3);
67 }
68 }
68
69
69 /* Convert a PyInt or PyLong to a long. Returns false if there is an
70 /* Convert a PyInt or PyLong to a long. Returns false if there is an
70 error, in which case an exception will already have been set. */
71 error, in which case an exception will already have been set. */
71 static inline bool pylong_to_long(PyObject *pylong, long *out)
72 static inline bool pylong_to_long(PyObject *pylong, long *out)
72 {
73 {
73 *out = PyLong_AsLong(pylong);
74 *out = PyLong_AsLong(pylong);
74 /* Fast path to avoid hitting PyErr_Occurred if the value was obviously
75 /* Fast path to avoid hitting PyErr_Occurred if the value was obviously
75 * not an error. */
76 * not an error. */
76 if (*out != -1) {
77 if (*out != -1) {
77 return true;
78 return true;
78 }
79 }
79 return PyErr_Occurred() == NULL;
80 return PyErr_Occurred() == NULL;
80 }
81 }
81
82
82 #endif /* _HG_UTIL_H_ */
83 #endif /* _HG_UTIL_H_ */
@@ -1,503 +1,516 b''
1 The *dirstate* is what Mercurial uses internally to track
1 The *dirstate* is what Mercurial uses internally to track
2 the state of files in the working directory,
2 the state of files in the working directory,
3 such as set by commands like `hg add` and `hg rm`.
3 such as set by commands like `hg add` and `hg rm`.
4 It also contains some cached data that help make `hg status` faster.
4 It also contains some cached data that help make `hg status` faster.
5 The name refers both to `.hg/dirstate` on the filesystem
5 The name refers both to `.hg/dirstate` on the filesystem
6 and the corresponding data structure in memory while a Mercurial process
6 and the corresponding data structure in memory while a Mercurial process
7 is running.
7 is running.
8
8
9 The original file format, retroactively dubbed `dirstate-v1`,
9 The original file format, retroactively dubbed `dirstate-v1`,
10 is described at https://www.mercurial-scm.org/wiki/DirState.
10 is described at https://www.mercurial-scm.org/wiki/DirState.
11 It is made of a flat sequence of unordered variable-size entries,
11 It is made of a flat sequence of unordered variable-size entries,
12 so accessing any information in it requires parsing all of it.
12 so accessing any information in it requires parsing all of it.
13 Similarly, saving changes requires rewriting the entire file.
13 Similarly, saving changes requires rewriting the entire file.
14
14
15 The newer `dirstate-v2` file format is designed to fix these limitations
15 The newer `dirstate-v2` file format is designed to fix these limitations
16 and make `hg status` faster.
16 and make `hg status` faster.
17
17
18 User guide
18 User guide
19 ==========
19 ==========
20
20
21 Compatibility
21 Compatibility
22 -------------
22 -------------
23
23
24 The file format is experimental and may still change.
24 The file format is experimental and may still change.
25 Different versions of Mercurial may not be compatible with each other
25 Different versions of Mercurial may not be compatible with each other
26 when working on a local repository that uses this format.
26 when working on a local repository that uses this format.
27 When using an incompatible version with the experimental format,
27 When using an incompatible version with the experimental format,
28 anything can happen including data corruption.
28 anything can happen including data corruption.
29
29
30 Since the dirstate is entirely local and not relevant to the wire protocol,
30 Since the dirstate is entirely local and not relevant to the wire protocol,
31 `dirstate-v2` does not affect compatibility with remote Mercurial versions.
31 `dirstate-v2` does not affect compatibility with remote Mercurial versions.
32
32
33 When `share-safe` is enabled, different repositories sharing the same store
33 When `share-safe` is enabled, different repositories sharing the same store
34 can use different dirstate formats.
34 can use different dirstate formats.
35
35
36 Enabling `dirstate-v2` for new local repositories
36 Enabling `dirstate-v2` for new local repositories
37 -------------------------------------------------
37 -------------------------------------------------
38
38
39 When creating a new local repository such as with `hg init` or `hg clone`,
39 When creating a new local repository such as with `hg init` or `hg clone`,
40 the `exp-dirstate-v2` boolean in the `format` configuration section
40 the `exp-dirstate-v2` boolean in the `format` configuration section
41 controls whether to use this file format.
41 controls whether to use this file format.
42 This is disabled by default as of this writing.
42 This is disabled by default as of this writing.
43 To enable it for a single repository, run for example::
43 To enable it for a single repository, run for example::
44
44
45 $ hg init my-project --config format.exp-dirstate-v2=1
45 $ hg init my-project --config format.exp-dirstate-v2=1
46
46
47 Checking the format of an existing local repository
47 Checking the format of an existing local repository
48 ---------------------------------------------------
48 ---------------------------------------------------
49
49
50 The `debugformat` command prints information about
50 The `debugformat` command prints information about
51 which of multiple optional formats are used in the current repository,
51 which of multiple optional formats are used in the current repository,
52 including `dirstate-v2`::
52 including `dirstate-v2`::
53
53
54 $ hg debugformat
54 $ hg debugformat
55 format-variant repo
55 format-variant repo
56 fncache: yes
56 fncache: yes
57 dirstate-v2: yes
57 dirstate-v2: yes
58 […]
58 […]
59
59
60 Upgrading or downgrading an existing local repository
60 Upgrading or downgrading an existing local repository
61 -----------------------------------------------------
61 -----------------------------------------------------
62
62
63 The `debugupgrade` command does various upgrades or downgrades
63 The `debugupgrade` command does various upgrades or downgrades
64 on a local repository
64 on a local repository
65 based on the current Mercurial version and on configuration.
65 based on the current Mercurial version and on configuration.
66 The same `format.exp-dirstate-v2` configuration is used again.
66 The same `format.exp-dirstate-v2` configuration is used again.
67
67
68 Example to upgrade::
68 Example to upgrade::
69
69
70 $ hg debugupgrade --config format.exp-dirstate-v2=1
70 $ hg debugupgrade --config format.exp-dirstate-v2=1
71
71
72 Example to downgrade to `dirstate-v1`::
72 Example to downgrade to `dirstate-v1`::
73
73
74 $ hg debugupgrade --config format.exp-dirstate-v2=0
74 $ hg debugupgrade --config format.exp-dirstate-v2=0
75
75
76 Both of these commands do nothing but print a list of proposed changes,
76 Both of these commands do nothing but print a list of proposed changes,
77 which may include changes unrelated to the dirstate.
77 which may include changes unrelated to the dirstate.
78 Those other changes are controlled by their own configuration keys.
78 Those other changes are controlled by their own configuration keys.
79 Add `--run` to a command to actually apply the proposed changes.
79 Add `--run` to a command to actually apply the proposed changes.
80
80
81 Backups of `.hg/requires` and `.hg/dirstate` are created
81 Backups of `.hg/requires` and `.hg/dirstate` are created
82 in a `.hg/upgradebackup.*` directory.
82 in a `.hg/upgradebackup.*` directory.
83 If something goes wrong, restoring those files should undo the change.
83 If something goes wrong, restoring those files should undo the change.
84
84
85 Note that upgrading affects compatibility with older versions of Mercurial
85 Note that upgrading affects compatibility with older versions of Mercurial
86 as noted above.
86 as noted above.
87 This can be relevant when a repository’s files are on a USB drive
87 This can be relevant when a repository’s files are on a USB drive
88 or some other removable media, or shared over the network, etc.
88 or some other removable media, or shared over the network, etc.
89
89
90 Internal filesystem representation
90 Internal filesystem representation
91 ==================================
91 ==================================
92
92
93 Requirements file
93 Requirements file
94 -----------------
94 -----------------
95
95
96 The `.hg/requires` file indicates which of various optional file formats
96 The `.hg/requires` file indicates which of various optional file formats
97 are used by a given repository.
97 are used by a given repository.
98 Mercurial aborts when seeing a requirement it does not know about,
98 Mercurial aborts when seeing a requirement it does not know about,
99 which avoids older versions accidentally messing up a repository
99 which avoids older versions accidentally messing up a repository
100 that uses a format that was introduced later.
100 that uses a format that was introduced later.
101 For versions that do support a format, the presence or absence of
101 For versions that do support a format, the presence or absence of
102 the corresponding requirement indicates whether to use that format.
102 the corresponding requirement indicates whether to use that format.
103
103
104 When the file contains an `exp-dirstate-v2` line,
104 When the file contains an `exp-dirstate-v2` line,
105 the `dirstate-v2` format is used.
105 the `dirstate-v2` format is used.
106 With no such line `dirstate-v1` is used.
106 With no such line `dirstate-v1` is used.
107
107
108 High level description
108 High level description
109 ----------------------
109 ----------------------
110
110
111 Whereas `dirstate-v1` uses a single `.hg/dirstate` file,
111 Whereas `dirstate-v1` uses a single `.hg/dirstate` file,
112 in `dirstate-v2` that file is a "docket" file
112 in `dirstate-v2` that file is a "docket" file
113 that only contains some metadata
113 that only contains some metadata
114 and points to a separate data file named `.hg/dirstate.{ID}`,
114 and points to a separate data file named `.hg/dirstate.{ID}`,
115 where `{ID}` is a random identifier.
115 where `{ID}` is a random identifier.
116
116
117 This separation allows making data files append-only
117 This separation allows making data files append-only
118 and therefore safer to memory-map.
118 and therefore safer to memory-map.
119 Creating a new data file (occasionally to clean up unused data)
119 Creating a new data file (occasionally to clean up unused data)
120 can be done with a different ID
120 can be done with a different ID
121 without disrupting another Mercurial process
121 without disrupting another Mercurial process
122 that could still be using the previous data file.
122 that could still be using the previous data file.
123
123
124 Both files have a format designed to reduce the need for parsing,
124 Both files have a format designed to reduce the need for parsing,
125 by using fixed-size binary components as much as possible.
125 by using fixed-size binary components as much as possible.
126 For data that is not fixed-size,
126 For data that is not fixed-size,
127 references to other parts of a file can be made by storing "pseudo-pointers":
127 references to other parts of a file can be made by storing "pseudo-pointers":
128 integers counted in bytes from the start of a file.
128 integers counted in bytes from the start of a file.
129 For read-only access no data structure is needed,
129 For read-only access no data structure is needed,
130 only a bytes buffer (possibly memory-mapped directly from the filesystem)
130 only a bytes buffer (possibly memory-mapped directly from the filesystem)
131 with specific parts read on demand.
131 with specific parts read on demand.
132
132
133 The data file contains "nodes" organized in a tree.
133 The data file contains "nodes" organized in a tree.
134 Each node represents a file or directory inside the working directory
134 Each node represents a file or directory inside the working directory
135 or its parent changeset.
135 or its parent changeset.
136 This tree has the same structure as the filesystem,
136 This tree has the same structure as the filesystem,
137 so a node representing a directory has child nodes representing
137 so a node representing a directory has child nodes representing
138 the files and subdirectories contained directly in that directory.
138 the files and subdirectories contained directly in that directory.
139
139
140 The docket file format
140 The docket file format
141 ----------------------
141 ----------------------
142
142
143 This is implemented in `rust/hg-core/src/dirstate_tree/on_disk.rs`
143 This is implemented in `rust/hg-core/src/dirstate_tree/on_disk.rs`
144 and `mercurial/dirstateutils/docket.py`.
144 and `mercurial/dirstateutils/docket.py`.
145
145
146 Components of the docket file are found at fixed offsets,
146 Components of the docket file are found at fixed offsets,
147 counted in bytes from the start of the file:
147 counted in bytes from the start of the file:
148
148
149 * Offset 0:
149 * Offset 0:
150 The 12-byte marker string "dirstate-v2\n" ending with a newline character.
150 The 12-byte marker string "dirstate-v2\n" ending with a newline character.
151 This makes it easier to tell a dirstate-v2 file from a dirstate-v1 file,
151 This makes it easier to tell a dirstate-v2 file from a dirstate-v1 file,
152 although it is not strictly necessary
152 although it is not strictly necessary
153 since `.hg/requires` determines which format to use.
153 since `.hg/requires` determines which format to use.
154
154
155 * Offset 12:
155 * Offset 12:
156 The changeset node ID on the first parent of the working directory,
156 The changeset node ID on the first parent of the working directory,
157 as up to 32 binary bytes.
157 as up to 32 binary bytes.
158 If a node ID is shorter (20 bytes for SHA-1),
158 If a node ID is shorter (20 bytes for SHA-1),
159 it is start-aligned and the rest of the bytes are set to zero.
159 it is start-aligned and the rest of the bytes are set to zero.
160
160
161 * Offset 44:
161 * Offset 44:
162 The changeset node ID on the second parent of the working directory,
162 The changeset node ID on the second parent of the working directory,
163 or all zeros if there isn’t one.
163 or all zeros if there isn’t one.
164 Also 32 binary bytes.
164 Also 32 binary bytes.
165
165
166 * Offset 76:
166 * Offset 76:
167 Tree metadata on 44 bytes, described below.
167 Tree metadata on 44 bytes, described below.
168 Its separation in this documentation from the rest of the docket
168 Its separation in this documentation from the rest of the docket
169 reflects a detail of the current implementation.
169 reflects a detail of the current implementation.
170 Since tree metadata is also made of fields at fixed offsets, those could
170 Since tree metadata is also made of fields at fixed offsets, those could
171 be inlined here by adding 76 bytes to each offset.
171 be inlined here by adding 76 bytes to each offset.
172
172
173 * Offset 120:
173 * Offset 120:
174 The used size of the data file, as a 32-bit big-endian integer.
174 The used size of the data file, as a 32-bit big-endian integer.
175 The actual size of the data file may be larger
175 The actual size of the data file may be larger
176 (if another Mercurial process is appending to it
176 (if another Mercurial process is appending to it
177 but has not updated the docket yet).
177 but has not updated the docket yet).
178 That extra data must be ignored.
178 That extra data must be ignored.
179
179
180 * Offset 124:
180 * Offset 124:
181 The length of the data file identifier, as an 8-bit integer.
181 The length of the data file identifier, as an 8-bit integer.
182
182
183 * Offset 125:
183 * Offset 125:
184 The data file identifier.
184 The data file identifier.
185
185
186 * Any additional data is currently ignored, and dropped when updating the file.
186 * Any additional data is currently ignored, and dropped when updating the file.
187
187
188 Tree metadata in the docket file
188 Tree metadata in the docket file
189 --------------------------------
189 --------------------------------
190
190
191 Tree metadata is similarly made of components at fixed offsets.
191 Tree metadata is similarly made of components at fixed offsets.
192 These offsets are counted in bytes from the start of tree metadata,
192 These offsets are counted in bytes from the start of tree metadata,
193 which is 76 bytes after the start of the docket file.
193 which is 76 bytes after the start of the docket file.
194
194
195 This metadata can be thought of as the singular root of the tree
195 This metadata can be thought of as the singular root of the tree
196 formed by nodes in the data file.
196 formed by nodes in the data file.
197
197
198 * Offset 0:
198 * Offset 0:
199 Pseudo-pointer to the start of root nodes,
199 Pseudo-pointer to the start of root nodes,
200 counted in bytes from the start of the data file,
200 counted in bytes from the start of the data file,
201 as a 32-bit big-endian integer.
201 as a 32-bit big-endian integer.
202 These nodes describe files and directories found directly
202 These nodes describe files and directories found directly
203 at the root of the working directory.
203 at the root of the working directory.
204
204
205 * Offset 4:
205 * Offset 4:
206 Number of root nodes, as a 32-bit big-endian integer.
206 Number of root nodes, as a 32-bit big-endian integer.
207
207
208 * Offset 8:
208 * Offset 8:
209 Total number of nodes in the entire tree that "have a dirstate entry",
209 Total number of nodes in the entire tree that "have a dirstate entry",
210 as a 32-bit big-endian integer.
210 as a 32-bit big-endian integer.
211 Those nodes represent files that would be present at all in `dirstate-v1`.
211 Those nodes represent files that would be present at all in `dirstate-v1`.
212 This is typically less than the total number of nodes.
212 This is typically less than the total number of nodes.
213 This counter is used to implement `len(dirstatemap)`.
213 This counter is used to implement `len(dirstatemap)`.
214
214
215 * Offset 12:
215 * Offset 12:
216 Number of nodes in the entire tree that have a copy source,
216 Number of nodes in the entire tree that have a copy source,
217 as a 32-bit big-endian integer.
217 as a 32-bit big-endian integer.
218 At the next commit, these files are recorded
218 At the next commit, these files are recorded
219 as having been copied or moved/renamed from that source.
219 as having been copied or moved/renamed from that source.
220 (A move is recorded as a copy and separate removal of the source.)
220 (A move is recorded as a copy and separate removal of the source.)
221 This counter is used to implement `len(dirstatemap.copymap)`.
221 This counter is used to implement `len(dirstatemap.copymap)`.
222
222
223 * Offset 16:
223 * Offset 16:
224 An estimation of how many bytes of the data file
224 An estimation of how many bytes of the data file
225 (within its used size) are unused, as a 32-bit big-endian integer.
225 (within its used size) are unused, as a 32-bit big-endian integer.
226 When appending to an existing data file,
226 When appending to an existing data file,
227 some existing nodes or paths can be unreachable from the new root
227 some existing nodes or paths can be unreachable from the new root
228 but they still take up space.
228 but they still take up space.
229 This counter is used to decide when to write a new data file from scratch
229 This counter is used to decide when to write a new data file from scratch
230 instead of appending to an existing one,
230 instead of appending to an existing one,
231 in order to get rid of that unreachable data
231 in order to get rid of that unreachable data
232 and avoid unbounded file size growth.
232 and avoid unbounded file size growth.
233
233
234 * Offset 20:
234 * Offset 20:
235 These four bytes are currently ignored
235 These four bytes are currently ignored
236 and reset to zero when updating a docket file.
236 and reset to zero when updating a docket file.
237 This is an attempt at forward compatibility:
237 This is an attempt at forward compatibility:
238 future Mercurial versions could use this as a bit field
238 future Mercurial versions could use this as a bit field
239 to indicate that a dirstate has additional data or constraints.
239 to indicate that a dirstate has additional data or constraints.
240 Finding a dirstate file with the relevant bit unset indicates that
240 Finding a dirstate file with the relevant bit unset indicates that
241 it was written by a then-older version
241 it was written by a then-older version
242 which is not aware of that future change.
242 which is not aware of that future change.
243
243
244 * Offset 24:
244 * Offset 24:
245 Either 20 zero bytes, or a SHA-1 hash as 20 binary bytes.
245 Either 20 zero bytes, or a SHA-1 hash as 20 binary bytes.
246 When present, the hash is of ignore patterns
246 When present, the hash is of ignore patterns
247 that were used for some previous run of the `status` algorithm.
247 that were used for some previous run of the `status` algorithm.
248
248
249 * (Offset 44: end of tree metadata)
249 * (Offset 44: end of tree metadata)
250
250
251 Optional hash of ignore patterns
251 Optional hash of ignore patterns
252 --------------------------------
252 --------------------------------
253
253
254 The implementation of `status` at `rust/hg-core/src/dirstate_tree/status.rs`
254 The implementation of `status` at `rust/hg-core/src/dirstate_tree/status.rs`
255 has been optimized such that its run time is dominated by calls
255 has been optimized such that its run time is dominated by calls
256 to `stat` for reading the filesystem metadata of a file or directory,
256 to `stat` for reading the filesystem metadata of a file or directory,
257 and to `readdir` for listing the contents of a directory.
257 and to `readdir` for listing the contents of a directory.
258 In some cases the algorithm can skip calls to `readdir`
258 In some cases the algorithm can skip calls to `readdir`
259 (saving significant time)
259 (saving significant time)
260 because the dirstate already contains enough of the relevant information
260 because the dirstate already contains enough of the relevant information
261 to build the correct `status` results.
261 to build the correct `status` results.
262
262
263 The default configuration of `hg status` is to list unknown files
263 The default configuration of `hg status` is to list unknown files
264 but not ignored files.
264 but not ignored files.
265 In this case, it matters for the `readdir`-skipping optimization
265 In this case, it matters for the `readdir`-skipping optimization
266 if a given file used to be ignored but became unknown
266 if a given file used to be ignored but became unknown
267 because `.hgignore` changed.
267 because `.hgignore` changed.
268 To detect the possibility of such a change,
268 To detect the possibility of such a change,
269 the tree metadata contains an optional hash of all ignore patterns.
269 the tree metadata contains an optional hash of all ignore patterns.
270
270
271 We define:
271 We define:
272
272
273 * "Root" ignore files as:
273 * "Root" ignore files as:
274
274
275 - `.hgignore` at the root of the repository if it exists
275 - `.hgignore` at the root of the repository if it exists
276 - And all files from `ui.ignore.*` config.
276 - And all files from `ui.ignore.*` config.
277
277
278 This set of files is sorted by the string representation of their path.
278 This set of files is sorted by the string representation of their path.
279
279
280 * The "expanded contents" of an ignore file is the byte string made
280 * The "expanded contents" of an ignore file is the byte string made
281 by the concatenation of its contents followed by the "expanded contents"
281 by the concatenation of its contents followed by the "expanded contents"
282 of other files included with `include:` or `subinclude:` directives,
282 of other files included with `include:` or `subinclude:` directives,
283 in inclusion order. This definition is recursive, as included files can
283 in inclusion order. This definition is recursive, as included files can
284 themselves include more files.
284 themselves include more files.
285
285
286 This hash is defined as the SHA-1 of the concatenation (in sorted
286 This hash is defined as the SHA-1 of the concatenation (in sorted
287 order) of the "expanded contents" of each "root" ignore file.
287 order) of the "expanded contents" of each "root" ignore file.
288 (Note that computing this does not require actually concatenating
288 (Note that computing this does not require actually concatenating
289 into a single contiguous byte sequence.
289 into a single contiguous byte sequence.
290 Instead a SHA-1 hasher object can be created
290 Instead a SHA-1 hasher object can be created
291 and fed separate chunks one by one.)
291 and fed separate chunks one by one.)
292
292
293 The data file format
293 The data file format
294 --------------------
294 --------------------
295
295
296 This is implemented in `rust/hg-core/src/dirstate_tree/on_disk.rs`
296 This is implemented in `rust/hg-core/src/dirstate_tree/on_disk.rs`
297 and `mercurial/dirstateutils/v2.py`.
297 and `mercurial/dirstateutils/v2.py`.
298
298
299 The data file contains two types of data: paths and nodes.
299 The data file contains two types of data: paths and nodes.
300
300
301 Paths and nodes can be organized in any order in the file, except that sibling
301 Paths and nodes can be organized in any order in the file, except that sibling
302 nodes must be next to each other and sorted by their path.
302 nodes must be next to each other and sorted by their path.
303 Contiguity lets the parent refer to them all
303 Contiguity lets the parent refer to them all
304 by their count and a single pseudo-pointer,
304 by their count and a single pseudo-pointer,
305 instead of storing one pseudo-pointer per child node.
305 instead of storing one pseudo-pointer per child node.
306 Sorting allows using binary search to find a child node with a given name
306 Sorting allows using binary search to find a child node with a given name
307 in `O(log(n))` byte sequence comparisons.
307 in `O(log(n))` byte sequence comparisons.
308
308
309 The current implementation writes paths and child nodes before a given node
309 The current implementation writes paths and child nodes before a given node
310 for ease of figuring out the value of pseudo-pointers by the time they are to be
310 for ease of figuring out the value of pseudo-pointers by the time they are to be
311 written, but this is not an obligation and readers must not rely on it.
311 written, but this is not an obligation and readers must not rely on it.
312
312
313 A path is stored as a byte string anywhere in the file, without delimiter.
313 A path is stored as a byte string anywhere in the file, without delimiter.
314 It is referred to by one or more nodes by a pseudo-pointer to its start, and its
314 It is referred to by one or more nodes by a pseudo-pointer to its start, and its
315 length in bytes. Since there is no delimiter,
315 length in bytes. Since there is no delimiter,
316 when a path is a substring of another the same bytes could be reused,
316 when a path is a substring of another the same bytes could be reused,
317 although the implementation does not exploit this as of this writing.
317 although the implementation does not exploit this as of this writing.
318
318
319 A node is stored on 44 bytes with components at fixed offsets. Paths and
319 A node is stored on 44 bytes with components at fixed offsets. Paths and
320 child nodes relevant to a node are stored externally and referenced through
320 child nodes relevant to a node are stored externally and referenced through
321 pseudo-pointers.
321 pseudo-pointers.
322
322
323 All integers are stored in big-endian. All pseudo-pointers are 32-bit integers
323 All integers are stored in big-endian. All pseudo-pointers are 32-bit integers
324 counting bytes from the start of the data file. Path lengths and positions
324 counting bytes from the start of the data file. Path lengths and positions
325 are 16-bit integers, also counted in bytes.
325 are 16-bit integers, also counted in bytes.
326
326
327 Node components are:
327 Node components are:
328
328
329 * Offset 0:
329 * Offset 0:
330 Pseudo-pointer to the full path of this node,
330 Pseudo-pointer to the full path of this node,
331 from the working directory root.
331 from the working directory root.
332
332
333 * Offset 4:
333 * Offset 4:
334 Length of the full path.
334 Length of the full path.
335
335
336 * Offset 6:
336 * Offset 6:
337 Position of the last `/` path separator within the full path,
337 Position of the last `/` path separator within the full path,
338 in bytes from the start of the full path,
338 in bytes from the start of the full path,
339 or zero if there isn’t one.
339 or zero if there isn’t one.
340 The part of the full path after this position is the "base name".
340 The part of the full path after this position is the "base name".
341 Since sibling nodes have the same parent, only their base names vary
341 Since sibling nodes have the same parent, only their base names vary
342 and need to be considered when doing binary search to find a given path.
342 and need to be considered when doing binary search to find a given path.
343
343
344 * Offset 8:
344 * Offset 8:
345 Pseudo-pointer to the "copy source" path for this node,
345 Pseudo-pointer to the "copy source" path for this node,
346 or zero if there is no copy source.
346 or zero if there is no copy source.
347
347
348 * Offset 12:
348 * Offset 12:
349 Length of the copy source path, or zero if there isn’t one.
349 Length of the copy source path, or zero if there isn’t one.
350
350
351 * Offset 14:
351 * Offset 14:
352 Pseudo-pointer to the start of child nodes.
352 Pseudo-pointer to the start of child nodes.
353
353
354 * Offset 18:
354 * Offset 18:
355 Number of child nodes, as a 32-bit integer.
355 Number of child nodes, as a 32-bit integer.
356 They occupy 44 times this number of bytes
356 They occupy 44 times this number of bytes
357 (not counting space for paths, and further descendants).
357 (not counting space for paths, and further descendants).
358
358
359 * Offset 22:
359 * Offset 22:
360 Number as a 32-bit integer of descendant nodes in this subtree,
360 Number as a 32-bit integer of descendant nodes in this subtree,
361 not including this node itself,
361 not including this node itself,
362 that "have a dirstate entry".
362 that "have a dirstate entry".
363 Those nodes represent files that would be present at all in `dirstate-v1`.
363 Those nodes represent files that would be present at all in `dirstate-v1`.
364 This is typically less than the total number of descendants.
364 This is typically less than the total number of descendants.
365 This counter is used to implement `has_dir`.
365 This counter is used to implement `has_dir`.
366
366
367 * Offset 26:
367 * Offset 26:
368 Number as a 32-bit integer of descendant nodes in this subtree,
368 Number as a 32-bit integer of descendant nodes in this subtree,
369 not including this node itself,
369 not including this node itself,
370 that represent files tracked in the working directory.
370 that represent files tracked in the working directory.
371 (For example, `hg rm` makes a file untracked.)
371 (For example, `hg rm` makes a file untracked.)
372 This counter is used to implement `has_tracked_dir`.
372 This counter is used to implement `has_tracked_dir`.
373
373
374 * Offset 30:
374 * Offset 30:
375 A `flags` field that packs some boolean values as bits of a 16-bit integer.
375 A `flags` field that packs some boolean values as bits of a 16-bit integer.
376 Starting from least-significant, bit masks are::
376 Starting from least-significant, bit masks are::
377
377
378 WDIR_TRACKED = 1 << 0
378 WDIR_TRACKED = 1 << 0
379 P1_TRACKED = 1 << 1
379 P1_TRACKED = 1 << 1
380 P2_INFO = 1 << 2
380 P2_INFO = 1 << 2
381 HAS_MODE_AND_SIZE = 1 << 3
381 HAS_MODE_AND_SIZE = 1 << 3
382 HAS_MTIME = 1 << 4
382 HAS_FILE_MTIME = 1 << 4
383 MODE_EXEC_PERM = 1 << 5
383 HAS_DIRECTORY_MTIME = 1 << 5
384 MODE_IS_SYMLINK = 1 << 6
384 MODE_EXEC_PERM = 1 << 6
385 MODE_IS_SYMLINK = 1 << 7
385
386
386 The meaning of each bit is described below.
387 The meaning of each bit is described below.
387
388
388 Other bits are unset.
389 Other bits are unset.
389 They may be assigned meaning in the future,
390 They may be assigned meaning in the future,
390 with the limitation that Mercurial versions that pre-date such meaning
391 with the limitation that Mercurial versions that pre-date such meaning
391 will always reset those bits to unset when writing nodes.
392 will always reset those bits to unset when writing nodes.
392 (A new node is written for any mutation in its subtree,
393 (A new node is written for any mutation in its subtree,
393 leaving the bytes of the old node unreachable
394 leaving the bytes of the old node unreachable
394 until the data file is rewritten entirely.)
395 until the data file is rewritten entirely.)
395
396
396 * Offset 32:
397 * Offset 32:
397 A `size` field described below, as a 32-bit integer.
398 A `size` field described below, as a 32-bit integer.
398 Unlike in dirstate-v1, negative values are not used.
399 Unlike in dirstate-v1, negative values are not used.
399
400
400 * Offset 36:
401 * Offset 36:
401 The seconds component of an `mtime` field described below,
402 The seconds component of an `mtime` field described below,
402 as a 32-bit integer.
403 as a 32-bit integer.
403 Unlike in dirstate-v1, negative values are not used.
404 Unlike in dirstate-v1, negative values are not used.
405 When `mtime` is used, this is the number of seconds since the Unix epoch
406 truncated to its lower 31 bits.
404
407
405 * Offset 40:
408 * Offset 40:
406 The nanoseconds component of an `mtime` field described below,
409 The nanoseconds component of an `mtime` field described below,
407 as a 32-bit integer.
410 as a 32-bit integer.
411 When `mtime` is used,
412 this is the number of nanoseconds since `mtime.seconds`,
413 always strictly less than one billion.
414
415 This may be zero if more precision is not available.
416 (This can happen because of limitations in any of Mercurial, Python,
417 libc, the operating system, …)
418
419 When comparing two mtimes and either has this component set to zero,
420 the sub-second precision of both should be ignored.
421 False positives when checking mtime equality due to clock resolution
422 are always possible and the status algorithm needs to deal with them,
423 but having too many false negatives could be harmful too.
408
424
409 * (Offset 44: end of this node)
425 * (Offset 44: end of this node)
410
426
411 The meaning of the boolean values packed in `flags` is:
427 The meaning of the boolean values packed in `flags` is:
412
428
413 `WDIR_TRACKED`
429 `WDIR_TRACKED`
414 Set if the working directory contains a tracked file at this node’s path.
430 Set if the working directory contains a tracked file at this node’s path.
415 This is typically set and unset by `hg add` and `hg rm`.
431 This is typically set and unset by `hg add` and `hg rm`.
416
432
417 `P1_TRACKED`
433 `P1_TRACKED`
418 Set if the working directory’s first parent changeset
434 Set if the working directory’s first parent changeset
419 (whose node identifier is found in tree metadata)
435 (whose node identifier is found in tree metadata)
420 contains a tracked file at this node’s path.
436 contains a tracked file at this node’s path.
421 This is a cache to reduce manifest lookups.
437 This is a cache to reduce manifest lookups.
422
438
423 `P2_INFO`
439 `P2_INFO`
424 Set if the file has been involved in some merge operation.
440 Set if the file has been involved in some merge operation.
425 Either because it was actually merged,
441 Either because it was actually merged,
426 or because the version in the second parent p2 was ahead,
442 or because the version in the second parent p2 was ahead,
427 or because some rename moved it there.
443 or because some rename moved it there.
428 In either case `hg status` will want it displayed as modified.
444 In either case `hg status` will want it displayed as modified.
429
445
430 Files that would be mentioned at all in the `dirstate-v1` file format
446 Files that would be mentioned at all in the `dirstate-v1` file format
431 have a node with at least one of the above three bits set in `dirstate-v2`.
447 have a node with at least one of the above three bits set in `dirstate-v2`.
432 Let’s call these files "tracked anywhere",
448 Let’s call these files "tracked anywhere",
433 and "untracked" the nodes with all three of these bits unset.
449 and "untracked" the nodes with all three of these bits unset.
434 Untracked nodes are typically for directories:
450 Untracked nodes are typically for directories:
435 they hold child nodes and form the tree structure.
451 they hold child nodes and form the tree structure.
436 Additional untracked nodes may also exist.
452 Additional untracked nodes may also exist.
437 Although implementations should strive to clean up nodes
453 Although implementations should strive to clean up nodes
438 that are entirely unused, other untracked nodes may also exist.
454 that are entirely unused, other untracked nodes may also exist.
439 For example, a future version of Mercurial might in some cases
455 For example, a future version of Mercurial might in some cases
440 add nodes for untracked files or/and ignored files in the working directory
456 add nodes for untracked files or/and ignored files in the working directory
441 in order to optimize `hg status`
457 in order to optimize `hg status`
442 by enabling it to skip `readdir` in more cases.
458 by enabling it to skip `readdir` in more cases.
443
459
444 `HAS_MODE_AND_SIZE`
460 `HAS_MODE_AND_SIZE`
445 Must be unset for untracked nodes.
461 Must be unset for untracked nodes.
446 For files tracked anywhere, if this is set:
462 For files tracked anywhere, if this is set:
447 - The `size` field is the expected file size,
463 - The `size` field is the expected file size,
448 in bytes truncated to its lower 31 bits,
464 in bytes truncated to its lower 31 bits,
449 for the file to be clean.
465 for the file to be clean.
450 - The expected execute permission for the file’s owner
466 - The expected execute permission for the file’s owner
451 is given by `MODE_EXEC_PERM`
467 is given by `MODE_EXEC_PERM`
452 - The expected file type is given by `MODE_IS_SYMLINK`:
468 - The expected file type is given by `MODE_IS_SYMLINK`:
453 a symbolic link if set, or a normal file if unset.
469 a symbolic link if set, or a normal file if unset.
454 If this is unset the expected size, permission, and file type are unknown.
470 If this is unset the expected size, permission, and file type are unknown.
455 The `size` field is unused (set to zero).
471 The `size` field is unused (set to zero).
456
472
457 `HAS_MTIME`
473 `HAS_FILE_MTIME`
458 If unset, the `mtime` field is unused (set to zero).
474 Must be unset for untracked nodes.
459 If set, it contains a timestamp represented as
475 If this and `HAS_DIRECTORY_MTIME` are both unset,
460 - the number of seconds since the Unix epoch,
476 the `mtime` field is unused (set to zero).
461 truncated to its lower 31 bits.
477 If this is set, `mtime` is the modification time
462 - and the number of nanoseconds since `mtime.seconds`,
478 expected for the file to be considered clean.
463 always strictly less than one billion.
464 This may be zero if more precision is not available.
465 (This can happen because of limitations in any of Mercurial, Python,
466 libc, the operating system, …)
467
479
468 If set for a file tracked anywhere,
480 `HAS_DIRECTORY_MTIME`
469 `mtime` is the expected modification time for the file to be clean.
481 Must be unset for files tracked anywhere.
470
482 If this and `HAS_FILE_MTIME` are both unset,
471 If set for an untracked node, at some point,
483 the `mtime` field is unused (set to zero).
484 If this is set, at some point,
472 this path in the working directory was observed:
485 this path in the working directory was observed:
473
486
474 - To be a directory
487 - To be a directory
475 - With the modification time given in `mtime`
488 - With the modification time given in `mtime`
476 - That time was already strictly in the past when observed,
489 - That time was already strictly in the past when observed,
477 meaning that later changes cannot happen in the same clock tick
490 meaning that later changes cannot happen in the same clock tick
478 and must cause a different modification time
491 and must cause a different modification time
479 (unless the system clock jumps back and we get unlucky,
492 (unless the system clock jumps back and we get unlucky,
480 which is not impossible but deemed unlikely enough).
493 which is not impossible but deemed unlikely enough).
481 - All direct children of this directory
494 - All direct children of this directory
482 (as returned by `std::fs::read_dir`)
495 (as returned by `std::fs::read_dir`)
483 either have a corresponding dirstate node,
496 either have a corresponding dirstate node,
484 or are ignored by ignore patterns whose hash is in tree metadata.
497 or are ignored by ignore patterns whose hash is in tree metadata.
485
498
486 This means that if `std::fs::symlink_metadata` later reports
499 This means that if `std::fs::symlink_metadata` later reports
487 the same modification time
500 the same modification time
488 and ignored patterns haven’t changed,
501 and ignored patterns haven’t changed,
489 a run of status that is not listing ignored files
502 a run of status that is not listing ignored files
490 can skip calling `std::fs::read_dir` again for this directory,
503 can skip calling `std::fs::read_dir` again for this directory,
491 and iterate child dirstate nodes instead.
504 and iterate child dirstate nodes instead.
492
505
493 `MODE_EXEC_PERM`
506 `MODE_EXEC_PERM`
494 Must be unset if `HAS_MODE_AND_SIZE` is unset.
507 Must be unset if `HAS_MODE_AND_SIZE` is unset.
495 If `HAS_MODE_AND_SIZE` is set,
508 If `HAS_MODE_AND_SIZE` is set,
496 this indicates whether the file’s owner is expected
509 this indicates whether the file’s owner is expected
497 to have execute permission.
510 to have execute permission.
498
511
499 `MODE_IS_SYMLINK`
512 `MODE_IS_SYMLINK`
500 Must be unset if `HAS_MODE_AND_SIZE` is unset.
513 Must be unset if `HAS_MODE_AND_SIZE` is unset.
501 If `HAS_MODE_AND_SIZE` is set,
514 If `HAS_MODE_AND_SIZE` is set,
502 this indicates whether the file is expected to be a symlink
515 this indicates whether the file is expected to be a symlink
503 as opposed to a normal file.
516 as opposed to a normal file.
@@ -1,789 +1,790 b''
1 # parsers.py - Python implementation of parsers.c
1 # parsers.py - Python implementation of parsers.c
2 #
2 #
3 # Copyright 2009 Olivia Mackall <olivia@selenic.com> and others
3 # Copyright 2009 Olivia Mackall <olivia@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import stat
10 import stat
11 import struct
11 import struct
12 import zlib
12 import zlib
13
13
14 from ..node import (
14 from ..node import (
15 nullrev,
15 nullrev,
16 sha1nodeconstants,
16 sha1nodeconstants,
17 )
17 )
18 from ..thirdparty import attr
18 from ..thirdparty import attr
19 from .. import (
19 from .. import (
20 error,
20 error,
21 pycompat,
21 pycompat,
22 revlogutils,
22 revlogutils,
23 util,
23 util,
24 )
24 )
25
25
26 from ..revlogutils import nodemap as nodemaputil
26 from ..revlogutils import nodemap as nodemaputil
27 from ..revlogutils import constants as revlog_constants
27 from ..revlogutils import constants as revlog_constants
28
28
29 stringio = pycompat.bytesio
29 stringio = pycompat.bytesio
30
30
31
31
32 _pack = struct.pack
32 _pack = struct.pack
33 _unpack = struct.unpack
33 _unpack = struct.unpack
34 _compress = zlib.compress
34 _compress = zlib.compress
35 _decompress = zlib.decompress
35 _decompress = zlib.decompress
36
36
37
37
38 # a special value used internally for `size` if the file come from the other parent
38 # a special value used internally for `size` if the file come from the other parent
39 FROM_P2 = -2
39 FROM_P2 = -2
40
40
41 # a special value used internally for `size` if the file is modified/merged/added
41 # a special value used internally for `size` if the file is modified/merged/added
42 NONNORMAL = -1
42 NONNORMAL = -1
43
43
44 # a special value used internally for `time` if the time is ambiguous
44 # a special value used internally for `time` if the time is ambiguous
45 AMBIGUOUS_TIME = -1
45 AMBIGUOUS_TIME = -1
46
46
47 # Bits of the `flags` byte inside a node in the file format
47 # Bits of the `flags` byte inside a node in the file format
48 DIRSTATE_V2_WDIR_TRACKED = 1 << 0
48 DIRSTATE_V2_WDIR_TRACKED = 1 << 0
49 DIRSTATE_V2_P1_TRACKED = 1 << 1
49 DIRSTATE_V2_P1_TRACKED = 1 << 1
50 DIRSTATE_V2_P2_INFO = 1 << 2
50 DIRSTATE_V2_P2_INFO = 1 << 2
51 DIRSTATE_V2_HAS_MODE_AND_SIZE = 1 << 3
51 DIRSTATE_V2_HAS_MODE_AND_SIZE = 1 << 3
52 DIRSTATE_V2_HAS_MTIME = 1 << 4
52 DIRSTATE_V2_HAS_FILE_MTIME = 1 << 4
53 DIRSTATE_V2_MODE_EXEC_PERM = 1 << 5
53 _DIRSTATE_V2_HAS_DIRCTORY_MTIME = 1 << 5 # Unused when Rust is not available
54 DIRSTATE_V2_MODE_IS_SYMLINK = 1 << 6
54 DIRSTATE_V2_MODE_EXEC_PERM = 1 << 6
55 DIRSTATE_V2_MODE_IS_SYMLINK = 1 << 7
55
56
56
57
57 @attr.s(slots=True, init=False)
58 @attr.s(slots=True, init=False)
58 class DirstateItem(object):
59 class DirstateItem(object):
59 """represent a dirstate entry
60 """represent a dirstate entry
60
61
61 It hold multiple attributes
62 It hold multiple attributes
62
63
63 # about file tracking
64 # about file tracking
64 - wc_tracked: is the file tracked by the working copy
65 - wc_tracked: is the file tracked by the working copy
65 - p1_tracked: is the file tracked in working copy first parent
66 - p1_tracked: is the file tracked in working copy first parent
66 - p2_info: the file has been involved in some merge operation. Either
67 - p2_info: the file has been involved in some merge operation. Either
67 because it was actually merged, or because the p2 version was
68 because it was actually merged, or because the p2 version was
68 ahead, or because some rename moved it there. In either case
69 ahead, or because some rename moved it there. In either case
69 `hg status` will want it displayed as modified.
70 `hg status` will want it displayed as modified.
70
71
71 # about the file state expected from p1 manifest:
72 # about the file state expected from p1 manifest:
72 - mode: the file mode in p1
73 - mode: the file mode in p1
73 - size: the file size in p1
74 - size: the file size in p1
74
75
75 These value can be set to None, which mean we don't have a meaningful value
76 These value can be set to None, which mean we don't have a meaningful value
76 to compare with. Either because we don't really care about them as there
77 to compare with. Either because we don't really care about them as there
77 `status` is known without having to look at the disk or because we don't
78 `status` is known without having to look at the disk or because we don't
78 know these right now and a full comparison will be needed to find out if
79 know these right now and a full comparison will be needed to find out if
79 the file is clean.
80 the file is clean.
80
81
81 # about the file state on disk last time we saw it:
82 # about the file state on disk last time we saw it:
82 - mtime: the last known clean mtime for the file.
83 - mtime: the last known clean mtime for the file.
83
84
84 This value can be set to None if no cachable state exist. Either because we
85 This value can be set to None if no cachable state exist. Either because we
85 do not care (see previous section) or because we could not cache something
86 do not care (see previous section) or because we could not cache something
86 yet.
87 yet.
87 """
88 """
88
89
89 _wc_tracked = attr.ib()
90 _wc_tracked = attr.ib()
90 _p1_tracked = attr.ib()
91 _p1_tracked = attr.ib()
91 _p2_info = attr.ib()
92 _p2_info = attr.ib()
92 _mode = attr.ib()
93 _mode = attr.ib()
93 _size = attr.ib()
94 _size = attr.ib()
94 _mtime = attr.ib()
95 _mtime = attr.ib()
95
96
96 def __init__(
97 def __init__(
97 self,
98 self,
98 wc_tracked=False,
99 wc_tracked=False,
99 p1_tracked=False,
100 p1_tracked=False,
100 p2_info=False,
101 p2_info=False,
101 has_meaningful_data=True,
102 has_meaningful_data=True,
102 has_meaningful_mtime=True,
103 has_meaningful_mtime=True,
103 parentfiledata=None,
104 parentfiledata=None,
104 ):
105 ):
105 self._wc_tracked = wc_tracked
106 self._wc_tracked = wc_tracked
106 self._p1_tracked = p1_tracked
107 self._p1_tracked = p1_tracked
107 self._p2_info = p2_info
108 self._p2_info = p2_info
108
109
109 self._mode = None
110 self._mode = None
110 self._size = None
111 self._size = None
111 self._mtime = None
112 self._mtime = None
112 if parentfiledata is None:
113 if parentfiledata is None:
113 has_meaningful_mtime = False
114 has_meaningful_mtime = False
114 has_meaningful_data = False
115 has_meaningful_data = False
115 if has_meaningful_data:
116 if has_meaningful_data:
116 self._mode = parentfiledata[0]
117 self._mode = parentfiledata[0]
117 self._size = parentfiledata[1]
118 self._size = parentfiledata[1]
118 if has_meaningful_mtime:
119 if has_meaningful_mtime:
119 self._mtime = parentfiledata[2]
120 self._mtime = parentfiledata[2]
120
121
121 @classmethod
122 @classmethod
122 def from_v2_data(cls, flags, size, mtime):
123 def from_v2_data(cls, flags, size, mtime):
123 """Build a new DirstateItem object from V2 data"""
124 """Build a new DirstateItem object from V2 data"""
124 has_mode_size = bool(flags & DIRSTATE_V2_HAS_MODE_AND_SIZE)
125 has_mode_size = bool(flags & DIRSTATE_V2_HAS_MODE_AND_SIZE)
125 mode = None
126 mode = None
126 if has_mode_size:
127 if has_mode_size:
127 assert stat.S_IXUSR == 0o100
128 assert stat.S_IXUSR == 0o100
128 if flags & DIRSTATE_V2_MODE_EXEC_PERM:
129 if flags & DIRSTATE_V2_MODE_EXEC_PERM:
129 mode = 0o755
130 mode = 0o755
130 else:
131 else:
131 mode = 0o644
132 mode = 0o644
132 if flags & DIRSTATE_V2_MODE_IS_SYMLINK:
133 if flags & DIRSTATE_V2_MODE_IS_SYMLINK:
133 mode |= stat.S_IFLNK
134 mode |= stat.S_IFLNK
134 else:
135 else:
135 mode |= stat.S_IFREG
136 mode |= stat.S_IFREG
136 return cls(
137 return cls(
137 wc_tracked=bool(flags & DIRSTATE_V2_WDIR_TRACKED),
138 wc_tracked=bool(flags & DIRSTATE_V2_WDIR_TRACKED),
138 p1_tracked=bool(flags & DIRSTATE_V2_P1_TRACKED),
139 p1_tracked=bool(flags & DIRSTATE_V2_P1_TRACKED),
139 p2_info=bool(flags & DIRSTATE_V2_P2_INFO),
140 p2_info=bool(flags & DIRSTATE_V2_P2_INFO),
140 has_meaningful_data=has_mode_size,
141 has_meaningful_data=has_mode_size,
141 has_meaningful_mtime=bool(flags & DIRSTATE_V2_HAS_MTIME),
142 has_meaningful_mtime=bool(flags & DIRSTATE_V2_HAS_FILE_MTIME),
142 parentfiledata=(mode, size, mtime),
143 parentfiledata=(mode, size, mtime),
143 )
144 )
144
145
145 @classmethod
146 @classmethod
146 def from_v1_data(cls, state, mode, size, mtime):
147 def from_v1_data(cls, state, mode, size, mtime):
147 """Build a new DirstateItem object from V1 data
148 """Build a new DirstateItem object from V1 data
148
149
149 Since the dirstate-v1 format is frozen, the signature of this function
150 Since the dirstate-v1 format is frozen, the signature of this function
150 is not expected to change, unlike the __init__ one.
151 is not expected to change, unlike the __init__ one.
151 """
152 """
152 if state == b'm':
153 if state == b'm':
153 return cls(wc_tracked=True, p1_tracked=True, p2_info=True)
154 return cls(wc_tracked=True, p1_tracked=True, p2_info=True)
154 elif state == b'a':
155 elif state == b'a':
155 return cls(wc_tracked=True)
156 return cls(wc_tracked=True)
156 elif state == b'r':
157 elif state == b'r':
157 if size == NONNORMAL:
158 if size == NONNORMAL:
158 p1_tracked = True
159 p1_tracked = True
159 p2_info = True
160 p2_info = True
160 elif size == FROM_P2:
161 elif size == FROM_P2:
161 p1_tracked = False
162 p1_tracked = False
162 p2_info = True
163 p2_info = True
163 else:
164 else:
164 p1_tracked = True
165 p1_tracked = True
165 p2_info = False
166 p2_info = False
166 return cls(p1_tracked=p1_tracked, p2_info=p2_info)
167 return cls(p1_tracked=p1_tracked, p2_info=p2_info)
167 elif state == b'n':
168 elif state == b'n':
168 if size == FROM_P2:
169 if size == FROM_P2:
169 return cls(wc_tracked=True, p2_info=True)
170 return cls(wc_tracked=True, p2_info=True)
170 elif size == NONNORMAL:
171 elif size == NONNORMAL:
171 return cls(wc_tracked=True, p1_tracked=True)
172 return cls(wc_tracked=True, p1_tracked=True)
172 elif mtime == AMBIGUOUS_TIME:
173 elif mtime == AMBIGUOUS_TIME:
173 return cls(
174 return cls(
174 wc_tracked=True,
175 wc_tracked=True,
175 p1_tracked=True,
176 p1_tracked=True,
176 has_meaningful_mtime=False,
177 has_meaningful_mtime=False,
177 parentfiledata=(mode, size, 42),
178 parentfiledata=(mode, size, 42),
178 )
179 )
179 else:
180 else:
180 return cls(
181 return cls(
181 wc_tracked=True,
182 wc_tracked=True,
182 p1_tracked=True,
183 p1_tracked=True,
183 parentfiledata=(mode, size, mtime),
184 parentfiledata=(mode, size, mtime),
184 )
185 )
185 else:
186 else:
186 raise RuntimeError(b'unknown state: %s' % state)
187 raise RuntimeError(b'unknown state: %s' % state)
187
188
188 def set_possibly_dirty(self):
189 def set_possibly_dirty(self):
189 """Mark a file as "possibly dirty"
190 """Mark a file as "possibly dirty"
190
191
191 This means the next status call will have to actually check its content
192 This means the next status call will have to actually check its content
192 to make sure it is correct.
193 to make sure it is correct.
193 """
194 """
194 self._mtime = None
195 self._mtime = None
195
196
196 def set_clean(self, mode, size, mtime):
197 def set_clean(self, mode, size, mtime):
197 """mark a file as "clean" cancelling potential "possibly dirty call"
198 """mark a file as "clean" cancelling potential "possibly dirty call"
198
199
199 Note: this function is a descendant of `dirstate.normal` and is
200 Note: this function is a descendant of `dirstate.normal` and is
200 currently expected to be call on "normal" entry only. There are not
201 currently expected to be call on "normal" entry only. There are not
201 reason for this to not change in the future as long as the ccode is
202 reason for this to not change in the future as long as the ccode is
202 updated to preserve the proper state of the non-normal files.
203 updated to preserve the proper state of the non-normal files.
203 """
204 """
204 self._wc_tracked = True
205 self._wc_tracked = True
205 self._p1_tracked = True
206 self._p1_tracked = True
206 self._mode = mode
207 self._mode = mode
207 self._size = size
208 self._size = size
208 self._mtime = mtime
209 self._mtime = mtime
209
210
210 def set_tracked(self):
211 def set_tracked(self):
211 """mark a file as tracked in the working copy
212 """mark a file as tracked in the working copy
212
213
213 This will ultimately be called by command like `hg add`.
214 This will ultimately be called by command like `hg add`.
214 """
215 """
215 self._wc_tracked = True
216 self._wc_tracked = True
216 # `set_tracked` is replacing various `normallookup` call. So we mark
217 # `set_tracked` is replacing various `normallookup` call. So we mark
217 # the files as needing lookup
218 # the files as needing lookup
218 #
219 #
219 # Consider dropping this in the future in favor of something less broad.
220 # Consider dropping this in the future in favor of something less broad.
220 self._mtime = None
221 self._mtime = None
221
222
222 def set_untracked(self):
223 def set_untracked(self):
223 """mark a file as untracked in the working copy
224 """mark a file as untracked in the working copy
224
225
225 This will ultimately be called by command like `hg remove`.
226 This will ultimately be called by command like `hg remove`.
226 """
227 """
227 self._wc_tracked = False
228 self._wc_tracked = False
228 self._mode = None
229 self._mode = None
229 self._size = None
230 self._size = None
230 self._mtime = None
231 self._mtime = None
231
232
232 def drop_merge_data(self):
233 def drop_merge_data(self):
233 """remove all "merge-only" from a DirstateItem
234 """remove all "merge-only" from a DirstateItem
234
235
235 This is to be call by the dirstatemap code when the second parent is dropped
236 This is to be call by the dirstatemap code when the second parent is dropped
236 """
237 """
237 if self._p2_info:
238 if self._p2_info:
238 self._p2_info = False
239 self._p2_info = False
239 self._mode = None
240 self._mode = None
240 self._size = None
241 self._size = None
241 self._mtime = None
242 self._mtime = None
242
243
243 @property
244 @property
244 def mode(self):
245 def mode(self):
245 return self.v1_mode()
246 return self.v1_mode()
246
247
247 @property
248 @property
248 def size(self):
249 def size(self):
249 return self.v1_size()
250 return self.v1_size()
250
251
251 @property
252 @property
252 def mtime(self):
253 def mtime(self):
253 return self.v1_mtime()
254 return self.v1_mtime()
254
255
255 @property
256 @property
256 def state(self):
257 def state(self):
257 """
258 """
258 States are:
259 States are:
259 n normal
260 n normal
260 m needs merging
261 m needs merging
261 r marked for removal
262 r marked for removal
262 a marked for addition
263 a marked for addition
263
264
264 XXX This "state" is a bit obscure and mostly a direct expression of the
265 XXX This "state" is a bit obscure and mostly a direct expression of the
265 dirstatev1 format. It would make sense to ultimately deprecate it in
266 dirstatev1 format. It would make sense to ultimately deprecate it in
266 favor of the more "semantic" attributes.
267 favor of the more "semantic" attributes.
267 """
268 """
268 if not self.any_tracked:
269 if not self.any_tracked:
269 return b'?'
270 return b'?'
270 return self.v1_state()
271 return self.v1_state()
271
272
272 @property
273 @property
273 def tracked(self):
274 def tracked(self):
274 """True is the file is tracked in the working copy"""
275 """True is the file is tracked in the working copy"""
275 return self._wc_tracked
276 return self._wc_tracked
276
277
277 @property
278 @property
278 def any_tracked(self):
279 def any_tracked(self):
279 """True is the file is tracked anywhere (wc or parents)"""
280 """True is the file is tracked anywhere (wc or parents)"""
280 return self._wc_tracked or self._p1_tracked or self._p2_info
281 return self._wc_tracked or self._p1_tracked or self._p2_info
281
282
282 @property
283 @property
283 def added(self):
284 def added(self):
284 """True if the file has been added"""
285 """True if the file has been added"""
285 return self._wc_tracked and not (self._p1_tracked or self._p2_info)
286 return self._wc_tracked and not (self._p1_tracked or self._p2_info)
286
287
287 @property
288 @property
288 def maybe_clean(self):
289 def maybe_clean(self):
289 """True if the file has a chance to be in the "clean" state"""
290 """True if the file has a chance to be in the "clean" state"""
290 if not self._wc_tracked:
291 if not self._wc_tracked:
291 return False
292 return False
292 elif not self._p1_tracked:
293 elif not self._p1_tracked:
293 return False
294 return False
294 elif self._p2_info:
295 elif self._p2_info:
295 return False
296 return False
296 return True
297 return True
297
298
298 @property
299 @property
299 def p1_tracked(self):
300 def p1_tracked(self):
300 """True if the file is tracked in the first parent manifest"""
301 """True if the file is tracked in the first parent manifest"""
301 return self._p1_tracked
302 return self._p1_tracked
302
303
303 @property
304 @property
304 def p2_info(self):
305 def p2_info(self):
305 """True if the file needed to merge or apply any input from p2
306 """True if the file needed to merge or apply any input from p2
306
307
307 See the class documentation for details.
308 See the class documentation for details.
308 """
309 """
309 return self._wc_tracked and self._p2_info
310 return self._wc_tracked and self._p2_info
310
311
311 @property
312 @property
312 def removed(self):
313 def removed(self):
313 """True if the file has been removed"""
314 """True if the file has been removed"""
314 return not self._wc_tracked and (self._p1_tracked or self._p2_info)
315 return not self._wc_tracked and (self._p1_tracked or self._p2_info)
315
316
316 def v2_data(self):
317 def v2_data(self):
317 """Returns (flags, mode, size, mtime) for v2 serialization"""
318 """Returns (flags, mode, size, mtime) for v2 serialization"""
318 flags = 0
319 flags = 0
319 if self._wc_tracked:
320 if self._wc_tracked:
320 flags |= DIRSTATE_V2_WDIR_TRACKED
321 flags |= DIRSTATE_V2_WDIR_TRACKED
321 if self._p1_tracked:
322 if self._p1_tracked:
322 flags |= DIRSTATE_V2_P1_TRACKED
323 flags |= DIRSTATE_V2_P1_TRACKED
323 if self._p2_info:
324 if self._p2_info:
324 flags |= DIRSTATE_V2_P2_INFO
325 flags |= DIRSTATE_V2_P2_INFO
325 if self._mode is not None and self._size is not None:
326 if self._mode is not None and self._size is not None:
326 flags |= DIRSTATE_V2_HAS_MODE_AND_SIZE
327 flags |= DIRSTATE_V2_HAS_MODE_AND_SIZE
327 if self.mode & stat.S_IXUSR:
328 if self.mode & stat.S_IXUSR:
328 flags |= DIRSTATE_V2_MODE_EXEC_PERM
329 flags |= DIRSTATE_V2_MODE_EXEC_PERM
329 if stat.S_ISLNK(self.mode):
330 if stat.S_ISLNK(self.mode):
330 flags |= DIRSTATE_V2_MODE_IS_SYMLINK
331 flags |= DIRSTATE_V2_MODE_IS_SYMLINK
331 if self._mtime is not None:
332 if self._mtime is not None:
332 flags |= DIRSTATE_V2_HAS_MTIME
333 flags |= DIRSTATE_V2_HAS_FILE_MTIME
333 return (flags, self._size or 0, self._mtime or 0)
334 return (flags, self._size or 0, self._mtime or 0)
334
335
335 def v1_state(self):
336 def v1_state(self):
336 """return a "state" suitable for v1 serialization"""
337 """return a "state" suitable for v1 serialization"""
337 if not self.any_tracked:
338 if not self.any_tracked:
338 # the object has no state to record, this is -currently-
339 # the object has no state to record, this is -currently-
339 # unsupported
340 # unsupported
340 raise RuntimeError('untracked item')
341 raise RuntimeError('untracked item')
341 elif self.removed:
342 elif self.removed:
342 return b'r'
343 return b'r'
343 elif self._p1_tracked and self._p2_info:
344 elif self._p1_tracked and self._p2_info:
344 return b'm'
345 return b'm'
345 elif self.added:
346 elif self.added:
346 return b'a'
347 return b'a'
347 else:
348 else:
348 return b'n'
349 return b'n'
349
350
350 def v1_mode(self):
351 def v1_mode(self):
351 """return a "mode" suitable for v1 serialization"""
352 """return a "mode" suitable for v1 serialization"""
352 return self._mode if self._mode is not None else 0
353 return self._mode if self._mode is not None else 0
353
354
354 def v1_size(self):
355 def v1_size(self):
355 """return a "size" suitable for v1 serialization"""
356 """return a "size" suitable for v1 serialization"""
356 if not self.any_tracked:
357 if not self.any_tracked:
357 # the object has no state to record, this is -currently-
358 # the object has no state to record, this is -currently-
358 # unsupported
359 # unsupported
359 raise RuntimeError('untracked item')
360 raise RuntimeError('untracked item')
360 elif self.removed and self._p1_tracked and self._p2_info:
361 elif self.removed and self._p1_tracked and self._p2_info:
361 return NONNORMAL
362 return NONNORMAL
362 elif self._p2_info:
363 elif self._p2_info:
363 return FROM_P2
364 return FROM_P2
364 elif self.removed:
365 elif self.removed:
365 return 0
366 return 0
366 elif self.added:
367 elif self.added:
367 return NONNORMAL
368 return NONNORMAL
368 elif self._size is None:
369 elif self._size is None:
369 return NONNORMAL
370 return NONNORMAL
370 else:
371 else:
371 return self._size
372 return self._size
372
373
373 def v1_mtime(self):
374 def v1_mtime(self):
374 """return a "mtime" suitable for v1 serialization"""
375 """return a "mtime" suitable for v1 serialization"""
375 if not self.any_tracked:
376 if not self.any_tracked:
376 # the object has no state to record, this is -currently-
377 # the object has no state to record, this is -currently-
377 # unsupported
378 # unsupported
378 raise RuntimeError('untracked item')
379 raise RuntimeError('untracked item')
379 elif self.removed:
380 elif self.removed:
380 return 0
381 return 0
381 elif self._mtime is None:
382 elif self._mtime is None:
382 return AMBIGUOUS_TIME
383 return AMBIGUOUS_TIME
383 elif self._p2_info:
384 elif self._p2_info:
384 return AMBIGUOUS_TIME
385 return AMBIGUOUS_TIME
385 elif not self._p1_tracked:
386 elif not self._p1_tracked:
386 return AMBIGUOUS_TIME
387 return AMBIGUOUS_TIME
387 else:
388 else:
388 return self._mtime
389 return self._mtime
389
390
390 def need_delay(self, now):
391 def need_delay(self, now):
391 """True if the stored mtime would be ambiguous with the current time"""
392 """True if the stored mtime would be ambiguous with the current time"""
392 return self.v1_state() == b'n' and self.v1_mtime() == now
393 return self.v1_state() == b'n' and self.v1_mtime() == now
393
394
394
395
395 def gettype(q):
396 def gettype(q):
396 return int(q & 0xFFFF)
397 return int(q & 0xFFFF)
397
398
398
399
399 class BaseIndexObject(object):
400 class BaseIndexObject(object):
400 # Can I be passed to an algorithme implemented in Rust ?
401 # Can I be passed to an algorithme implemented in Rust ?
401 rust_ext_compat = 0
402 rust_ext_compat = 0
402 # Format of an index entry according to Python's `struct` language
403 # Format of an index entry according to Python's `struct` language
403 index_format = revlog_constants.INDEX_ENTRY_V1
404 index_format = revlog_constants.INDEX_ENTRY_V1
404 # Size of a C unsigned long long int, platform independent
405 # Size of a C unsigned long long int, platform independent
405 big_int_size = struct.calcsize(b'>Q')
406 big_int_size = struct.calcsize(b'>Q')
406 # Size of a C long int, platform independent
407 # Size of a C long int, platform independent
407 int_size = struct.calcsize(b'>i')
408 int_size = struct.calcsize(b'>i')
408 # An empty index entry, used as a default value to be overridden, or nullrev
409 # An empty index entry, used as a default value to be overridden, or nullrev
409 null_item = (
410 null_item = (
410 0,
411 0,
411 0,
412 0,
412 0,
413 0,
413 -1,
414 -1,
414 -1,
415 -1,
415 -1,
416 -1,
416 -1,
417 -1,
417 sha1nodeconstants.nullid,
418 sha1nodeconstants.nullid,
418 0,
419 0,
419 0,
420 0,
420 revlog_constants.COMP_MODE_INLINE,
421 revlog_constants.COMP_MODE_INLINE,
421 revlog_constants.COMP_MODE_INLINE,
422 revlog_constants.COMP_MODE_INLINE,
422 )
423 )
423
424
424 @util.propertycache
425 @util.propertycache
425 def entry_size(self):
426 def entry_size(self):
426 return self.index_format.size
427 return self.index_format.size
427
428
428 @property
429 @property
429 def nodemap(self):
430 def nodemap(self):
430 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
431 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
431 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
432 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
432 return self._nodemap
433 return self._nodemap
433
434
434 @util.propertycache
435 @util.propertycache
435 def _nodemap(self):
436 def _nodemap(self):
436 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
437 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
437 for r in range(0, len(self)):
438 for r in range(0, len(self)):
438 n = self[r][7]
439 n = self[r][7]
439 nodemap[n] = r
440 nodemap[n] = r
440 return nodemap
441 return nodemap
441
442
442 def has_node(self, node):
443 def has_node(self, node):
443 """return True if the node exist in the index"""
444 """return True if the node exist in the index"""
444 return node in self._nodemap
445 return node in self._nodemap
445
446
446 def rev(self, node):
447 def rev(self, node):
447 """return a revision for a node
448 """return a revision for a node
448
449
449 If the node is unknown, raise a RevlogError"""
450 If the node is unknown, raise a RevlogError"""
450 return self._nodemap[node]
451 return self._nodemap[node]
451
452
452 def get_rev(self, node):
453 def get_rev(self, node):
453 """return a revision for a node
454 """return a revision for a node
454
455
455 If the node is unknown, return None"""
456 If the node is unknown, return None"""
456 return self._nodemap.get(node)
457 return self._nodemap.get(node)
457
458
458 def _stripnodes(self, start):
459 def _stripnodes(self, start):
459 if '_nodemap' in vars(self):
460 if '_nodemap' in vars(self):
460 for r in range(start, len(self)):
461 for r in range(start, len(self)):
461 n = self[r][7]
462 n = self[r][7]
462 del self._nodemap[n]
463 del self._nodemap[n]
463
464
464 def clearcaches(self):
465 def clearcaches(self):
465 self.__dict__.pop('_nodemap', None)
466 self.__dict__.pop('_nodemap', None)
466
467
467 def __len__(self):
468 def __len__(self):
468 return self._lgt + len(self._extra)
469 return self._lgt + len(self._extra)
469
470
470 def append(self, tup):
471 def append(self, tup):
471 if '_nodemap' in vars(self):
472 if '_nodemap' in vars(self):
472 self._nodemap[tup[7]] = len(self)
473 self._nodemap[tup[7]] = len(self)
473 data = self._pack_entry(len(self), tup)
474 data = self._pack_entry(len(self), tup)
474 self._extra.append(data)
475 self._extra.append(data)
475
476
476 def _pack_entry(self, rev, entry):
477 def _pack_entry(self, rev, entry):
477 assert entry[8] == 0
478 assert entry[8] == 0
478 assert entry[9] == 0
479 assert entry[9] == 0
479 return self.index_format.pack(*entry[:8])
480 return self.index_format.pack(*entry[:8])
480
481
481 def _check_index(self, i):
482 def _check_index(self, i):
482 if not isinstance(i, int):
483 if not isinstance(i, int):
483 raise TypeError(b"expecting int indexes")
484 raise TypeError(b"expecting int indexes")
484 if i < 0 or i >= len(self):
485 if i < 0 or i >= len(self):
485 raise IndexError
486 raise IndexError
486
487
487 def __getitem__(self, i):
488 def __getitem__(self, i):
488 if i == -1:
489 if i == -1:
489 return self.null_item
490 return self.null_item
490 self._check_index(i)
491 self._check_index(i)
491 if i >= self._lgt:
492 if i >= self._lgt:
492 data = self._extra[i - self._lgt]
493 data = self._extra[i - self._lgt]
493 else:
494 else:
494 index = self._calculate_index(i)
495 index = self._calculate_index(i)
495 data = self._data[index : index + self.entry_size]
496 data = self._data[index : index + self.entry_size]
496 r = self._unpack_entry(i, data)
497 r = self._unpack_entry(i, data)
497 if self._lgt and i == 0:
498 if self._lgt and i == 0:
498 offset = revlogutils.offset_type(0, gettype(r[0]))
499 offset = revlogutils.offset_type(0, gettype(r[0]))
499 r = (offset,) + r[1:]
500 r = (offset,) + r[1:]
500 return r
501 return r
501
502
502 def _unpack_entry(self, rev, data):
503 def _unpack_entry(self, rev, data):
503 r = self.index_format.unpack(data)
504 r = self.index_format.unpack(data)
504 r = r + (
505 r = r + (
505 0,
506 0,
506 0,
507 0,
507 revlog_constants.COMP_MODE_INLINE,
508 revlog_constants.COMP_MODE_INLINE,
508 revlog_constants.COMP_MODE_INLINE,
509 revlog_constants.COMP_MODE_INLINE,
509 )
510 )
510 return r
511 return r
511
512
512 def pack_header(self, header):
513 def pack_header(self, header):
513 """pack header information as binary"""
514 """pack header information as binary"""
514 v_fmt = revlog_constants.INDEX_HEADER
515 v_fmt = revlog_constants.INDEX_HEADER
515 return v_fmt.pack(header)
516 return v_fmt.pack(header)
516
517
517 def entry_binary(self, rev):
518 def entry_binary(self, rev):
518 """return the raw binary string representing a revision"""
519 """return the raw binary string representing a revision"""
519 entry = self[rev]
520 entry = self[rev]
520 p = revlog_constants.INDEX_ENTRY_V1.pack(*entry[:8])
521 p = revlog_constants.INDEX_ENTRY_V1.pack(*entry[:8])
521 if rev == 0:
522 if rev == 0:
522 p = p[revlog_constants.INDEX_HEADER.size :]
523 p = p[revlog_constants.INDEX_HEADER.size :]
523 return p
524 return p
524
525
525
526
526 class IndexObject(BaseIndexObject):
527 class IndexObject(BaseIndexObject):
527 def __init__(self, data):
528 def __init__(self, data):
528 assert len(data) % self.entry_size == 0, (
529 assert len(data) % self.entry_size == 0, (
529 len(data),
530 len(data),
530 self.entry_size,
531 self.entry_size,
531 len(data) % self.entry_size,
532 len(data) % self.entry_size,
532 )
533 )
533 self._data = data
534 self._data = data
534 self._lgt = len(data) // self.entry_size
535 self._lgt = len(data) // self.entry_size
535 self._extra = []
536 self._extra = []
536
537
537 def _calculate_index(self, i):
538 def _calculate_index(self, i):
538 return i * self.entry_size
539 return i * self.entry_size
539
540
540 def __delitem__(self, i):
541 def __delitem__(self, i):
541 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
542 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
542 raise ValueError(b"deleting slices only supports a:-1 with step 1")
543 raise ValueError(b"deleting slices only supports a:-1 with step 1")
543 i = i.start
544 i = i.start
544 self._check_index(i)
545 self._check_index(i)
545 self._stripnodes(i)
546 self._stripnodes(i)
546 if i < self._lgt:
547 if i < self._lgt:
547 self._data = self._data[: i * self.entry_size]
548 self._data = self._data[: i * self.entry_size]
548 self._lgt = i
549 self._lgt = i
549 self._extra = []
550 self._extra = []
550 else:
551 else:
551 self._extra = self._extra[: i - self._lgt]
552 self._extra = self._extra[: i - self._lgt]
552
553
553
554
554 class PersistentNodeMapIndexObject(IndexObject):
555 class PersistentNodeMapIndexObject(IndexObject):
555 """a Debug oriented class to test persistent nodemap
556 """a Debug oriented class to test persistent nodemap
556
557
557 We need a simple python object to test API and higher level behavior. See
558 We need a simple python object to test API and higher level behavior. See
558 the Rust implementation for more serious usage. This should be used only
559 the Rust implementation for more serious usage. This should be used only
559 through the dedicated `devel.persistent-nodemap` config.
560 through the dedicated `devel.persistent-nodemap` config.
560 """
561 """
561
562
562 def nodemap_data_all(self):
563 def nodemap_data_all(self):
563 """Return bytes containing a full serialization of a nodemap
564 """Return bytes containing a full serialization of a nodemap
564
565
565 The nodemap should be valid for the full set of revisions in the
566 The nodemap should be valid for the full set of revisions in the
566 index."""
567 index."""
567 return nodemaputil.persistent_data(self)
568 return nodemaputil.persistent_data(self)
568
569
569 def nodemap_data_incremental(self):
570 def nodemap_data_incremental(self):
570 """Return bytes containing a incremental update to persistent nodemap
571 """Return bytes containing a incremental update to persistent nodemap
571
572
572 This containst the data for an append-only update of the data provided
573 This containst the data for an append-only update of the data provided
573 in the last call to `update_nodemap_data`.
574 in the last call to `update_nodemap_data`.
574 """
575 """
575 if self._nm_root is None:
576 if self._nm_root is None:
576 return None
577 return None
577 docket = self._nm_docket
578 docket = self._nm_docket
578 changed, data = nodemaputil.update_persistent_data(
579 changed, data = nodemaputil.update_persistent_data(
579 self, self._nm_root, self._nm_max_idx, self._nm_docket.tip_rev
580 self, self._nm_root, self._nm_max_idx, self._nm_docket.tip_rev
580 )
581 )
581
582
582 self._nm_root = self._nm_max_idx = self._nm_docket = None
583 self._nm_root = self._nm_max_idx = self._nm_docket = None
583 return docket, changed, data
584 return docket, changed, data
584
585
585 def update_nodemap_data(self, docket, nm_data):
586 def update_nodemap_data(self, docket, nm_data):
586 """provide full block of persisted binary data for a nodemap
587 """provide full block of persisted binary data for a nodemap
587
588
588 The data are expected to come from disk. See `nodemap_data_all` for a
589 The data are expected to come from disk. See `nodemap_data_all` for a
589 produceur of such data."""
590 produceur of such data."""
590 if nm_data is not None:
591 if nm_data is not None:
591 self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
592 self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
592 if self._nm_root:
593 if self._nm_root:
593 self._nm_docket = docket
594 self._nm_docket = docket
594 else:
595 else:
595 self._nm_root = self._nm_max_idx = self._nm_docket = None
596 self._nm_root = self._nm_max_idx = self._nm_docket = None
596
597
597
598
598 class InlinedIndexObject(BaseIndexObject):
599 class InlinedIndexObject(BaseIndexObject):
599 def __init__(self, data, inline=0):
600 def __init__(self, data, inline=0):
600 self._data = data
601 self._data = data
601 self._lgt = self._inline_scan(None)
602 self._lgt = self._inline_scan(None)
602 self._inline_scan(self._lgt)
603 self._inline_scan(self._lgt)
603 self._extra = []
604 self._extra = []
604
605
605 def _inline_scan(self, lgt):
606 def _inline_scan(self, lgt):
606 off = 0
607 off = 0
607 if lgt is not None:
608 if lgt is not None:
608 self._offsets = [0] * lgt
609 self._offsets = [0] * lgt
609 count = 0
610 count = 0
610 while off <= len(self._data) - self.entry_size:
611 while off <= len(self._data) - self.entry_size:
611 start = off + self.big_int_size
612 start = off + self.big_int_size
612 (s,) = struct.unpack(
613 (s,) = struct.unpack(
613 b'>i',
614 b'>i',
614 self._data[start : start + self.int_size],
615 self._data[start : start + self.int_size],
615 )
616 )
616 if lgt is not None:
617 if lgt is not None:
617 self._offsets[count] = off
618 self._offsets[count] = off
618 count += 1
619 count += 1
619 off += self.entry_size + s
620 off += self.entry_size + s
620 if off != len(self._data):
621 if off != len(self._data):
621 raise ValueError(b"corrupted data")
622 raise ValueError(b"corrupted data")
622 return count
623 return count
623
624
624 def __delitem__(self, i):
625 def __delitem__(self, i):
625 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
626 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
626 raise ValueError(b"deleting slices only supports a:-1 with step 1")
627 raise ValueError(b"deleting slices only supports a:-1 with step 1")
627 i = i.start
628 i = i.start
628 self._check_index(i)
629 self._check_index(i)
629 self._stripnodes(i)
630 self._stripnodes(i)
630 if i < self._lgt:
631 if i < self._lgt:
631 self._offsets = self._offsets[:i]
632 self._offsets = self._offsets[:i]
632 self._lgt = i
633 self._lgt = i
633 self._extra = []
634 self._extra = []
634 else:
635 else:
635 self._extra = self._extra[: i - self._lgt]
636 self._extra = self._extra[: i - self._lgt]
636
637
637 def _calculate_index(self, i):
638 def _calculate_index(self, i):
638 return self._offsets[i]
639 return self._offsets[i]
639
640
640
641
641 def parse_index2(data, inline, revlogv2=False):
642 def parse_index2(data, inline, revlogv2=False):
642 if not inline:
643 if not inline:
643 cls = IndexObject2 if revlogv2 else IndexObject
644 cls = IndexObject2 if revlogv2 else IndexObject
644 return cls(data), None
645 return cls(data), None
645 cls = InlinedIndexObject
646 cls = InlinedIndexObject
646 return cls(data, inline), (0, data)
647 return cls(data, inline), (0, data)
647
648
648
649
649 def parse_index_cl_v2(data):
650 def parse_index_cl_v2(data):
650 return IndexChangelogV2(data), None
651 return IndexChangelogV2(data), None
651
652
652
653
653 class IndexObject2(IndexObject):
654 class IndexObject2(IndexObject):
654 index_format = revlog_constants.INDEX_ENTRY_V2
655 index_format = revlog_constants.INDEX_ENTRY_V2
655
656
656 def replace_sidedata_info(
657 def replace_sidedata_info(
657 self,
658 self,
658 rev,
659 rev,
659 sidedata_offset,
660 sidedata_offset,
660 sidedata_length,
661 sidedata_length,
661 offset_flags,
662 offset_flags,
662 compression_mode,
663 compression_mode,
663 ):
664 ):
664 """
665 """
665 Replace an existing index entry's sidedata offset and length with new
666 Replace an existing index entry's sidedata offset and length with new
666 ones.
667 ones.
667 This cannot be used outside of the context of sidedata rewriting,
668 This cannot be used outside of the context of sidedata rewriting,
668 inside the transaction that creates the revision `rev`.
669 inside the transaction that creates the revision `rev`.
669 """
670 """
670 if rev < 0:
671 if rev < 0:
671 raise KeyError
672 raise KeyError
672 self._check_index(rev)
673 self._check_index(rev)
673 if rev < self._lgt:
674 if rev < self._lgt:
674 msg = b"cannot rewrite entries outside of this transaction"
675 msg = b"cannot rewrite entries outside of this transaction"
675 raise KeyError(msg)
676 raise KeyError(msg)
676 else:
677 else:
677 entry = list(self[rev])
678 entry = list(self[rev])
678 entry[0] = offset_flags
679 entry[0] = offset_flags
679 entry[8] = sidedata_offset
680 entry[8] = sidedata_offset
680 entry[9] = sidedata_length
681 entry[9] = sidedata_length
681 entry[11] = compression_mode
682 entry[11] = compression_mode
682 entry = tuple(entry)
683 entry = tuple(entry)
683 new = self._pack_entry(rev, entry)
684 new = self._pack_entry(rev, entry)
684 self._extra[rev - self._lgt] = new
685 self._extra[rev - self._lgt] = new
685
686
686 def _unpack_entry(self, rev, data):
687 def _unpack_entry(self, rev, data):
687 data = self.index_format.unpack(data)
688 data = self.index_format.unpack(data)
688 entry = data[:10]
689 entry = data[:10]
689 data_comp = data[10] & 3
690 data_comp = data[10] & 3
690 sidedata_comp = (data[10] & (3 << 2)) >> 2
691 sidedata_comp = (data[10] & (3 << 2)) >> 2
691 return entry + (data_comp, sidedata_comp)
692 return entry + (data_comp, sidedata_comp)
692
693
693 def _pack_entry(self, rev, entry):
694 def _pack_entry(self, rev, entry):
694 data = entry[:10]
695 data = entry[:10]
695 data_comp = entry[10] & 3
696 data_comp = entry[10] & 3
696 sidedata_comp = (entry[11] & 3) << 2
697 sidedata_comp = (entry[11] & 3) << 2
697 data += (data_comp | sidedata_comp,)
698 data += (data_comp | sidedata_comp,)
698
699
699 return self.index_format.pack(*data)
700 return self.index_format.pack(*data)
700
701
701 def entry_binary(self, rev):
702 def entry_binary(self, rev):
702 """return the raw binary string representing a revision"""
703 """return the raw binary string representing a revision"""
703 entry = self[rev]
704 entry = self[rev]
704 return self._pack_entry(rev, entry)
705 return self._pack_entry(rev, entry)
705
706
706 def pack_header(self, header):
707 def pack_header(self, header):
707 """pack header information as binary"""
708 """pack header information as binary"""
708 msg = 'version header should go in the docket, not the index: %d'
709 msg = 'version header should go in the docket, not the index: %d'
709 msg %= header
710 msg %= header
710 raise error.ProgrammingError(msg)
711 raise error.ProgrammingError(msg)
711
712
712
713
713 class IndexChangelogV2(IndexObject2):
714 class IndexChangelogV2(IndexObject2):
714 index_format = revlog_constants.INDEX_ENTRY_CL_V2
715 index_format = revlog_constants.INDEX_ENTRY_CL_V2
715
716
716 def _unpack_entry(self, rev, data, r=True):
717 def _unpack_entry(self, rev, data, r=True):
717 items = self.index_format.unpack(data)
718 items = self.index_format.unpack(data)
718 entry = items[:3] + (rev, rev) + items[3:8]
719 entry = items[:3] + (rev, rev) + items[3:8]
719 data_comp = items[8] & 3
720 data_comp = items[8] & 3
720 sidedata_comp = (items[8] >> 2) & 3
721 sidedata_comp = (items[8] >> 2) & 3
721 return entry + (data_comp, sidedata_comp)
722 return entry + (data_comp, sidedata_comp)
722
723
723 def _pack_entry(self, rev, entry):
724 def _pack_entry(self, rev, entry):
724 assert entry[3] == rev, entry[3]
725 assert entry[3] == rev, entry[3]
725 assert entry[4] == rev, entry[4]
726 assert entry[4] == rev, entry[4]
726 data = entry[:3] + entry[5:10]
727 data = entry[:3] + entry[5:10]
727 data_comp = entry[10] & 3
728 data_comp = entry[10] & 3
728 sidedata_comp = (entry[11] & 3) << 2
729 sidedata_comp = (entry[11] & 3) << 2
729 data += (data_comp | sidedata_comp,)
730 data += (data_comp | sidedata_comp,)
730 return self.index_format.pack(*data)
731 return self.index_format.pack(*data)
731
732
732
733
733 def parse_index_devel_nodemap(data, inline):
734 def parse_index_devel_nodemap(data, inline):
734 """like parse_index2, but alway return a PersistentNodeMapIndexObject"""
735 """like parse_index2, but alway return a PersistentNodeMapIndexObject"""
735 return PersistentNodeMapIndexObject(data), None
736 return PersistentNodeMapIndexObject(data), None
736
737
737
738
738 def parse_dirstate(dmap, copymap, st):
739 def parse_dirstate(dmap, copymap, st):
739 parents = [st[:20], st[20:40]]
740 parents = [st[:20], st[20:40]]
740 # dereference fields so they will be local in loop
741 # dereference fields so they will be local in loop
741 format = b">cllll"
742 format = b">cllll"
742 e_size = struct.calcsize(format)
743 e_size = struct.calcsize(format)
743 pos1 = 40
744 pos1 = 40
744 l = len(st)
745 l = len(st)
745
746
746 # the inner loop
747 # the inner loop
747 while pos1 < l:
748 while pos1 < l:
748 pos2 = pos1 + e_size
749 pos2 = pos1 + e_size
749 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
750 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
750 pos1 = pos2 + e[4]
751 pos1 = pos2 + e[4]
751 f = st[pos2:pos1]
752 f = st[pos2:pos1]
752 if b'\0' in f:
753 if b'\0' in f:
753 f, c = f.split(b'\0')
754 f, c = f.split(b'\0')
754 copymap[f] = c
755 copymap[f] = c
755 dmap[f] = DirstateItem.from_v1_data(*e[:4])
756 dmap[f] = DirstateItem.from_v1_data(*e[:4])
756 return parents
757 return parents
757
758
758
759
759 def pack_dirstate(dmap, copymap, pl, now):
760 def pack_dirstate(dmap, copymap, pl, now):
760 now = int(now)
761 now = int(now)
761 cs = stringio()
762 cs = stringio()
762 write = cs.write
763 write = cs.write
763 write(b"".join(pl))
764 write(b"".join(pl))
764 for f, e in pycompat.iteritems(dmap):
765 for f, e in pycompat.iteritems(dmap):
765 if e.need_delay(now):
766 if e.need_delay(now):
766 # The file was last modified "simultaneously" with the current
767 # The file was last modified "simultaneously" with the current
767 # write to dirstate (i.e. within the same second for file-
768 # write to dirstate (i.e. within the same second for file-
768 # systems with a granularity of 1 sec). This commonly happens
769 # systems with a granularity of 1 sec). This commonly happens
769 # for at least a couple of files on 'update'.
770 # for at least a couple of files on 'update'.
770 # The user could change the file without changing its size
771 # The user could change the file without changing its size
771 # within the same second. Invalidate the file's mtime in
772 # within the same second. Invalidate the file's mtime in
772 # dirstate, forcing future 'status' calls to compare the
773 # dirstate, forcing future 'status' calls to compare the
773 # contents of the file if the size is the same. This prevents
774 # contents of the file if the size is the same. This prevents
774 # mistakenly treating such files as clean.
775 # mistakenly treating such files as clean.
775 e.set_possibly_dirty()
776 e.set_possibly_dirty()
776
777
777 if f in copymap:
778 if f in copymap:
778 f = b"%s\0%s" % (f, copymap[f])
779 f = b"%s\0%s" % (f, copymap[f])
779 e = _pack(
780 e = _pack(
780 b">cllll",
781 b">cllll",
781 e.v1_state(),
782 e.v1_state(),
782 e.v1_mode(),
783 e.v1_mode(),
783 e.v1_size(),
784 e.v1_size(),
784 e.v1_mtime(),
785 e.v1_mtime(),
785 len(f),
786 len(f),
786 )
787 )
787 write(e)
788 write(e)
788 write(f)
789 write(f)
789 return cs.getvalue()
790 return cs.getvalue()
@@ -1,721 +1,726 b''
1 //! The "version 2" disk representation of the dirstate
1 //! The "version 2" disk representation of the dirstate
2 //!
2 //!
3 //! See `mercurial/helptext/internals/dirstate-v2.txt`
3 //! See `mercurial/helptext/internals/dirstate-v2.txt`
4
4
5 use crate::dirstate::TruncatedTimestamp;
5 use crate::dirstate::TruncatedTimestamp;
6 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
6 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
7 use crate::dirstate_tree::path_with_basename::WithBasename;
7 use crate::dirstate_tree::path_with_basename::WithBasename;
8 use crate::errors::HgError;
8 use crate::errors::HgError;
9 use crate::utils::hg_path::HgPath;
9 use crate::utils::hg_path::HgPath;
10 use crate::DirstateEntry;
10 use crate::DirstateEntry;
11 use crate::DirstateError;
11 use crate::DirstateError;
12 use crate::DirstateParents;
12 use crate::DirstateParents;
13 use bitflags::bitflags;
13 use bitflags::bitflags;
14 use bytes_cast::unaligned::{U16Be, U32Be};
14 use bytes_cast::unaligned::{U16Be, U32Be};
15 use bytes_cast::BytesCast;
15 use bytes_cast::BytesCast;
16 use format_bytes::format_bytes;
16 use format_bytes::format_bytes;
17 use std::borrow::Cow;
17 use std::borrow::Cow;
18 use std::convert::{TryFrom, TryInto};
18 use std::convert::{TryFrom, TryInto};
19
19
/// Added at the start of `.hg/dirstate` when the "v2" format is used.
/// This is a redundant sanity check more than an actual "magic number" since
/// `.hg/requires` already governs which format should be used.
pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";

/// Keep space for 256-bit hashes
const STORED_NODE_ID_BYTES: usize = 32;

/// … even though only 160 bits are used for now, with SHA-1
const USED_NODE_ID_BYTES: usize = 20;

/// Length in bytes of the ignore patterns hash stored in `TreeMetadata`
pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
/// Fixed-size byte array holding the ignore patterns hash
pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];

/// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
const TREE_METADATA_SIZE: usize = 44;
const NODE_SIZE: usize = 44;
37
37
/// Make sure that size-affecting changes are made knowingly
///
/// `std::mem::transmute` is only accepted by the compiler between two types
/// of the same size, so naming these instantiations is intended to stop the
/// build if one of the on-disk structs no longer has its documented size.
/// Nothing is transmuted at run time: the function items are only named.
// Not meant to be called; it exists for the compile-time check only.
#[allow(unused)]
fn static_assert_size_of() {
    let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
    // 81 = marker (12) + parent_1 (32) + parent_2 (32) + data_size (4)
    //    + uuid_size (1)
    let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
    let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
}
45
45
// Must match `HEADER` in `mercurial/dirstateutils/docket.py`
//
// Fixed-size prefix of the docket file. `read_docket` casts the start of the
// file to this struct and treats the bytes that follow as the UUID.
#[derive(BytesCast)]
#[repr(C)]
struct DocketHeader {
    /// Must equal `V2_FORMAT_MARKER`, checked by `read_docket`
    marker: [u8; V2_FORMAT_MARKER.len()],
    /// First parent; `Docket::parents` reads its first
    /// `USED_NODE_ID_BYTES` bytes
    parent_1: [u8; STORED_NODE_ID_BYTES],
    /// Second parent; `Docket::parents` reads its first
    /// `USED_NODE_ID_BYTES` bytes
    parent_2: [u8; STORED_NODE_ID_BYTES],

    /// Exposed as raw bytes by `Docket::tree_metadata`
    metadata: TreeMetadata,

    /// Counted in bytes
    data_size: Size,

    /// Length in bytes of the UUID following this header; `read_docket`
    /// rejects the file if the remaining bytes have a different length
    uuid_size: u8,
}
61
61
/// Parsed view of a docket file, borrowing from the bytes it was read from.
///
/// Built by `read_docket`.
pub struct Docket<'on_disk> {
    /// Fixed-size part at the start of the file
    header: &'on_disk DocketHeader,
    /// Bytes following the header; used by `data_filename` to build the
    /// `dirstate.{uuid}` file name
    uuid: &'on_disk [u8],
}
66
66
/// Fields are documented in the *Tree metadata in the docket file*
/// section of `mercurial/helptext/internals/dirstate-v2.txt`
#[derive(BytesCast)]
#[repr(C)]
struct TreeMetadata {
    /// Location of the nodes at the root of the tree
    root_nodes: ChildNodes,
    nodes_with_entry_count: Size,
    nodes_with_copy_source_count: Size,
    unreachable_bytes: Size,
    /// Written as zeros by `write`
    unused: [u8; 4],

    /// See *Optional hash of ignore patterns* section of
    /// `mercurial/helptext/internals/dirstate-v2.txt`
    ignore_patterns_hash: IgnorePatternsHash,
}
82
82
/// Fields are documented in the *The data file format*
/// section of `mercurial/helptext/internals/dirstate-v2.txt`
#[derive(BytesCast)]
#[repr(C)]
pub(super) struct Node {
    /// Location of this node's full path within the data file
    full_path: PathSlice,

    /// In bytes from `self.full_path.start`
    base_name_start: PathSize,

    /// Absent when `copy_source.start == 0`, see `has_copy_source`
    copy_source: OptPathSlice,
    /// Location of this node's child nodes within the data file
    children: ChildNodes,
    pub(super) descendants_with_entry_count: Size,
    pub(super) tracked_descendants_count: Size,
    /// Raw bits, decoded into `Flags` by `Node::flags`
    flags: U16Be,
    /// Only read when `Flags::HAS_MODE_AND_SIZE` is set
    size: U32Be,
    /// Only read when `Flags::HAS_FILE_MTIME` or
    /// `Flags::HAS_DIRECTORY_MTIME` is set
    mtime: PackedTruncatedTimestamp,
}
101
101
// Bits of the `flags` field of an on-disk `Node`. The numeric values are
// part of the file format.
bitflags! {
    #[repr(C)]
    struct Flags: u16 {
        // A node has a dirstate entry when any of these three bits is set
        // (see `Node::has_entry`).
        const WDIR_TRACKED = 1 << 0;
        const P1_TRACKED = 1 << 1;
        const P2_INFO = 1 << 2;
        // `Node::size` and the two `MODE_*` bits below are meaningful.
        const HAS_MODE_AND_SIZE = 1 << 3;
        // `Node::mtime` holds the mtime of a file entry.
        const HAS_FILE_MTIME = 1 << 4;
        // `Node::mtime` holds a cached directory mtime. Setting it together
        // with `HAS_FILE_MTIME` is a parse error (see
        // `Node::cached_directory_mtime`).
        const HAS_DIRECTORY_MTIME = 1 << 5;
        const MODE_EXEC_PERM = 1 << 6;
        const MODE_IS_SYMLINK = 1 << 7;
    }
}
114
115
/// Duration since the Unix epoch
///
/// On-disk form of a timestamp, stored as two big-endian 32-bit integers.
#[derive(BytesCast, Copy, Clone)]
#[repr(C)]
struct PackedTruncatedTimestamp {
    /// The only field read for file entries (see `Node::assume_entry`)
    truncated_seconds: U32Be,
    /// Written as zero for file entries (see `Node::from_dirstate_entry`)
    nanoseconds: U32Be,
}
122
123
/// Counted in bytes from the start of the file
///
/// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
type Offset = U32Be;

/// Counted in number of items
///
/// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
///
/// This alias is also the type of `DocketHeader::data_size` and
/// `TreeMetadata::unreachable_bytes`, which count bytes rather than items.
type Size = U32Be;

/// Counted in bytes
///
/// NOTE: we choose not to support file names/paths longer than 64 KiB.
type PathSize = U16Be;
137
138
/// A contiguous sequence of `len` times `Node`, representing the child nodes
/// of either some other node or of the repository root.
///
/// Always sorted by ascending `full_path`, to allow binary search.
/// Since nodes with the same parent nodes also have the same parent path,
/// only the `base_name`s need to be compared during binary search.
#[derive(BytesCast, Copy, Clone)]
#[repr(C)]
struct ChildNodes {
    /// Where the first node of the sequence starts
    start: Offset,
    /// Number of nodes in the sequence
    len: Size,
}
150
151
/// A `HgPath` of `len` bytes
///
/// Resolved against the data file contents by `read_hg_path`.
#[derive(BytesCast, Copy, Clone)]
#[repr(C)]
struct PathSlice {
    /// Where the path bytes start
    start: Offset,
    /// Length of the path, in bytes
    len: PathSize,
}
158
159
/// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
///
/// This is a plain alias of `PathSlice`: the "nothing" case is a convention
/// that readers check themselves (see `Node::has_copy_source`), it is not
/// enforced by the type.
type OptPathSlice = PathSlice;
161
162
/// Unexpected file format found in `.hg/dirstate` with the "v2" format.
///
/// This should only happen if Mercurial is buggy or a repository is corrupted.
///
/// The error carries no detail about what was wrong or where.
#[derive(Debug)]
pub struct DirstateV2ParseError;
167
168
168 impl From<DirstateV2ParseError> for HgError {
169 impl From<DirstateV2ParseError> for HgError {
169 fn from(_: DirstateV2ParseError) -> Self {
170 fn from(_: DirstateV2ParseError) -> Self {
170 HgError::corrupted("dirstate-v2 parse error")
171 HgError::corrupted("dirstate-v2 parse error")
171 }
172 }
172 }
173 }
173
174
174 impl From<DirstateV2ParseError> for crate::DirstateError {
175 impl From<DirstateV2ParseError> for crate::DirstateError {
175 fn from(error: DirstateV2ParseError) -> Self {
176 fn from(error: DirstateV2ParseError) -> Self {
176 HgError::from(error).into()
177 HgError::from(error).into()
177 }
178 }
178 }
179 }
179
180
180 impl<'on_disk> Docket<'on_disk> {
181 impl<'on_disk> Docket<'on_disk> {
181 pub fn parents(&self) -> DirstateParents {
182 pub fn parents(&self) -> DirstateParents {
182 use crate::Node;
183 use crate::Node;
183 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
184 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
184 .unwrap()
185 .unwrap()
185 .clone();
186 .clone();
186 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
187 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
187 .unwrap()
188 .unwrap()
188 .clone();
189 .clone();
189 DirstateParents { p1, p2 }
190 DirstateParents { p1, p2 }
190 }
191 }
191
192
192 pub fn tree_metadata(&self) -> &[u8] {
193 pub fn tree_metadata(&self) -> &[u8] {
193 self.header.metadata.as_bytes()
194 self.header.metadata.as_bytes()
194 }
195 }
195
196
196 pub fn data_size(&self) -> usize {
197 pub fn data_size(&self) -> usize {
197 // This `unwrap` could only panic on a 16-bit CPU
198 // This `unwrap` could only panic on a 16-bit CPU
198 self.header.data_size.get().try_into().unwrap()
199 self.header.data_size.get().try_into().unwrap()
199 }
200 }
200
201
201 pub fn data_filename(&self) -> String {
202 pub fn data_filename(&self) -> String {
202 String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
203 String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
203 }
204 }
204 }
205 }
205
206
206 pub fn read_docket(
207 pub fn read_docket(
207 on_disk: &[u8],
208 on_disk: &[u8],
208 ) -> Result<Docket<'_>, DirstateV2ParseError> {
209 ) -> Result<Docket<'_>, DirstateV2ParseError> {
209 let (header, uuid) =
210 let (header, uuid) =
210 DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
211 DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
211 let uuid_size = header.uuid_size as usize;
212 let uuid_size = header.uuid_size as usize;
212 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
213 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
213 Ok(Docket { header, uuid })
214 Ok(Docket { header, uuid })
214 } else {
215 } else {
215 Err(DirstateV2ParseError)
216 Err(DirstateV2ParseError)
216 }
217 }
217 }
218 }
218
219
219 pub(super) fn read<'on_disk>(
220 pub(super) fn read<'on_disk>(
220 on_disk: &'on_disk [u8],
221 on_disk: &'on_disk [u8],
221 metadata: &[u8],
222 metadata: &[u8],
222 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
223 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
223 if on_disk.is_empty() {
224 if on_disk.is_empty() {
224 return Ok(DirstateMap::empty(on_disk));
225 return Ok(DirstateMap::empty(on_disk));
225 }
226 }
226 let (meta, _) = TreeMetadata::from_bytes(metadata)
227 let (meta, _) = TreeMetadata::from_bytes(metadata)
227 .map_err(|_| DirstateV2ParseError)?;
228 .map_err(|_| DirstateV2ParseError)?;
228 let dirstate_map = DirstateMap {
229 let dirstate_map = DirstateMap {
229 on_disk,
230 on_disk,
230 root: dirstate_map::ChildNodes::OnDisk(read_nodes(
231 root: dirstate_map::ChildNodes::OnDisk(read_nodes(
231 on_disk,
232 on_disk,
232 meta.root_nodes,
233 meta.root_nodes,
233 )?),
234 )?),
234 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
235 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
235 nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
236 nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
236 ignore_patterns_hash: meta.ignore_patterns_hash,
237 ignore_patterns_hash: meta.ignore_patterns_hash,
237 unreachable_bytes: meta.unreachable_bytes.get(),
238 unreachable_bytes: meta.unreachable_bytes.get(),
238 };
239 };
239 Ok(dirstate_map)
240 Ok(dirstate_map)
240 }
241 }
241
242
242 impl Node {
243 impl Node {
243 pub(super) fn full_path<'on_disk>(
244 pub(super) fn full_path<'on_disk>(
244 &self,
245 &self,
245 on_disk: &'on_disk [u8],
246 on_disk: &'on_disk [u8],
246 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
247 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
247 read_hg_path(on_disk, self.full_path)
248 read_hg_path(on_disk, self.full_path)
248 }
249 }
249
250
250 pub(super) fn base_name_start<'on_disk>(
251 pub(super) fn base_name_start<'on_disk>(
251 &self,
252 &self,
252 ) -> Result<usize, DirstateV2ParseError> {
253 ) -> Result<usize, DirstateV2ParseError> {
253 let start = self.base_name_start.get();
254 let start = self.base_name_start.get();
254 if start < self.full_path.len.get() {
255 if start < self.full_path.len.get() {
255 let start = usize::try_from(start)
256 let start = usize::try_from(start)
256 // u32 -> usize, could only panic on a 16-bit CPU
257 // u32 -> usize, could only panic on a 16-bit CPU
257 .expect("dirstate-v2 base_name_start out of bounds");
258 .expect("dirstate-v2 base_name_start out of bounds");
258 Ok(start)
259 Ok(start)
259 } else {
260 } else {
260 Err(DirstateV2ParseError)
261 Err(DirstateV2ParseError)
261 }
262 }
262 }
263 }
263
264
264 pub(super) fn base_name<'on_disk>(
265 pub(super) fn base_name<'on_disk>(
265 &self,
266 &self,
266 on_disk: &'on_disk [u8],
267 on_disk: &'on_disk [u8],
267 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
268 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
268 let full_path = self.full_path(on_disk)?;
269 let full_path = self.full_path(on_disk)?;
269 let base_name_start = self.base_name_start()?;
270 let base_name_start = self.base_name_start()?;
270 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
271 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
271 }
272 }
272
273
273 pub(super) fn path<'on_disk>(
274 pub(super) fn path<'on_disk>(
274 &self,
275 &self,
275 on_disk: &'on_disk [u8],
276 on_disk: &'on_disk [u8],
276 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
277 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
277 Ok(WithBasename::from_raw_parts(
278 Ok(WithBasename::from_raw_parts(
278 Cow::Borrowed(self.full_path(on_disk)?),
279 Cow::Borrowed(self.full_path(on_disk)?),
279 self.base_name_start()?,
280 self.base_name_start()?,
280 ))
281 ))
281 }
282 }
282
283
283 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
284 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
284 self.copy_source.start.get() != 0
285 self.copy_source.start.get() != 0
285 }
286 }
286
287
287 pub(super) fn copy_source<'on_disk>(
288 pub(super) fn copy_source<'on_disk>(
288 &self,
289 &self,
289 on_disk: &'on_disk [u8],
290 on_disk: &'on_disk [u8],
290 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
291 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
291 Ok(if self.has_copy_source() {
292 Ok(if self.has_copy_source() {
292 Some(read_hg_path(on_disk, self.copy_source)?)
293 Some(read_hg_path(on_disk, self.copy_source)?)
293 } else {
294 } else {
294 None
295 None
295 })
296 })
296 }
297 }
297
298
298 fn flags(&self) -> Flags {
299 fn flags(&self) -> Flags {
299 Flags::from_bits_truncate(self.flags.get())
300 Flags::from_bits_truncate(self.flags.get())
300 }
301 }
301
302
302 fn has_entry(&self) -> bool {
303 fn has_entry(&self) -> bool {
303 self.flags().intersects(
304 self.flags().intersects(
304 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
305 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
305 )
306 )
306 }
307 }
307
308
308 pub(super) fn node_data(
309 pub(super) fn node_data(
309 &self,
310 &self,
310 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
311 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
311 if self.has_entry() {
312 if self.has_entry() {
312 Ok(dirstate_map::NodeData::Entry(self.assume_entry()))
313 Ok(dirstate_map::NodeData::Entry(self.assume_entry()))
313 } else if let Some(mtime) = self.cached_directory_mtime()? {
314 } else if let Some(mtime) = self.cached_directory_mtime()? {
314 Ok(dirstate_map::NodeData::CachedDirectory { mtime })
315 Ok(dirstate_map::NodeData::CachedDirectory { mtime })
315 } else {
316 } else {
316 Ok(dirstate_map::NodeData::None)
317 Ok(dirstate_map::NodeData::None)
317 }
318 }
318 }
319 }
319
320
320 pub(super) fn cached_directory_mtime(
321 pub(super) fn cached_directory_mtime(
321 &self,
322 &self,
322 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
323 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
323 Ok(
324 if self.flags().contains(Flags::HAS_DIRECTORY_MTIME) {
324 if self.flags().contains(Flags::HAS_MTIME) && !self.has_entry() {
325 if self.flags().contains(Flags::HAS_FILE_MTIME) {
325 Some(self.mtime.try_into()?)
326 Err(DirstateV2ParseError)
326 } else {
327 } else {
327 None
328 Ok(Some(self.mtime.try_into()?))
328 },
329 }
329 )
330 } else {
331 Ok(None)
332 }
330 }
333 }
331
334
332 fn synthesize_unix_mode(&self) -> u32 {
335 fn synthesize_unix_mode(&self) -> u32 {
333 let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
336 let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
334 libc::S_IFLNK
337 libc::S_IFLNK
335 } else {
338 } else {
336 libc::S_IFREG
339 libc::S_IFREG
337 };
340 };
338 let permisions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
341 let permisions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
339 0o755
342 0o755
340 } else {
343 } else {
341 0o644
344 0o644
342 };
345 };
343 file_type | permisions
346 file_type | permisions
344 }
347 }
345
348
346 fn assume_entry(&self) -> DirstateEntry {
349 fn assume_entry(&self) -> DirstateEntry {
347 // TODO: convert through raw bits instead?
350 // TODO: convert through raw bits instead?
348 let wdir_tracked = self.flags().contains(Flags::WDIR_TRACKED);
351 let wdir_tracked = self.flags().contains(Flags::WDIR_TRACKED);
349 let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
352 let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
350 let p2_info = self.flags().contains(Flags::P2_INFO);
353 let p2_info = self.flags().contains(Flags::P2_INFO);
351 let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE) {
354 let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE) {
352 Some((self.synthesize_unix_mode(), self.size.into()))
355 Some((self.synthesize_unix_mode(), self.size.into()))
353 } else {
356 } else {
354 None
357 None
355 };
358 };
356 let mtime = if self.flags().contains(Flags::HAS_MTIME) {
359 let mtime = if self.flags().contains(Flags::HAS_FILE_MTIME) {
357 Some(self.mtime.truncated_seconds.into())
360 Some(self.mtime.truncated_seconds.into())
358 } else {
361 } else {
359 None
362 None
360 };
363 };
361 DirstateEntry::from_v2_data(
364 DirstateEntry::from_v2_data(
362 wdir_tracked,
365 wdir_tracked,
363 p1_tracked,
366 p1_tracked,
364 p2_info,
367 p2_info,
365 mode_size,
368 mode_size,
366 mtime,
369 mtime,
367 )
370 )
368 }
371 }
369
372
370 pub(super) fn entry(
373 pub(super) fn entry(
371 &self,
374 &self,
372 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
375 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
373 if self.has_entry() {
376 if self.has_entry() {
374 Ok(Some(self.assume_entry()))
377 Ok(Some(self.assume_entry()))
375 } else {
378 } else {
376 Ok(None)
379 Ok(None)
377 }
380 }
378 }
381 }
379
382
380 pub(super) fn children<'on_disk>(
383 pub(super) fn children<'on_disk>(
381 &self,
384 &self,
382 on_disk: &'on_disk [u8],
385 on_disk: &'on_disk [u8],
383 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
386 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
384 read_nodes(on_disk, self.children)
387 read_nodes(on_disk, self.children)
385 }
388 }
386
389
387 pub(super) fn to_in_memory_node<'on_disk>(
390 pub(super) fn to_in_memory_node<'on_disk>(
388 &self,
391 &self,
389 on_disk: &'on_disk [u8],
392 on_disk: &'on_disk [u8],
390 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
393 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
391 Ok(dirstate_map::Node {
394 Ok(dirstate_map::Node {
392 children: dirstate_map::ChildNodes::OnDisk(
395 children: dirstate_map::ChildNodes::OnDisk(
393 self.children(on_disk)?,
396 self.children(on_disk)?,
394 ),
397 ),
395 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
398 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
396 data: self.node_data()?,
399 data: self.node_data()?,
397 descendants_with_entry_count: self
400 descendants_with_entry_count: self
398 .descendants_with_entry_count
401 .descendants_with_entry_count
399 .get(),
402 .get(),
400 tracked_descendants_count: self.tracked_descendants_count.get(),
403 tracked_descendants_count: self.tracked_descendants_count.get(),
401 })
404 })
402 }
405 }
403
406
404 fn from_dirstate_entry(
407 fn from_dirstate_entry(
405 entry: &DirstateEntry,
408 entry: &DirstateEntry,
406 ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
409 ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
407 let (wdir_tracked, p1_tracked, p2_info, mode_size_opt, mtime_opt) =
410 let (wdir_tracked, p1_tracked, p2_info, mode_size_opt, mtime_opt) =
408 entry.v2_data();
411 entry.v2_data();
409 // TODO: convert throug raw flag bits instead?
412 // TODO: convert throug raw flag bits instead?
410 let mut flags = Flags::empty();
413 let mut flags = Flags::empty();
411 flags.set(Flags::WDIR_TRACKED, wdir_tracked);
414 flags.set(Flags::WDIR_TRACKED, wdir_tracked);
412 flags.set(Flags::P1_TRACKED, p1_tracked);
415 flags.set(Flags::P1_TRACKED, p1_tracked);
413 flags.set(Flags::P2_INFO, p2_info);
416 flags.set(Flags::P2_INFO, p2_info);
414 let size = if let Some((m, s)) = mode_size_opt {
417 let size = if let Some((m, s)) = mode_size_opt {
415 let exec_perm = m & libc::S_IXUSR != 0;
418 let exec_perm = m & libc::S_IXUSR != 0;
416 let is_symlink = m & libc::S_IFMT == libc::S_IFLNK;
419 let is_symlink = m & libc::S_IFMT == libc::S_IFLNK;
417 flags.set(Flags::MODE_EXEC_PERM, exec_perm);
420 flags.set(Flags::MODE_EXEC_PERM, exec_perm);
418 flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
421 flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
419 flags.insert(Flags::HAS_MODE_AND_SIZE);
422 flags.insert(Flags::HAS_MODE_AND_SIZE);
420 s.into()
423 s.into()
421 } else {
424 } else {
422 0.into()
425 0.into()
423 };
426 };
424 let mtime = if let Some(m) = mtime_opt {
427 let mtime = if let Some(m) = mtime_opt {
425 flags.insert(Flags::HAS_MTIME);
428 flags.insert(Flags::HAS_FILE_MTIME);
426 PackedTruncatedTimestamp {
429 PackedTruncatedTimestamp {
427 truncated_seconds: m.into(),
430 truncated_seconds: m.into(),
428 nanoseconds: 0.into(),
431 nanoseconds: 0.into(),
429 }
432 }
430 } else {
433 } else {
431 PackedTruncatedTimestamp::null()
434 PackedTruncatedTimestamp::null()
432 };
435 };
433 (flags, size, mtime)
436 (flags, size, mtime)
434 }
437 }
435 }
438 }
436
439
437 fn read_hg_path(
440 fn read_hg_path(
438 on_disk: &[u8],
441 on_disk: &[u8],
439 slice: PathSlice,
442 slice: PathSlice,
440 ) -> Result<&HgPath, DirstateV2ParseError> {
443 ) -> Result<&HgPath, DirstateV2ParseError> {
441 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
444 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
442 }
445 }
443
446
444 fn read_nodes(
447 fn read_nodes(
445 on_disk: &[u8],
448 on_disk: &[u8],
446 slice: ChildNodes,
449 slice: ChildNodes,
447 ) -> Result<&[Node], DirstateV2ParseError> {
450 ) -> Result<&[Node], DirstateV2ParseError> {
448 read_slice(on_disk, slice.start, slice.len.get())
451 read_slice(on_disk, slice.start, slice.len.get())
449 }
452 }
450
453
451 fn read_slice<T, Len>(
454 fn read_slice<T, Len>(
452 on_disk: &[u8],
455 on_disk: &[u8],
453 start: Offset,
456 start: Offset,
454 len: Len,
457 len: Len,
455 ) -> Result<&[T], DirstateV2ParseError>
458 ) -> Result<&[T], DirstateV2ParseError>
456 where
459 where
457 T: BytesCast,
460 T: BytesCast,
458 Len: TryInto<usize>,
461 Len: TryInto<usize>,
459 {
462 {
460 // Either `usize::MAX` would result in "out of bounds" error since a single
463 // Either `usize::MAX` would result in "out of bounds" error since a single
461 // `&[u8]` cannot occupy the entire addess space.
464 // `&[u8]` cannot occupy the entire addess space.
462 let start = start.get().try_into().unwrap_or(std::usize::MAX);
465 let start = start.get().try_into().unwrap_or(std::usize::MAX);
463 let len = len.try_into().unwrap_or(std::usize::MAX);
466 let len = len.try_into().unwrap_or(std::usize::MAX);
464 on_disk
467 on_disk
465 .get(start..)
468 .get(start..)
466 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
469 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
467 .map(|(slice, _rest)| slice)
470 .map(|(slice, _rest)| slice)
468 .ok_or_else(|| DirstateV2ParseError)
471 .ok_or_else(|| DirstateV2ParseError)
469 }
472 }
470
473
471 pub(crate) fn for_each_tracked_path<'on_disk>(
474 pub(crate) fn for_each_tracked_path<'on_disk>(
472 on_disk: &'on_disk [u8],
475 on_disk: &'on_disk [u8],
473 metadata: &[u8],
476 metadata: &[u8],
474 mut f: impl FnMut(&'on_disk HgPath),
477 mut f: impl FnMut(&'on_disk HgPath),
475 ) -> Result<(), DirstateV2ParseError> {
478 ) -> Result<(), DirstateV2ParseError> {
476 let (meta, _) = TreeMetadata::from_bytes(metadata)
479 let (meta, _) = TreeMetadata::from_bytes(metadata)
477 .map_err(|_| DirstateV2ParseError)?;
480 .map_err(|_| DirstateV2ParseError)?;
478 fn recur<'on_disk>(
481 fn recur<'on_disk>(
479 on_disk: &'on_disk [u8],
482 on_disk: &'on_disk [u8],
480 nodes: ChildNodes,
483 nodes: ChildNodes,
481 f: &mut impl FnMut(&'on_disk HgPath),
484 f: &mut impl FnMut(&'on_disk HgPath),
482 ) -> Result<(), DirstateV2ParseError> {
485 ) -> Result<(), DirstateV2ParseError> {
483 for node in read_nodes(on_disk, nodes)? {
486 for node in read_nodes(on_disk, nodes)? {
484 if let Some(entry) = node.entry()? {
487 if let Some(entry) = node.entry()? {
485 if entry.state().is_tracked() {
488 if entry.state().is_tracked() {
486 f(node.full_path(on_disk)?)
489 f(node.full_path(on_disk)?)
487 }
490 }
488 }
491 }
489 recur(on_disk, node.children, f)?
492 recur(on_disk, node.children, f)?
490 }
493 }
491 Ok(())
494 Ok(())
492 }
495 }
493 recur(on_disk, meta.root_nodes, &mut f)
496 recur(on_disk, meta.root_nodes, &mut f)
494 }
497 }
495
498
496 /// Returns new data and metadata, together with whether that data should be
499 /// Returns new data and metadata, together with whether that data should be
497 /// appended to the existing data file whose content is at
500 /// appended to the existing data file whose content is at
498 /// `dirstate_map.on_disk` (true), instead of written to a new data file
501 /// `dirstate_map.on_disk` (true), instead of written to a new data file
499 /// (false).
502 /// (false).
500 pub(super) fn write(
503 pub(super) fn write(
501 dirstate_map: &mut DirstateMap,
504 dirstate_map: &mut DirstateMap,
502 can_append: bool,
505 can_append: bool,
503 ) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
506 ) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
504 let append = can_append && dirstate_map.write_should_append();
507 let append = can_append && dirstate_map.write_should_append();
505
508
506 // This ignores the space for paths, and for nodes without an entry.
509 // This ignores the space for paths, and for nodes without an entry.
507 // TODO: better estimate? Skip the `Vec` and write to a file directly?
510 // TODO: better estimate? Skip the `Vec` and write to a file directly?
508 let size_guess = std::mem::size_of::<Node>()
511 let size_guess = std::mem::size_of::<Node>()
509 * dirstate_map.nodes_with_entry_count as usize;
512 * dirstate_map.nodes_with_entry_count as usize;
510
513
511 let mut writer = Writer {
514 let mut writer = Writer {
512 dirstate_map,
515 dirstate_map,
513 append,
516 append,
514 out: Vec::with_capacity(size_guess),
517 out: Vec::with_capacity(size_guess),
515 };
518 };
516
519
517 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
520 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
518
521
519 let meta = TreeMetadata {
522 let meta = TreeMetadata {
520 root_nodes,
523 root_nodes,
521 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
524 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
522 nodes_with_copy_source_count: dirstate_map
525 nodes_with_copy_source_count: dirstate_map
523 .nodes_with_copy_source_count
526 .nodes_with_copy_source_count
524 .into(),
527 .into(),
525 unreachable_bytes: dirstate_map.unreachable_bytes.into(),
528 unreachable_bytes: dirstate_map.unreachable_bytes.into(),
526 unused: [0; 4],
529 unused: [0; 4],
527 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
530 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
528 };
531 };
529 Ok((writer.out, meta.as_bytes().to_vec(), append))
532 Ok((writer.out, meta.as_bytes().to_vec(), append))
530 }
533 }
531
534
/// State used by `write` while serializing a `DirstateMap`.
struct Writer<'dmap, 'on_disk> {
    /// The map being serialized
    dirstate_map: &'dmap DirstateMap<'on_disk>,
    /// Whether `out` is to be appended to the existing data file rather
    /// than written to a new one
    append: bool,
    /// Serialized bytes accumulated so far
    out: Vec<u8>,
}
537
540
538 impl Writer<'_, '_> {
541 impl Writer<'_, '_> {
539 fn write_nodes(
542 fn write_nodes(
540 &mut self,
543 &mut self,
541 nodes: dirstate_map::ChildNodesRef,
544 nodes: dirstate_map::ChildNodesRef,
542 ) -> Result<ChildNodes, DirstateError> {
545 ) -> Result<ChildNodes, DirstateError> {
543 // Reuse already-written nodes if possible
546 // Reuse already-written nodes if possible
544 if self.append {
547 if self.append {
545 if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
548 if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
546 let start = self.on_disk_offset_of(nodes_slice).expect(
549 let start = self.on_disk_offset_of(nodes_slice).expect(
547 "dirstate-v2 OnDisk nodes not found within on_disk",
550 "dirstate-v2 OnDisk nodes not found within on_disk",
548 );
551 );
549 let len = child_nodes_len_from_usize(nodes_slice.len());
552 let len = child_nodes_len_from_usize(nodes_slice.len());
550 return Ok(ChildNodes { start, len });
553 return Ok(ChildNodes { start, len });
551 }
554 }
552 }
555 }
553
556
554 // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
557 // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
555 // undefined iteration order. Sort to enable binary search in the
558 // undefined iteration order. Sort to enable binary search in the
556 // written file.
559 // written file.
557 let nodes = nodes.sorted();
560 let nodes = nodes.sorted();
558 let nodes_len = nodes.len();
561 let nodes_len = nodes.len();
559
562
560 // First accumulate serialized nodes in a `Vec`
563 // First accumulate serialized nodes in a `Vec`
561 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
564 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
562 for node in nodes {
565 for node in nodes {
563 let children =
566 let children =
564 self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
567 self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
565 let full_path = node.full_path(self.dirstate_map.on_disk)?;
568 let full_path = node.full_path(self.dirstate_map.on_disk)?;
566 let full_path = self.write_path(full_path.as_bytes());
569 let full_path = self.write_path(full_path.as_bytes());
567 let copy_source = if let Some(source) =
570 let copy_source = if let Some(source) =
568 node.copy_source(self.dirstate_map.on_disk)?
571 node.copy_source(self.dirstate_map.on_disk)?
569 {
572 {
570 self.write_path(source.as_bytes())
573 self.write_path(source.as_bytes())
571 } else {
574 } else {
572 PathSlice {
575 PathSlice {
573 start: 0.into(),
576 start: 0.into(),
574 len: 0.into(),
577 len: 0.into(),
575 }
578 }
576 };
579 };
577 on_disk_nodes.push(match node {
580 on_disk_nodes.push(match node {
578 NodeRef::InMemory(path, node) => {
581 NodeRef::InMemory(path, node) => {
579 let (flags, size, mtime) = match &node.data {
582 let (flags, size, mtime) = match &node.data {
580 dirstate_map::NodeData::Entry(entry) => {
583 dirstate_map::NodeData::Entry(entry) => {
581 Node::from_dirstate_entry(entry)
584 Node::from_dirstate_entry(entry)
582 }
585 }
583 dirstate_map::NodeData::CachedDirectory { mtime } => {
586 dirstate_map::NodeData::CachedDirectory { mtime } => (
584 (Flags::HAS_MTIME, 0.into(), (*mtime).into())
587 Flags::HAS_DIRECTORY_MTIME,
585 }
588 0.into(),
589 (*mtime).into(),
590 ),
586 dirstate_map::NodeData::None => (
591 dirstate_map::NodeData::None => (
587 Flags::empty(),
592 Flags::empty(),
588 0.into(),
593 0.into(),
589 PackedTruncatedTimestamp::null(),
594 PackedTruncatedTimestamp::null(),
590 ),
595 ),
591 };
596 };
592 Node {
597 Node {
593 children,
598 children,
594 copy_source,
599 copy_source,
595 full_path,
600 full_path,
596 base_name_start: u16::try_from(path.base_name_start())
601 base_name_start: u16::try_from(path.base_name_start())
597 // Could only panic for paths over 64 KiB
602 // Could only panic for paths over 64 KiB
598 .expect("dirstate-v2 path length overflow")
603 .expect("dirstate-v2 path length overflow")
599 .into(),
604 .into(),
600 descendants_with_entry_count: node
605 descendants_with_entry_count: node
601 .descendants_with_entry_count
606 .descendants_with_entry_count
602 .into(),
607 .into(),
603 tracked_descendants_count: node
608 tracked_descendants_count: node
604 .tracked_descendants_count
609 .tracked_descendants_count
605 .into(),
610 .into(),
606 flags: flags.bits().into(),
611 flags: flags.bits().into(),
607 size,
612 size,
608 mtime,
613 mtime,
609 }
614 }
610 }
615 }
611 NodeRef::OnDisk(node) => Node {
616 NodeRef::OnDisk(node) => Node {
612 children,
617 children,
613 copy_source,
618 copy_source,
614 full_path,
619 full_path,
615 ..*node
620 ..*node
616 },
621 },
617 })
622 })
618 }
623 }
619 // … so we can write them contiguously, after writing everything else
624 // … so we can write them contiguously, after writing everything else
620 // they refer to.
625 // they refer to.
621 let start = self.current_offset();
626 let start = self.current_offset();
622 let len = child_nodes_len_from_usize(nodes_len);
627 let len = child_nodes_len_from_usize(nodes_len);
623 self.out.extend(on_disk_nodes.as_bytes());
628 self.out.extend(on_disk_nodes.as_bytes());
624 Ok(ChildNodes { start, len })
629 Ok(ChildNodes { start, len })
625 }
630 }
626
631
627 /// If the given slice of items is within `on_disk`, returns its offset
632 /// If the given slice of items is within `on_disk`, returns its offset
628 /// from the start of `on_disk`.
633 /// from the start of `on_disk`.
629 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
634 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
630 where
635 where
631 T: BytesCast,
636 T: BytesCast,
632 {
637 {
633 fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
638 fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
634 let start = slice.as_ptr() as usize;
639 let start = slice.as_ptr() as usize;
635 let end = start + slice.len();
640 let end = start + slice.len();
636 start..=end
641 start..=end
637 }
642 }
638 let slice_addresses = address_range(slice.as_bytes());
643 let slice_addresses = address_range(slice.as_bytes());
639 let on_disk_addresses = address_range(self.dirstate_map.on_disk);
644 let on_disk_addresses = address_range(self.dirstate_map.on_disk);
640 if on_disk_addresses.contains(slice_addresses.start())
645 if on_disk_addresses.contains(slice_addresses.start())
641 && on_disk_addresses.contains(slice_addresses.end())
646 && on_disk_addresses.contains(slice_addresses.end())
642 {
647 {
643 let offset = slice_addresses.start() - on_disk_addresses.start();
648 let offset = slice_addresses.start() - on_disk_addresses.start();
644 Some(offset_from_usize(offset))
649 Some(offset_from_usize(offset))
645 } else {
650 } else {
646 None
651 None
647 }
652 }
648 }
653 }
649
654
650 fn current_offset(&mut self) -> Offset {
655 fn current_offset(&mut self) -> Offset {
651 let mut offset = self.out.len();
656 let mut offset = self.out.len();
652 if self.append {
657 if self.append {
653 offset += self.dirstate_map.on_disk.len()
658 offset += self.dirstate_map.on_disk.len()
654 }
659 }
655 offset_from_usize(offset)
660 offset_from_usize(offset)
656 }
661 }
657
662
658 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
663 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
659 let len = path_len_from_usize(slice.len());
664 let len = path_len_from_usize(slice.len());
660 // Reuse an already-written path if possible
665 // Reuse an already-written path if possible
661 if self.append {
666 if self.append {
662 if let Some(start) = self.on_disk_offset_of(slice) {
667 if let Some(start) = self.on_disk_offset_of(slice) {
663 return PathSlice { start, len };
668 return PathSlice { start, len };
664 }
669 }
665 }
670 }
666 let start = self.current_offset();
671 let start = self.current_offset();
667 self.out.extend(slice.as_bytes());
672 self.out.extend(slice.as_bytes());
668 PathSlice { start, len }
673 PathSlice { start, len }
669 }
674 }
670 }
675 }
671
676
672 fn offset_from_usize(x: usize) -> Offset {
677 fn offset_from_usize(x: usize) -> Offset {
673 u32::try_from(x)
678 u32::try_from(x)
674 // Could only panic for a dirstate file larger than 4 GiB
679 // Could only panic for a dirstate file larger than 4 GiB
675 .expect("dirstate-v2 offset overflow")
680 .expect("dirstate-v2 offset overflow")
676 .into()
681 .into()
677 }
682 }
678
683
679 fn child_nodes_len_from_usize(x: usize) -> Size {
684 fn child_nodes_len_from_usize(x: usize) -> Size {
680 u32::try_from(x)
685 u32::try_from(x)
681 // Could only panic with over 4 billion nodes
686 // Could only panic with over 4 billion nodes
682 .expect("dirstate-v2 slice length overflow")
687 .expect("dirstate-v2 slice length overflow")
683 .into()
688 .into()
684 }
689 }
685
690
686 fn path_len_from_usize(x: usize) -> PathSize {
691 fn path_len_from_usize(x: usize) -> PathSize {
687 u16::try_from(x)
692 u16::try_from(x)
688 // Could only panic for paths over 64 KiB
693 // Could only panic for paths over 64 KiB
689 .expect("dirstate-v2 path length overflow")
694 .expect("dirstate-v2 path length overflow")
690 .into()
695 .into()
691 }
696 }
692
697
693 impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
698 impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
694 fn from(timestamp: TruncatedTimestamp) -> Self {
699 fn from(timestamp: TruncatedTimestamp) -> Self {
695 Self {
700 Self {
696 truncated_seconds: timestamp.truncated_seconds().into(),
701 truncated_seconds: timestamp.truncated_seconds().into(),
697 nanoseconds: timestamp.nanoseconds().into(),
702 nanoseconds: timestamp.nanoseconds().into(),
698 }
703 }
699 }
704 }
700 }
705 }
701
706
702 impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
707 impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
703 type Error = DirstateV2ParseError;
708 type Error = DirstateV2ParseError;
704
709
705 fn try_from(
710 fn try_from(
706 timestamp: PackedTruncatedTimestamp,
711 timestamp: PackedTruncatedTimestamp,
707 ) -> Result<Self, Self::Error> {
712 ) -> Result<Self, Self::Error> {
708 Self::from_already_truncated(
713 Self::from_already_truncated(
709 timestamp.truncated_seconds.get(),
714 timestamp.truncated_seconds.get(),
710 timestamp.nanoseconds.get(),
715 timestamp.nanoseconds.get(),
711 )
716 )
712 }
717 }
713 }
718 }
714 impl PackedTruncatedTimestamp {
719 impl PackedTruncatedTimestamp {
715 fn null() -> Self {
720 fn null() -> Self {
716 Self {
721 Self {
717 truncated_seconds: 0.into(),
722 truncated_seconds: 0.into(),
718 nanoseconds: 0.into(),
723 nanoseconds: 0.into(),
719 }
724 }
720 }
725 }
721 }
726 }
General Comments 0
You need to be logged in to leave comments. Login now