parsers: inline fields of dirstate values in C version...
Siddharth Agarwal
r21809:e250b830 default
@@ -1,305 +1,292 b'mercurial/dirs.c'
1 1 /*
2 2 dirs.c - dynamic directory diddling for dirstates
3 3
4 4 Copyright 2013 Facebook
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12 #include "util.h"
13 13
14 14 /*
15 15 * This is a multiset of directory names, built from the files that
16 16 * appear in a dirstate or manifest.
17 17 *
18 18 * A few implementation notes:
19 19 *
20 20 * We modify Python integers for refcounting, but those integers are
21 21 * never visible to Python code.
22 22 *
23 23 * We mutate strings in-place, but leave them immutable once they can
24 24 * be seen by Python code.
25 25 */
26 26 typedef struct {
27 27 PyObject_HEAD
28 28 PyObject *dict;
29 29 } dirsObject;
30 30
31 31 static inline Py_ssize_t _finddir(PyObject *path, Py_ssize_t pos)
32 32 {
33 33 const char *s = PyString_AS_STRING(path);
34 34
35 35 while (pos != -1) {
36 36 if (s[pos] == '/')
37 37 break;
38 38 pos -= 1;
39 39 }
40 40
41 41 return pos;
42 42 }
43 43
44 44 static int _addpath(PyObject *dirs, PyObject *path)
45 45 {
46 46 const char *cpath = PyString_AS_STRING(path);
47 47 Py_ssize_t pos = PyString_GET_SIZE(path);
48 48 PyObject *key = NULL;
49 49 int ret = -1;
50 50
51 51 while ((pos = _finddir(path, pos - 1)) != -1) {
52 52 PyObject *val;
53 53
54 54 /* It's likely that every prefix already has an entry
55 55 in our dict. Try to avoid allocating and
56 56 deallocating a string for each prefix we check. */
57 57 if (key != NULL)
58 58 ((PyStringObject *)key)->ob_shash = -1;
59 59 else {
60 60 /* Force Python to not reuse a small shared string. */
61 61 key = PyString_FromStringAndSize(cpath,
62 62 pos < 2 ? 2 : pos);
63 63 if (key == NULL)
64 64 goto bail;
65 65 }
66 66 PyString_GET_SIZE(key) = pos;
67 67 PyString_AS_STRING(key)[pos] = '\0';
68 68
69 69 val = PyDict_GetItem(dirs, key);
70 70 if (val != NULL) {
71 71 PyInt_AS_LONG(val) += 1;
72 72 continue;
73 73 }
74 74
75 75 /* Force Python to not reuse a small shared int. */
76 76 val = PyInt_FromLong(0x1eadbeef);
77 77
78 78 if (val == NULL)
79 79 goto bail;
80 80
81 81 PyInt_AS_LONG(val) = 1;
82 82 ret = PyDict_SetItem(dirs, key, val);
83 83 Py_DECREF(val);
84 84 if (ret == -1)
85 85 goto bail;
86 86 Py_CLEAR(key);
87 87 }
88 88 ret = 0;
89 89
90 90 bail:
91 91 Py_XDECREF(key);
92 92
93 93 return ret;
94 94 }
95 95
96 96 static int _delpath(PyObject *dirs, PyObject *path)
97 97 {
98 98 Py_ssize_t pos = PyString_GET_SIZE(path);
99 99 PyObject *key = NULL;
100 100 int ret = -1;
101 101
102 102 while ((pos = _finddir(path, pos - 1)) != -1) {
103 103 PyObject *val;
104 104
105 105 key = PyString_FromStringAndSize(PyString_AS_STRING(path), pos);
106 106
107 107 if (key == NULL)
108 108 goto bail;
109 109
110 110 val = PyDict_GetItem(dirs, key);
111 111 if (val == NULL) {
112 112 PyErr_SetString(PyExc_ValueError,
113 113 "expected a value, found none");
114 114 goto bail;
115 115 }
116 116
117 117 if (--PyInt_AS_LONG(val) <= 0 &&
118 118 PyDict_DelItem(dirs, key) == -1)
119 119 goto bail;
120 120 Py_CLEAR(key);
121 121 }
122 122 ret = 0;
123 123
124 124 bail:
125 125 Py_XDECREF(key);
126 126
127 127 return ret;
128 128 }
129 129
130 130 static int dirs_fromdict(PyObject *dirs, PyObject *source, char skipchar)
131 131 {
132 132 PyObject *key, *value;
133 133 Py_ssize_t pos = 0;
134 134
135 135 while (PyDict_Next(source, &pos, &key, &value)) {
136 136 if (!PyString_Check(key)) {
137 137 PyErr_SetString(PyExc_TypeError, "expected string key");
138 138 return -1;
139 139 }
140 140 if (skipchar) {
141 PyObject *st;
142
143 if (!PyTuple_Check(value) ||
144 PyTuple_GET_SIZE(value) == 0) {
141 if (!dirstate_tuple_check(value)) {
145 142 PyErr_SetString(PyExc_TypeError,
146 "expected non-empty tuple");
143 "expected a dirstate tuple");
147 144 return -1;
148 145 }
149
150 st = PyTuple_GET_ITEM(value, 0);
151
152 if (!PyString_Check(st) || PyString_GET_SIZE(st) == 0) {
153 PyErr_SetString(PyExc_TypeError,
154 "expected non-empty string "
155 "at tuple index 0");
156 return -1;
157 }
158
159 if (PyString_AS_STRING(st)[0] == skipchar)
146 if (((dirstateTupleObject *)value)->state == skipchar)
160 147 continue;
161 148 }
162 149
163 150 if (_addpath(dirs, key) == -1)
164 151 return -1;
165 152 }
166 153
167 154 return 0;
168 155 }
169 156
170 157 static int dirs_fromiter(PyObject *dirs, PyObject *source)
171 158 {
172 159 PyObject *iter, *item = NULL;
173 160 int ret;
174 161
175 162 iter = PyObject_GetIter(source);
176 163 if (iter == NULL)
177 164 return -1;
178 165
179 166 while ((item = PyIter_Next(iter)) != NULL) {
180 167 if (!PyString_Check(item)) {
181 168 PyErr_SetString(PyExc_TypeError, "expected string");
182 169 break;
183 170 }
184 171
185 172 if (_addpath(dirs, item) == -1)
186 173 break;
187 174 Py_CLEAR(item);
188 175 }
189 176
190 177 ret = PyErr_Occurred() ? -1 : 0;
191 178 Py_XDECREF(item);
192 179 return ret;
193 180 }
194 181
195 182 /*
196 183 * Calculate a refcounted set of directory names for the files in a
197 184 * dirstate.
198 185 */
199 186 static int dirs_init(dirsObject *self, PyObject *args)
200 187 {
201 188 PyObject *dirs = NULL, *source = NULL;
202 189 char skipchar = 0;
203 190 int ret = -1;
204 191
205 192 self->dict = NULL;
206 193
207 194 if (!PyArg_ParseTuple(args, "|Oc:__init__", &source, &skipchar))
208 195 return -1;
209 196
210 197 dirs = PyDict_New();
211 198
212 199 if (dirs == NULL)
213 200 return -1;
214 201
215 202 if (source == NULL)
216 203 ret = 0;
217 204 else if (PyDict_Check(source))
218 205 ret = dirs_fromdict(dirs, source, skipchar);
219 206 else if (skipchar)
220 207 PyErr_SetString(PyExc_ValueError,
221 208 "skip character is only supported "
222 209 "with a dict source");
223 210 else
224 211 ret = dirs_fromiter(dirs, source);
225 212
226 213 if (ret == -1)
227 214 Py_XDECREF(dirs);
228 215 else
229 216 self->dict = dirs;
230 217
231 218 return ret;
232 219 }
233 220
234 221 PyObject *dirs_addpath(dirsObject *self, PyObject *args)
235 222 {
236 223 PyObject *path;
237 224
238 225 if (!PyArg_ParseTuple(args, "O!:addpath", &PyString_Type, &path))
239 226 return NULL;
240 227
241 228 if (_addpath(self->dict, path) == -1)
242 229 return NULL;
243 230
244 231 Py_RETURN_NONE;
245 232 }
246 233
247 234 static PyObject *dirs_delpath(dirsObject *self, PyObject *args)
248 235 {
249 236 PyObject *path;
250 237
251 238 if (!PyArg_ParseTuple(args, "O!:delpath", &PyString_Type, &path))
252 239 return NULL;
253 240
254 241 if (_delpath(self->dict, path) == -1)
255 242 return NULL;
256 243
257 244 Py_RETURN_NONE;
258 245 }
259 246
260 247 static int dirs_contains(dirsObject *self, PyObject *value)
261 248 {
262 249 return PyString_Check(value) ? PyDict_Contains(self->dict, value) : 0;
263 250 }
264 251
265 252 static void dirs_dealloc(dirsObject *self)
266 253 {
267 254 Py_XDECREF(self->dict);
268 255 PyObject_Del(self);
269 256 }
270 257
271 258 static PyObject *dirs_iter(dirsObject *self)
272 259 {
273 260 return PyObject_GetIter(self->dict);
274 261 }
275 262
276 263 static PySequenceMethods dirs_sequence_methods;
277 264
278 265 static PyMethodDef dirs_methods[] = {
279 266 {"addpath", (PyCFunction)dirs_addpath, METH_VARARGS, "add a path"},
280 267 {"delpath", (PyCFunction)dirs_delpath, METH_VARARGS, "remove a path"},
281 268 {NULL} /* Sentinel */
282 269 };
283 270
284 271 static PyTypeObject dirsType = { PyObject_HEAD_INIT(NULL) };
285 272
286 273 void dirs_module_init(PyObject *mod)
287 274 {
288 275 dirs_sequence_methods.sq_contains = (objobjproc)dirs_contains;
289 276 dirsType.tp_name = "parsers.dirs";
290 277 dirsType.tp_new = PyType_GenericNew;
291 278 dirsType.tp_basicsize = sizeof(dirsObject);
292 279 dirsType.tp_dealloc = (destructor)dirs_dealloc;
293 280 dirsType.tp_as_sequence = &dirs_sequence_methods;
294 281 dirsType.tp_flags = Py_TPFLAGS_DEFAULT;
295 282 dirsType.tp_doc = "dirs";
296 283 dirsType.tp_iter = (getiterfunc)dirs_iter;
297 284 dirsType.tp_methods = dirs_methods;
298 285 dirsType.tp_init = (initproc)dirs_init;
299 286
300 287 if (PyType_Ready(&dirsType) < 0)
301 288 return;
302 289 Py_INCREF(&dirsType);
303 290
304 291 PyModule_AddObject(mod, "dirs", (PyObject *)&dirsType);
305 292 }
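
The dirs type above is a reference-counted multiset of directory prefixes: _addpath bumps a counter for every parent directory of a path, and _delpath drops a directory once its count falls to zero. A rough pure-Python sketch of those semantics (illustrative names only; the C code additionally recycles key strings and patches int refcounts, as its header comment explains):

    def _finddirs(path):
        # yield every proper directory prefix of path, deepest first,
        # matching the '/' scan done by _finddir() above
        pos = path.rfind('/')
        while pos != -1:
            yield path[:pos]
            pos = path.rfind('/', 0, pos)

    class dirsmultiset(object):
        def __init__(self, paths=()):
            self._dirs = {}
            for p in paths:
                self.addpath(p)

        def addpath(self, path):
            for d in _finddirs(path):
                self._dirs[d] = self._dirs.get(d, 0) + 1

        def delpath(self, path):
            for d in _finddirs(path):
                if self._dirs[d] <= 1:
                    del self._dirs[d]
                else:
                    self._dirs[d] -= 1

        def __contains__(self, d):
            return d in self._dirs

        def __iter__(self):
            return iter(self._dirs)

With the change to dirs_fromdict above, the optional skip character is now compared against the state field of the C dirstate tuple instead of the first element of a plain Python tuple.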
@@ -1,858 +1,856 b'mercurial/dirstate.py'
1 1 # dirstate.py - working directory tracking for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from node import nullid
9 9 from i18n import _
10 10 import scmutil, util, ignore, osutil, parsers, encoding, pathutil
11 11 import os, stat, errno, gc
12 12
13 13 propertycache = util.propertycache
14 14 filecache = scmutil.filecache
15 15 _rangemask = 0x7fffffff
16 16
17 def dirstatetuple(*x):
18 # x is a tuple
19 return x
17 dirstatetuple = parsers.dirstatetuple
20 18
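
The removed shim simply returned its arguments as a plain Python tuple; parsers.dirstatetuple, defined in parsers.c below, stores the same four fields inline as a char and three C ints, so large dirstates no longer allocate a Python object per field. Call sites keep working because the C type still behaves like a 4-element sequence, for example (illustrative values):

    entry = dirstatetuple('n', 0644, 1024, 1400000000)
    state, mode, size, mtime = entry          # sequence unpacking still works
    assert len(entry) == 4 and entry[0] == 'n'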
21 19 class repocache(filecache):
22 20 """filecache for files in .hg/"""
23 21 def join(self, obj, fname):
24 22 return obj._opener.join(fname)
25 23
26 24 class rootcache(filecache):
27 25 """filecache for files in the repository root"""
28 26 def join(self, obj, fname):
29 27 return obj._join(fname)
30 28
31 29 class dirstate(object):
32 30
33 31 def __init__(self, opener, ui, root, validate):
34 32 '''Create a new dirstate object.
35 33
36 34 opener is an open()-like callable that can be used to open the
37 35 dirstate file; root is the root of the directory tracked by
38 36 the dirstate.
39 37 '''
40 38 self._opener = opener
41 39 self._validate = validate
42 40 self._root = root
43 41 self._rootdir = os.path.join(root, '')
44 42 self._dirty = False
45 43 self._dirtypl = False
46 44 self._lastnormaltime = 0
47 45 self._ui = ui
48 46 self._filecache = {}
49 47
50 48 @propertycache
51 49 def _map(self):
52 50 '''Return the dirstate contents as a map from filename to
53 51 (state, mode, size, time).'''
54 52 self._read()
55 53 return self._map
56 54
57 55 @propertycache
58 56 def _copymap(self):
59 57 self._read()
60 58 return self._copymap
61 59
62 60 @propertycache
63 61 def _foldmap(self):
64 62 f = {}
65 63 for name, s in self._map.iteritems():
66 64 if s[0] != 'r':
67 65 f[util.normcase(name)] = name
68 66 for name in self._dirs:
69 67 f[util.normcase(name)] = name
70 68 f['.'] = '.' # prevents useless util.fspath() invocation
71 69 return f
72 70
73 71 @repocache('branch')
74 72 def _branch(self):
75 73 try:
76 74 return self._opener.read("branch").strip() or "default"
77 75 except IOError, inst:
78 76 if inst.errno != errno.ENOENT:
79 77 raise
80 78 return "default"
81 79
82 80 @propertycache
83 81 def _pl(self):
84 82 try:
85 83 fp = self._opener("dirstate")
86 84 st = fp.read(40)
87 85 fp.close()
88 86 l = len(st)
89 87 if l == 40:
90 88 return st[:20], st[20:40]
91 89 elif l > 0 and l < 40:
92 90 raise util.Abort(_('working directory state appears damaged!'))
93 91 except IOError, err:
94 92 if err.errno != errno.ENOENT:
95 93 raise
96 94 return [nullid, nullid]
97 95
98 96 @propertycache
99 97 def _dirs(self):
100 98 return scmutil.dirs(self._map, 'r')
101 99
102 100 def dirs(self):
103 101 return self._dirs
104 102
105 103 @rootcache('.hgignore')
106 104 def _ignore(self):
107 105 files = [self._join('.hgignore')]
108 106 for name, path in self._ui.configitems("ui"):
109 107 if name == 'ignore' or name.startswith('ignore.'):
110 108 files.append(util.expandpath(path))
111 109 return ignore.ignore(self._root, files, self._ui.warn)
112 110
113 111 @propertycache
114 112 def _slash(self):
115 113 return self._ui.configbool('ui', 'slash') and os.sep != '/'
116 114
117 115 @propertycache
118 116 def _checklink(self):
119 117 return util.checklink(self._root)
120 118
121 119 @propertycache
122 120 def _checkexec(self):
123 121 return util.checkexec(self._root)
124 122
125 123 @propertycache
126 124 def _checkcase(self):
127 125 return not util.checkcase(self._join('.hg'))
128 126
129 127 def _join(self, f):
130 128 # much faster than os.path.join()
131 129 # it's safe because f is always a relative path
132 130 return self._rootdir + f
133 131
134 132 def flagfunc(self, buildfallback):
135 133 if self._checklink and self._checkexec:
136 134 def f(x):
137 135 try:
138 136 st = os.lstat(self._join(x))
139 137 if util.statislink(st):
140 138 return 'l'
141 139 if util.statisexec(st):
142 140 return 'x'
143 141 except OSError:
144 142 pass
145 143 return ''
146 144 return f
147 145
148 146 fallback = buildfallback()
149 147 if self._checklink:
150 148 def f(x):
151 149 if os.path.islink(self._join(x)):
152 150 return 'l'
153 151 if 'x' in fallback(x):
154 152 return 'x'
155 153 return ''
156 154 return f
157 155 if self._checkexec:
158 156 def f(x):
159 157 if 'l' in fallback(x):
160 158 return 'l'
161 159 if util.isexec(self._join(x)):
162 160 return 'x'
163 161 return ''
164 162 return f
165 163 else:
166 164 return fallback
167 165
168 166 @propertycache
169 167 def _cwd(self):
170 168 return os.getcwd()
171 169
172 170 def getcwd(self):
173 171 cwd = self._cwd
174 172 if cwd == self._root:
175 173 return ''
176 174 # self._root ends with a path separator if self._root is '/' or 'C:\'
177 175 rootsep = self._root
178 176 if not util.endswithsep(rootsep):
179 177 rootsep += os.sep
180 178 if cwd.startswith(rootsep):
181 179 return cwd[len(rootsep):]
182 180 else:
183 181 # we're outside the repo. return an absolute path.
184 182 return cwd
185 183
186 184 def pathto(self, f, cwd=None):
187 185 if cwd is None:
188 186 cwd = self.getcwd()
189 187 path = util.pathto(self._root, cwd, f)
190 188 if self._slash:
191 189 return util.pconvert(path)
192 190 return path
193 191
194 192 def __getitem__(self, key):
195 193 '''Return the current state of key (a filename) in the dirstate.
196 194
197 195 States are:
198 196 n normal
199 197 m needs merging
200 198 r marked for removal
201 199 a marked for addition
202 200 ? not tracked
203 201 '''
204 202 return self._map.get(key, ("?",))[0]
205 203
206 204 def __contains__(self, key):
207 205 return key in self._map
208 206
209 207 def __iter__(self):
210 208 for x in sorted(self._map):
211 209 yield x
212 210
213 211 def iteritems(self):
214 212 return self._map.iteritems()
215 213
216 214 def parents(self):
217 215 return [self._validate(p) for p in self._pl]
218 216
219 217 def p1(self):
220 218 return self._validate(self._pl[0])
221 219
222 220 def p2(self):
223 221 return self._validate(self._pl[1])
224 222
225 223 def branch(self):
226 224 return encoding.tolocal(self._branch)
227 225
228 226 def setparents(self, p1, p2=nullid):
229 227 """Set dirstate parents to p1 and p2.
230 228
231 229 When moving from two parents to one, 'm' merged entries are
232 230 adjusted to normal and the previous copy records are discarded
233 231 and returned by the call.
234 232
235 233 See localrepo.setparents()
236 234 """
237 235 self._dirty = self._dirtypl = True
238 236 oldp2 = self._pl[1]
239 237 self._pl = p1, p2
240 238 copies = {}
241 239 if oldp2 != nullid and p2 == nullid:
242 240 # Discard 'm' markers when moving away from a merge state
243 241 for f, s in self._map.iteritems():
244 242 if s[0] == 'm':
245 243 if f in self._copymap:
246 244 copies[f] = self._copymap[f]
247 245 self.normallookup(f)
248 246 return copies
249 247
250 248 def setbranch(self, branch):
251 249 self._branch = encoding.fromlocal(branch)
252 250 f = self._opener('branch', 'w', atomictemp=True)
253 251 try:
254 252 f.write(self._branch + '\n')
255 253 f.close()
256 254
257 255 # make sure filecache has the correct stat info for _branch after
258 256 # replacing the underlying file
259 257 ce = self._filecache['_branch']
260 258 if ce:
261 259 ce.refresh()
262 260 except: # re-raises
263 261 f.discard()
264 262 raise
265 263
266 264 def _read(self):
267 265 self._map = {}
268 266 self._copymap = {}
269 267 try:
270 268 st = self._opener.read("dirstate")
271 269 except IOError, err:
272 270 if err.errno != errno.ENOENT:
273 271 raise
274 272 return
275 273 if not st:
276 274 return
277 275
278 276 # Python's garbage collector triggers a GC each time a certain number
279 277 # of container objects (the number being defined by
280 278 # gc.get_threshold()) are allocated. parse_dirstate creates a tuple
281 279 # for each file in the dirstate. The C version then immediately marks
282 280 # them as not to be tracked by the collector. However, this has no
283 281 # effect on when GCs are triggered, only on what objects the GC looks
284 282 # into. This means that O(number of files) GCs are unavoidable.
285 283 # Depending on when in the process's lifetime the dirstate is parsed,
286 284 # this can get very expensive. As a workaround, disable GC while
287 285 # parsing the dirstate.
288 286 gcenabled = gc.isenabled()
289 287 gc.disable()
290 288 try:
291 289 p = parsers.parse_dirstate(self._map, self._copymap, st)
292 290 finally:
293 291 if gcenabled:
294 292 gc.enable()
295 293 if not self._dirtypl:
296 294 self._pl = p
297 295
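
The guard around parse_dirstate above can be pictured as a small context manager; this is only a sketch of the same isenabled/disable/enable dance, not code used by dirstate itself:

    import gc
    from contextlib import contextmanager

    @contextmanager
    def gcdisabled():
        # only re-enable the collector if it was enabled to begin with
        enabled = gc.isenabled()
        gc.disable()
        try:
            yield
        finally:
            if enabled:
                gc.enable()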
298 296 def invalidate(self):
299 297 for a in ("_map", "_copymap", "_foldmap", "_branch", "_pl", "_dirs",
300 298 "_ignore"):
301 299 if a in self.__dict__:
302 300 delattr(self, a)
303 301 self._lastnormaltime = 0
304 302 self._dirty = False
305 303
306 304 def copy(self, source, dest):
307 305 """Mark dest as a copy of source. Unmark dest if source is None."""
308 306 if source == dest:
309 307 return
310 308 self._dirty = True
311 309 if source is not None:
312 310 self._copymap[dest] = source
313 311 elif dest in self._copymap:
314 312 del self._copymap[dest]
315 313
316 314 def copied(self, file):
317 315 return self._copymap.get(file, None)
318 316
319 317 def copies(self):
320 318 return self._copymap
321 319
322 320 def _droppath(self, f):
323 321 if self[f] not in "?r" and "_dirs" in self.__dict__:
324 322 self._dirs.delpath(f)
325 323
326 324 def _addpath(self, f, state, mode, size, mtime):
327 325 oldstate = self[f]
328 326 if state == 'a' or oldstate == 'r':
329 327 scmutil.checkfilename(f)
330 328 if f in self._dirs:
331 329 raise util.Abort(_('directory %r already in dirstate') % f)
332 330 # shadows
333 331 for d in scmutil.finddirs(f):
334 332 if d in self._dirs:
335 333 break
336 334 if d in self._map and self[d] != 'r':
337 335 raise util.Abort(
338 336 _('file %r in dirstate clashes with %r') % (d, f))
339 337 if oldstate in "?r" and "_dirs" in self.__dict__:
340 338 self._dirs.addpath(f)
341 339 self._dirty = True
342 340 self._map[f] = dirstatetuple(state, mode, size, mtime)
343 341
344 342 def normal(self, f):
345 343 '''Mark a file normal and clean.'''
346 344 s = os.lstat(self._join(f))
347 345 mtime = int(s.st_mtime)
348 346 self._addpath(f, 'n', s.st_mode,
349 347 s.st_size & _rangemask, mtime & _rangemask)
350 348 if f in self._copymap:
351 349 del self._copymap[f]
352 350 if mtime > self._lastnormaltime:
353 351 # Remember the most recent modification timeslot for status(),
354 352 # to make sure we won't miss future size-preserving file content
355 353 # modifications that happen within the same timeslot.
356 354 self._lastnormaltime = mtime
357 355
358 356 def normallookup(self, f):
359 357 '''Mark a file normal, but possibly dirty.'''
360 358 if self._pl[1] != nullid and f in self._map:
361 359 # if there is a merge going on and the file was either
362 360 # in state 'm' (-1) or coming from other parent (-2) before
363 361 # being removed, restore that state.
364 362 entry = self._map[f]
365 363 if entry[0] == 'r' and entry[2] in (-1, -2):
366 364 source = self._copymap.get(f)
367 365 if entry[2] == -1:
368 366 self.merge(f)
369 367 elif entry[2] == -2:
370 368 self.otherparent(f)
371 369 if source:
372 370 self.copy(source, f)
373 371 return
374 372 if entry[0] == 'm' or entry[0] == 'n' and entry[2] == -2:
375 373 return
376 374 self._addpath(f, 'n', 0, -1, -1)
377 375 if f in self._copymap:
378 376 del self._copymap[f]
379 377
380 378 def otherparent(self, f):
381 379 '''Mark as coming from the other parent, always dirty.'''
382 380 if self._pl[1] == nullid:
383 381 raise util.Abort(_("setting %r to other parent "
384 382 "only allowed in merges") % f)
385 383 self._addpath(f, 'n', 0, -2, -1)
386 384 if f in self._copymap:
387 385 del self._copymap[f]
388 386
389 387 def add(self, f):
390 388 '''Mark a file added.'''
391 389 self._addpath(f, 'a', 0, -1, -1)
392 390 if f in self._copymap:
393 391 del self._copymap[f]
394 392
395 393 def remove(self, f):
396 394 '''Mark a file removed.'''
397 395 self._dirty = True
398 396 self._droppath(f)
399 397 size = 0
400 398 if self._pl[1] != nullid and f in self._map:
401 399 # backup the previous state
402 400 entry = self._map[f]
403 401 if entry[0] == 'm': # merge
404 402 size = -1
405 403 elif entry[0] == 'n' and entry[2] == -2: # other parent
406 404 size = -2
407 405 self._map[f] = dirstatetuple('r', 0, size, 0)
408 406 if size == 0 and f in self._copymap:
409 407 del self._copymap[f]
410 408
411 409 def merge(self, f):
412 410 '''Mark a file merged.'''
413 411 if self._pl[1] == nullid:
414 412 return self.normallookup(f)
415 413 s = os.lstat(self._join(f))
416 414 self._addpath(f, 'm', s.st_mode,
417 415 s.st_size & _rangemask, int(s.st_mtime) & _rangemask)
418 416 if f in self._copymap:
419 417 del self._copymap[f]
420 418
421 419 def drop(self, f):
422 420 '''Drop a file from the dirstate'''
423 421 if f in self._map:
424 422 self._dirty = True
425 423 self._droppath(f)
426 424 del self._map[f]
427 425
428 426 def _normalize(self, path, isknown, ignoremissing=False, exists=None):
429 427 normed = util.normcase(path)
430 428 folded = self._foldmap.get(normed, None)
431 429 if folded is None:
432 430 if isknown:
433 431 folded = path
434 432 else:
435 433 if exists is None:
436 434 exists = os.path.lexists(os.path.join(self._root, path))
437 435 if not exists:
438 436 # Maybe a path component exists
439 437 if not ignoremissing and '/' in path:
440 438 d, f = path.rsplit('/', 1)
441 439 d = self._normalize(d, isknown, ignoremissing, None)
442 440 folded = d + "/" + f
443 441 else:
444 442 # No path components, preserve original case
445 443 folded = path
446 444 else:
447 445 # recursively normalize leading directory components
448 446 # against dirstate
449 447 if '/' in normed:
450 448 d, f = normed.rsplit('/', 1)
451 449 d = self._normalize(d, isknown, ignoremissing, True)
452 450 r = self._root + "/" + d
453 451 folded = d + "/" + util.fspath(f, r)
454 452 else:
455 453 folded = util.fspath(normed, self._root)
456 454 self._foldmap[normed] = folded
457 455
458 456 return folded
459 457
460 458 def normalize(self, path, isknown=False, ignoremissing=False):
461 459 '''
462 460 normalize the case of a pathname when on a casefolding filesystem
463 461
464 462 isknown specifies whether the filename came from walking the
465 463 disk, to avoid extra filesystem access.
466 464
467 465 If ignoremissing is True, missing paths are returned
468 466 unchanged. Otherwise, we try harder to normalize possibly
469 467 existing path components.
470 468
471 469 The normalized case is determined based on the following precedence:
472 470
473 471 - version of name already stored in the dirstate
474 472 - version of name stored on disk
475 473 - version provided via command arguments
476 474 '''
477 475
478 476 if self._checkcase:
479 477 return self._normalize(path, isknown, ignoremissing)
480 478 return path
481 479
482 480 def clear(self):
483 481 self._map = {}
484 482 if "_dirs" in self.__dict__:
485 483 delattr(self, "_dirs")
486 484 self._copymap = {}
487 485 self._pl = [nullid, nullid]
488 486 self._lastnormaltime = 0
489 487 self._dirty = True
490 488
491 489 def rebuild(self, parent, allfiles, changedfiles=None):
492 490 changedfiles = changedfiles or allfiles
493 491 oldmap = self._map
494 492 self.clear()
495 493 for f in allfiles:
496 494 if f not in changedfiles:
497 495 self._map[f] = oldmap[f]
498 496 else:
499 497 if 'x' in allfiles.flags(f):
500 498 self._map[f] = dirstatetuple('n', 0777, -1, 0)
501 499 else:
502 500 self._map[f] = dirstatetuple('n', 0666, -1, 0)
503 501 self._pl = (parent, nullid)
504 502 self._dirty = True
505 503
506 504 def write(self):
507 505 if not self._dirty:
508 506 return
509 507 st = self._opener("dirstate", "w", atomictemp=True)
510 508 # use the modification time of the newly created temporary file as the
511 509 # filesystem's notion of 'now'
512 510 now = util.fstat(st).st_mtime
513 511 st.write(parsers.pack_dirstate(self._map, self._copymap, self._pl, now))
514 512 st.close()
515 513 self._lastnormaltime = 0
516 514 self._dirty = self._dirtypl = False
517 515
518 516 def _dirignore(self, f):
519 517 if f == '.':
520 518 return False
521 519 if self._ignore(f):
522 520 return True
523 521 for p in scmutil.finddirs(f):
524 522 if self._ignore(p):
525 523 return True
526 524 return False
527 525
528 526 def _walkexplicit(self, match, subrepos):
529 527 '''Get stat data about the files explicitly specified by match.
530 528
531 529 Return a triple (results, dirsfound, dirsnotfound).
532 530 - results is a mapping from filename to stat result. It also contains
533 531 listings mapping subrepos and .hg to None.
534 532 - dirsfound is a list of files found to be directories.
535 533 - dirsnotfound is a list of files that the dirstate thinks are
536 534 directories and that were not found.'''
537 535
538 536 def badtype(mode):
539 537 kind = _('unknown')
540 538 if stat.S_ISCHR(mode):
541 539 kind = _('character device')
542 540 elif stat.S_ISBLK(mode):
543 541 kind = _('block device')
544 542 elif stat.S_ISFIFO(mode):
545 543 kind = _('fifo')
546 544 elif stat.S_ISSOCK(mode):
547 545 kind = _('socket')
548 546 elif stat.S_ISDIR(mode):
549 547 kind = _('directory')
550 548 return _('unsupported file type (type is %s)') % kind
551 549
552 550 matchedir = match.explicitdir
553 551 badfn = match.bad
554 552 dmap = self._map
555 553 normpath = util.normpath
556 554 lstat = os.lstat
557 555 getkind = stat.S_IFMT
558 556 dirkind = stat.S_IFDIR
559 557 regkind = stat.S_IFREG
560 558 lnkkind = stat.S_IFLNK
561 559 join = self._join
562 560 dirsfound = []
563 561 foundadd = dirsfound.append
564 562 dirsnotfound = []
565 563 notfoundadd = dirsnotfound.append
566 564
567 565 if match.matchfn != match.exact and self._checkcase:
568 566 normalize = self._normalize
569 567 else:
570 568 normalize = None
571 569
572 570 files = sorted(match.files())
573 571 subrepos.sort()
574 572 i, j = 0, 0
575 573 while i < len(files) and j < len(subrepos):
576 574 subpath = subrepos[j] + "/"
577 575 if files[i] < subpath:
578 576 i += 1
579 577 continue
580 578 while i < len(files) and files[i].startswith(subpath):
581 579 del files[i]
582 580 j += 1
583 581
584 582 if not files or '.' in files:
585 583 files = ['']
586 584 results = dict.fromkeys(subrepos)
587 585 results['.hg'] = None
588 586
589 587 for ff in files:
590 588 if normalize:
591 589 nf = normalize(normpath(ff), False, True)
592 590 else:
593 591 nf = normpath(ff)
594 592 if nf in results:
595 593 continue
596 594
597 595 try:
598 596 st = lstat(join(nf))
599 597 kind = getkind(st.st_mode)
600 598 if kind == dirkind:
601 599 if nf in dmap:
602 600 # file replaced by dir on disk but still in dirstate
603 601 results[nf] = None
604 602 if matchedir:
605 603 matchedir(nf)
606 604 foundadd(nf)
607 605 elif kind == regkind or kind == lnkkind:
608 606 results[nf] = st
609 607 else:
610 608 badfn(ff, badtype(kind))
611 609 if nf in dmap:
612 610 results[nf] = None
613 611 except OSError, inst: # nf not found on disk - it is dirstate only
614 612 if nf in dmap: # does it exactly match a missing file?
615 613 results[nf] = None
616 614 else: # does it match a missing directory?
617 615 prefix = nf + "/"
618 616 for fn in dmap:
619 617 if fn.startswith(prefix):
620 618 if matchedir:
621 619 matchedir(nf)
622 620 notfoundadd(nf)
623 621 break
624 622 else:
625 623 badfn(ff, inst.strerror)
626 624
627 625 return results, dirsfound, dirsnotfound
628 626
629 627 def walk(self, match, subrepos, unknown, ignored, full=True):
630 628 '''
631 629 Walk recursively through the directory tree, finding all files
632 630 matched by match.
633 631
634 632 If full is False, maybe skip some known-clean files.
635 633
636 634 Return a dict mapping filename to stat-like object (either
637 635 mercurial.osutil.stat instance or return value of os.stat()).
638 636
639 637 '''
640 638 # full is a flag that extensions that hook into walk can use -- this
641 639 # implementation doesn't use it at all. This satisfies the contract
642 640 # because we only guarantee a "maybe".
643 641
644 642 if ignored:
645 643 ignore = util.never
646 644 dirignore = util.never
647 645 elif unknown:
648 646 ignore = self._ignore
649 647 dirignore = self._dirignore
650 648 else:
651 649 # if not unknown and not ignored, drop dir recursion and step 2
652 650 ignore = util.always
653 651 dirignore = util.always
654 652
655 653 matchfn = match.matchfn
656 654 matchalways = match.always()
657 655 matchtdir = match.traversedir
658 656 dmap = self._map
659 657 listdir = osutil.listdir
660 658 lstat = os.lstat
661 659 dirkind = stat.S_IFDIR
662 660 regkind = stat.S_IFREG
663 661 lnkkind = stat.S_IFLNK
664 662 join = self._join
665 663
666 664 exact = skipstep3 = False
667 665 if matchfn == match.exact: # match.exact
668 666 exact = True
669 667 dirignore = util.always # skip step 2
670 668 elif match.files() and not match.anypats(): # match.match, no patterns
671 669 skipstep3 = True
672 670
673 671 if not exact and self._checkcase:
674 672 normalize = self._normalize
675 673 skipstep3 = False
676 674 else:
677 675 normalize = None
678 676
679 677 # step 1: find all explicit files
680 678 results, work, dirsnotfound = self._walkexplicit(match, subrepos)
681 679
682 680 skipstep3 = skipstep3 and not (work or dirsnotfound)
683 681 work = [d for d in work if not dirignore(d)]
684 682 wadd = work.append
685 683
686 684 # step 2: visit subdirectories
687 685 while work:
688 686 nd = work.pop()
689 687 skip = None
690 688 if nd == '.':
691 689 nd = ''
692 690 else:
693 691 skip = '.hg'
694 692 try:
695 693 entries = listdir(join(nd), stat=True, skip=skip)
696 694 except OSError, inst:
697 695 if inst.errno in (errno.EACCES, errno.ENOENT):
698 696 match.bad(self.pathto(nd), inst.strerror)
699 697 continue
700 698 raise
701 699 for f, kind, st in entries:
702 700 if normalize:
703 701 nf = normalize(nd and (nd + "/" + f) or f, True, True)
704 702 else:
705 703 nf = nd and (nd + "/" + f) or f
706 704 if nf not in results:
707 705 if kind == dirkind:
708 706 if not ignore(nf):
709 707 if matchtdir:
710 708 matchtdir(nf)
711 709 wadd(nf)
712 710 if nf in dmap and (matchalways or matchfn(nf)):
713 711 results[nf] = None
714 712 elif kind == regkind or kind == lnkkind:
715 713 if nf in dmap:
716 714 if matchalways or matchfn(nf):
717 715 results[nf] = st
718 716 elif (matchalways or matchfn(nf)) and not ignore(nf):
719 717 results[nf] = st
720 718 elif nf in dmap and (matchalways or matchfn(nf)):
721 719 results[nf] = None
722 720
723 721 for s in subrepos:
724 722 del results[s]
725 723 del results['.hg']
726 724
727 725 # step 3: visit remaining files from dmap
728 726 if not skipstep3 and not exact:
729 727 # If a dmap file is not in results yet, it was either
730 728 # a) not matching matchfn b) ignored, c) missing, or d) under a
731 729 # symlink directory.
732 730 if not results and matchalways:
733 731 visit = dmap.keys()
734 732 else:
735 733 visit = [f for f in dmap if f not in results and matchfn(f)]
736 734 visit.sort()
737 735
738 736 if unknown:
739 737 # unknown == True means we walked all dirs under the roots
740 738 # that weren't ignored, and everything that matched was stat'ed
741 739 # and is already in results.
742 740 # The rest must thus be ignored or under a symlink.
743 741 audit_path = pathutil.pathauditor(self._root)
744 742
745 743 for nf in iter(visit):
746 744 # Report ignored items in the dmap as long as they are not
747 745 # under a symlink directory.
748 746 if audit_path.check(nf):
749 747 try:
750 748 results[nf] = lstat(join(nf))
751 749 # file was just ignored, no links, and exists
752 750 except OSError:
753 751 # file doesn't exist
754 752 results[nf] = None
755 753 else:
756 754 # It's either missing or under a symlink directory
757 755 # which we in this case report as missing
758 756 results[nf] = None
759 757 else:
760 758 # We may not have walked the full directory tree above,
761 759 # so stat and check everything we missed.
762 760 nf = iter(visit).next
763 761 for st in util.statfiles([join(i) for i in visit]):
764 762 results[nf()] = st
765 763 return results
766 764
767 765 def status(self, match, subrepos, ignored, clean, unknown):
768 766 '''Determine the status of the working copy relative to the
769 767 dirstate and return a tuple of lists (unsure, modified, added,
770 768 removed, deleted, unknown, ignored, clean), where:
771 769
772 770 unsure:
773 771 files that might have been modified since the dirstate was
774 772 written, but need to be read to be sure (size is the same
775 773 but mtime differs)
776 774 modified:
777 775 files that have definitely been modified since the dirstate
778 776 was written (different size or mode)
779 777 added:
780 778 files that have been explicitly added with hg add
781 779 removed:
782 780 files that have been explicitly removed with hg remove
783 781 deleted:
784 782 files that have been deleted through other means ("missing")
785 783 unknown:
786 784 files not in the dirstate that are not ignored
787 785 ignored:
788 786 files not in the dirstate that are ignored
789 787 (by _dirignore())
790 788 clean:
791 789 files that have definitely not been modified since the
792 790 dirstate was written
793 791 '''
794 792 listignored, listclean, listunknown = ignored, clean, unknown
795 793 lookup, modified, added, unknown, ignored = [], [], [], [], []
796 794 removed, deleted, clean = [], [], []
797 795
798 796 dmap = self._map
799 797 ladd = lookup.append # aka "unsure"
800 798 madd = modified.append
801 799 aadd = added.append
802 800 uadd = unknown.append
803 801 iadd = ignored.append
804 802 radd = removed.append
805 803 dadd = deleted.append
806 804 cadd = clean.append
807 805 mexact = match.exact
808 806 dirignore = self._dirignore
809 807 checkexec = self._checkexec
810 808 copymap = self._copymap
811 809 lastnormaltime = self._lastnormaltime
812 810
813 811 # We need to do full walks when either
814 812 # - we're listing all clean files, or
815 813 # - match.traversedir does something, because match.traversedir should
816 814 # be called for every dir in the working dir
817 815 full = listclean or match.traversedir is not None
818 816 for fn, st in self.walk(match, subrepos, listunknown, listignored,
819 817 full=full).iteritems():
820 818 if fn not in dmap:
821 819 if (listignored or mexact(fn)) and dirignore(fn):
822 820 if listignored:
823 821 iadd(fn)
824 822 else:
825 823 uadd(fn)
826 824 continue
827 825
828 826 state, mode, size, time = dmap[fn]
829 827
830 828 if not st and state in "nma":
831 829 dadd(fn)
832 830 elif state == 'n':
833 831 mtime = int(st.st_mtime)
834 832 if (size >= 0 and
835 833 ((size != st.st_size and size != st.st_size & _rangemask)
836 834 or ((mode ^ st.st_mode) & 0100 and checkexec))
837 835 or size == -2 # other parent
838 836 or fn in copymap):
839 837 madd(fn)
840 838 elif time != mtime and time != mtime & _rangemask:
841 839 ladd(fn)
842 840 elif mtime == lastnormaltime:
843 841 # fn may have been changed in the same timeslot without
844 842 # changing its size. This can happen if we quickly do
845 843 # multiple commits in a single transaction.
846 844 # Force lookup, so we don't miss such a racy file change.
847 845 ladd(fn)
848 846 elif listclean:
849 847 cadd(fn)
850 848 elif state == 'm':
851 849 madd(fn)
852 850 elif state == 'a':
853 851 aadd(fn)
854 852 elif state == 'r':
855 853 radd(fn)
856 854
857 855 return (lookup, modified, added, removed, deleted, unknown, ignored,
858 856 clean)
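
For the state == 'n' branch of status() above, the choice between "modified", "lookup" (unsure) and "clean" comes down to the size/mode/mtime comparisons; this is a sketch restating the same logic under an illustrative helper name, not a separate code path:

    _rangemask = 0x7fffffff

    def _classifynormal(mode, size, time, st, lastnormaltime, checkexec, copied):
        mtime = int(st.st_mtime)
        if (size >= 0 and
            ((size != st.st_size and size != st.st_size & _rangemask)
             or ((mode ^ st.st_mode) & 0100 and checkexec))
            or size == -2                     # entry came from the other parent
            or copied):
            return 'modified'
        if time != mtime and time != mtime & _rangemask:
            return 'lookup'                   # same size, different mtime: read it
        if mtime == lastnormaltime:
            return 'lookup'                   # racy timeslot, force a content check
        return 'clean'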
@@ -1,2083 +1,2197 b'mercurial/parsers.c'
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #include <Python.h>
11 11 #include <ctype.h>
12 12 #include <stddef.h>
13 13 #include <string.h>
14 14
15 15 #include "util.h"
16 16
17 17 static char *versionerrortext = "Python minor version mismatch";
18 18
19 19 static int8_t hextable[256] = {
20 20 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
21 21 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
22 22 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
23 23 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, /* 0-9 */
24 24 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* A-F */
25 25 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
26 26 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* a-f */
27 27 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
28 28 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
29 29 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
30 30 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
31 31 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
32 32 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
33 33 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
34 34 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
35 35 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
36 36 };
37 37
38 38 static inline int hexdigit(const char *p, Py_ssize_t off)
39 39 {
40 40 int8_t val = hextable[(unsigned char)p[off]];
41 41
42 42 if (val >= 0) {
43 43 return val;
44 44 }
45 45
46 46 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
47 47 return 0;
48 48 }
49 49
50 50 /*
51 51 * Turn a hex-encoded string into binary.
52 52 */
53 53 static PyObject *unhexlify(const char *str, int len)
54 54 {
55 55 PyObject *ret;
56 56 char *d;
57 57 int i;
58 58
59 59 ret = PyBytes_FromStringAndSize(NULL, len / 2);
60 60
61 61 if (!ret)
62 62 return NULL;
63 63
64 64 d = PyBytes_AsString(ret);
65 65
66 66 for (i = 0; i < len;) {
67 67 int hi = hexdigit(str, i++);
68 68 int lo = hexdigit(str, i++);
69 69 *d++ = (hi << 4) | lo;
70 70 }
71 71
72 72 return ret;
73 73 }
74 74
75 75 /*
76 76 * This code assumes that a manifest is stitched together with newline
77 77 * ('\n') characters.
78 78 */
79 79 static PyObject *parse_manifest(PyObject *self, PyObject *args)
80 80 {
81 81 PyObject *mfdict, *fdict;
82 82 char *str, *start, *end;
83 83 int len;
84 84
85 85 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
86 86 &PyDict_Type, &mfdict,
87 87 &PyDict_Type, &fdict,
88 88 &str, &len))
89 89 goto quit;
90 90
91 91 start = str;
92 92 end = str + len;
93 93 while (start < end) {
94 94 PyObject *file = NULL, *node = NULL;
95 95 PyObject *flags = NULL;
96 96 char *zero = NULL, *newline = NULL;
97 97 ptrdiff_t nlen;
98 98
99 99 zero = memchr(start, '\0', end - start);
100 100 if (!zero) {
101 101 PyErr_SetString(PyExc_ValueError,
102 102 "manifest entry has no separator");
103 103 goto quit;
104 104 }
105 105
106 106 newline = memchr(zero + 1, '\n', end - (zero + 1));
107 107 if (!newline) {
108 108 PyErr_SetString(PyExc_ValueError,
109 109 "manifest contains trailing garbage");
110 110 goto quit;
111 111 }
112 112
113 113 file = PyBytes_FromStringAndSize(start, zero - start);
114 114
115 115 if (!file)
116 116 goto bail;
117 117
118 118 nlen = newline - zero - 1;
119 119
120 120 node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen);
121 121 if (!node)
122 122 goto bail;
123 123
124 124 if (nlen > 40) {
125 125 flags = PyBytes_FromStringAndSize(zero + 41,
126 126 nlen - 40);
127 127 if (!flags)
128 128 goto bail;
129 129
130 130 if (PyDict_SetItem(fdict, file, flags) == -1)
131 131 goto bail;
132 132 }
133 133
134 134 if (PyDict_SetItem(mfdict, file, node) == -1)
135 135 goto bail;
136 136
137 137 start = newline + 1;
138 138
139 139 Py_XDECREF(flags);
140 140 Py_XDECREF(node);
141 141 Py_XDECREF(file);
142 142 continue;
143 143 bail:
144 144 Py_XDECREF(flags);
145 145 Py_XDECREF(node);
146 146 Py_XDECREF(file);
147 147 goto quit;
148 148 }
149 149
150 150 Py_INCREF(Py_None);
151 151 return Py_None;
152 152 quit:
153 153 return NULL;
154 154 }
155 155
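
parse_manifest above expects each entry to be "<filename>\0<40 hex chars>[flags]\n". A pure-Python sketch of the same parse (illustrative function name):

    import binascii

    def parsemanifest(data, mfdict, fdict):
        for line in data.splitlines():
            fname, rest = line.split('\0', 1)
            mfdict[fname] = binascii.unhexlify(rest[:40])
            if len(rest) > 40:
                fdict[fname] = rest[40:]      # flag characters after the node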
156 static inline dirstateTupleObject *make_dirstate_tuple(char state, int mode,
157 int size, int mtime)
158 {
159 dirstateTupleObject *t = PyObject_New(dirstateTupleObject,
160 &dirstateTupleType);
161 if (!t)
162 return NULL;
163 t->state = state;
164 t->mode = mode;
165 t->size = size;
166 t->mtime = mtime;
167 return t;
168 }
169
170 static PyObject *dirstate_tuple_new(PyTypeObject *subtype, PyObject *args,
171 PyObject *kwds)
172 {
173 /* We do all the initialization here and not a tp_init function because
174 * dirstate_tuple is immutable. */
175 dirstateTupleObject *t;
176 char state;
177 int size, mode, mtime;
178 if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime))
179 return NULL;
180
181 t = (dirstateTupleObject *)subtype->tp_alloc(subtype, 1);
182 if (!t)
183 return NULL;
184 t->state = state;
185 t->mode = mode;
186 t->size = size;
187 t->mtime = mtime;
188
189 return (PyObject *)t;
190 }
191
192 static void dirstate_tuple_dealloc(PyObject *o)
193 {
194 PyObject_Del(o);
195 }
196
197 static Py_ssize_t dirstate_tuple_length(PyObject *o)
198 {
199 return 4;
200 }
201
202 static PyObject *dirstate_tuple_item(PyObject *o, Py_ssize_t i)
203 {
204 dirstateTupleObject *t = (dirstateTupleObject *)o;
205 switch (i) {
206 case 0:
207 return PyBytes_FromStringAndSize(&t->state, 1);
208 case 1:
209 return PyInt_FromLong(t->mode);
210 case 2:
211 return PyInt_FromLong(t->size);
212 case 3:
213 return PyInt_FromLong(t->mtime);
214 default:
215 PyErr_SetString(PyExc_IndexError, "index out of range");
216 return NULL;
217 }
218 }
219
220 static PySequenceMethods dirstate_tuple_sq = {
221 dirstate_tuple_length, /* sq_length */
222 0, /* sq_concat */
223 0, /* sq_repeat */
224 dirstate_tuple_item, /* sq_item */
225 0, /* sq_ass_item */
226 0, /* sq_contains */
227 0, /* sq_inplace_concat */
228 0 /* sq_inplace_repeat */
229 };
230
231 PyTypeObject dirstateTupleType = {
232 PyVarObject_HEAD_INIT(NULL, 0)
233 "dirstate_tuple", /* tp_name */
234 sizeof(dirstateTupleObject),/* tp_basicsize */
235 0, /* tp_itemsize */
236 (destructor)dirstate_tuple_dealloc, /* tp_dealloc */
237 0, /* tp_print */
238 0, /* tp_getattr */
239 0, /* tp_setattr */
240 0, /* tp_compare */
241 0, /* tp_repr */
242 0, /* tp_as_number */
243 &dirstate_tuple_sq, /* tp_as_sequence */
244 0, /* tp_as_mapping */
245 0, /* tp_hash */
246 0, /* tp_call */
247 0, /* tp_str */
248 0, /* tp_getattro */
249 0, /* tp_setattro */
250 0, /* tp_as_buffer */
251 Py_TPFLAGS_DEFAULT, /* tp_flags */
252 "dirstate tuple", /* tp_doc */
253 0, /* tp_traverse */
254 0, /* tp_clear */
255 0, /* tp_richcompare */
256 0, /* tp_weaklistoffset */
257 0, /* tp_iter */
258 0, /* tp_iternext */
259 0, /* tp_methods */
260 0, /* tp_members */
261 0, /* tp_getset */
262 0, /* tp_base */
263 0, /* tp_dict */
264 0, /* tp_descr_get */
265 0, /* tp_descr_set */
266 0, /* tp_dictoffset */
267 0, /* tp_init */
268 0, /* tp_alloc */
269 dirstate_tuple_new, /* tp_new */
270 };
271
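
Taken together, the type above gives Python a small immutable record: it is built from (state, mode, size, mtime), reports a length of 4, supports indices 0-3, and raises IndexError otherwise. For illustration, assuming the extension is importable as mercurial.parsers:

    from mercurial import parsers

    t = parsers.dirstatetuple('n', 0644, 7, 0)
    assert len(t) == 4
    assert (t[0], t[1], t[2], t[3]) == ('n', 0644, 7, 0)
    try:
        t[4]
    except IndexError:
        pass    # dirstate_tuple_item() rejects indices outside 0..3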
156 272 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
157 273 {
158 274 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
159 275 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
160 276 char state, *cur, *str, *cpos;
161 277 int mode, size, mtime;
162 278 unsigned int flen;
163 279 int len, pos = 40;
164 280
165 281 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
166 282 &PyDict_Type, &dmap,
167 283 &PyDict_Type, &cmap,
168 284 &str, &len))
169 285 goto quit;
170 286
171 287 /* read parents */
172 288 if (len < 40)
173 289 goto quit;
174 290
175 291 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
176 292 if (!parents)
177 293 goto quit;
178 294
179 295 /* read filenames */
180 296 while (pos >= 40 && pos < len) {
181 297 cur = str + pos;
182 298 /* unpack header */
183 299 state = *cur;
184 300 mode = getbe32(cur + 1);
185 301 size = getbe32(cur + 5);
186 302 mtime = getbe32(cur + 9);
187 303 flen = getbe32(cur + 13);
188 304 pos += 17;
189 305 cur += 17;
190 306 if (flen > len - pos) {
191 307 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
192 308 goto quit;
193 309 }
194 310
195 entry = Py_BuildValue("ciii", state, mode, size, mtime);
196 if (!entry)
197 goto quit;
198 PyObject_GC_UnTrack(entry); /* don't waste time with this */
199
311 entry = (PyObject *)make_dirstate_tuple(state, mode, size,
312 mtime);
200 313 cpos = memchr(cur, 0, flen);
201 314 if (cpos) {
202 315 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
203 316 cname = PyBytes_FromStringAndSize(cpos + 1,
204 317 flen - (cpos - cur) - 1);
205 318 if (!fname || !cname ||
206 319 PyDict_SetItem(cmap, fname, cname) == -1 ||
207 320 PyDict_SetItem(dmap, fname, entry) == -1)
208 321 goto quit;
209 322 Py_DECREF(cname);
210 323 } else {
211 324 fname = PyBytes_FromStringAndSize(cur, flen);
212 325 if (!fname ||
213 326 PyDict_SetItem(dmap, fname, entry) == -1)
214 327 goto quit;
215 328 }
216 329 Py_DECREF(fname);
217 330 Py_DECREF(entry);
218 331 fname = cname = entry = NULL;
219 332 pos += flen;
220 333 }
221 334
222 335 ret = parents;
223 336 Py_INCREF(ret);
224 337 quit:
225 338 Py_XDECREF(fname);
226 339 Py_XDECREF(cname);
227 340 Py_XDECREF(entry);
228 341 Py_XDECREF(parents);
229 342 return ret;
230 343 }
231 344
232 345 static inline int getintat(PyObject *tuple, int off, uint32_t *v)
233 346 {
234 347 PyObject *o = PyTuple_GET_ITEM(tuple, off);
235 348 long val;
236 349
237 350 if (PyInt_Check(o))
238 351 val = PyInt_AS_LONG(o);
239 352 else if (PyLong_Check(o)) {
240 353 val = PyLong_AsLong(o);
241 354 if (val == -1 && PyErr_Occurred())
242 355 return -1;
243 356 } else {
244 357 PyErr_SetString(PyExc_TypeError, "expected an int or long");
245 358 return -1;
246 359 }
247 360 if (LONG_MAX > INT_MAX && (val > INT_MAX || val < INT_MIN)) {
248 361 PyErr_SetString(PyExc_OverflowError,
249 362 "Python value to large to convert to uint32_t");
250 363 return -1;
251 364 }
252 365 *v = (uint32_t)val;
253 366 return 0;
254 367 }
255 368
256 369 /*
257 370 * Efficiently pack a dirstate object into its on-disk format.
258 371 */
259 372 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
260 373 {
261 374 PyObject *packobj = NULL;
262 375 PyObject *map, *copymap, *pl, *mtime_unset = NULL;
263 376 Py_ssize_t nbytes, pos, l;
264 377 PyObject *k, *v, *pn;
265 378 char *p, *s;
266 379 double now;
267 380
268 381 if (!PyArg_ParseTuple(args, "O!O!Od:pack_dirstate",
269 382 &PyDict_Type, &map, &PyDict_Type, &copymap,
270 383 &pl, &now))
271 384 return NULL;
272 385
273 386 if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
274 387 PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
275 388 return NULL;
276 389 }
277 390
278 391 /* Figure out how much we need to allocate. */
279 392 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
280 393 PyObject *c;
281 394 if (!PyString_Check(k)) {
282 395 PyErr_SetString(PyExc_TypeError, "expected string key");
283 396 goto bail;
284 397 }
285 398 nbytes += PyString_GET_SIZE(k) + 17;
286 399 c = PyDict_GetItem(copymap, k);
287 400 if (c) {
288 401 if (!PyString_Check(c)) {
289 402 PyErr_SetString(PyExc_TypeError,
290 403 "expected string key");
291 404 goto bail;
292 405 }
293 406 nbytes += PyString_GET_SIZE(c) + 1;
294 407 }
295 408 }
296 409
297 410 packobj = PyString_FromStringAndSize(NULL, nbytes);
298 411 if (packobj == NULL)
299 412 goto bail;
300 413
301 414 p = PyString_AS_STRING(packobj);
302 415
303 416 pn = PySequence_ITEM(pl, 0);
304 417 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
305 418 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
306 419 goto bail;
307 420 }
308 421 memcpy(p, s, l);
309 422 p += 20;
310 423 pn = PySequence_ITEM(pl, 1);
311 424 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
312 425 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
313 426 goto bail;
314 427 }
315 428 memcpy(p, s, l);
316 429 p += 20;
317 430
318 431 for (pos = 0; PyDict_Next(map, &pos, &k, &v); ) {
432 dirstateTupleObject *tuple;
433 char state;
319 434 uint32_t mode, size, mtime;
320 435 Py_ssize_t len, l;
321 436 PyObject *o;
322 char *s, *t;
437 char *t;
323 438
324 if (!PyTuple_Check(v) || PyTuple_GET_SIZE(v) != 4) {
325 PyErr_SetString(PyExc_TypeError, "expected a 4-tuple");
439 if (!dirstate_tuple_check(v)) {
440 PyErr_SetString(PyExc_TypeError,
441 "expected a dirstate tuple");
326 442 goto bail;
327 443 }
328 o = PyTuple_GET_ITEM(v, 0);
329 if (PyString_AsStringAndSize(o, &s, &l) == -1 || l != 1) {
330 PyErr_SetString(PyExc_TypeError, "expected one byte");
331 goto bail;
332 }
333 *p++ = *s;
334 if (getintat(v, 1, &mode) == -1)
335 goto bail;
336 if (getintat(v, 2, &size) == -1)
337 goto bail;
338 if (getintat(v, 3, &mtime) == -1)
339 goto bail;
340 if (*s == 'n' && mtime == (uint32_t)now) {
444 tuple = (dirstateTupleObject *)v;
445
446 state = tuple->state;
447 mode = tuple->mode;
448 size = tuple->size;
449 mtime = tuple->mtime;
450 if (state == 'n' && mtime == (uint32_t)now) {
341 451 /* See pure/parsers.py:pack_dirstate for why we do
342 452 * this. */
343 453 mtime = -1;
344 mtime_unset = Py_BuildValue(
345 "ciii", *s, mode, size, mtime);
454 mtime_unset = (PyObject *)make_dirstate_tuple(
455 state, mode, size, mtime);
346 456 if (!mtime_unset)
347 457 goto bail;
348 458 if (PyDict_SetItem(map, k, mtime_unset) == -1)
349 459 goto bail;
350 460 Py_DECREF(mtime_unset);
351 461 mtime_unset = NULL;
352 462 }
463 *p++ = state;
353 464 putbe32(mode, p);
354 465 putbe32(size, p + 4);
355 466 putbe32(mtime, p + 8);
356 467 t = p + 12;
357 468 p += 16;
358 469 len = PyString_GET_SIZE(k);
359 470 memcpy(p, PyString_AS_STRING(k), len);
360 471 p += len;
361 472 o = PyDict_GetItem(copymap, k);
362 473 if (o) {
363 474 *p++ = '\0';
364 475 l = PyString_GET_SIZE(o);
365 476 memcpy(p, PyString_AS_STRING(o), l);
366 477 p += l;
367 478 len += l + 1;
368 479 }
369 480 putbe32((uint32_t)len, t);
370 481 }
371 482
372 483 pos = p - PyString_AS_STRING(packobj);
373 484 if (pos != nbytes) {
374 485 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
375 486 (long)pos, (long)nbytes);
376 487 goto bail;
377 488 }
378 489
379 490 return packobj;
380 491 bail:
381 492 Py_XDECREF(mtime_unset);
382 493 Py_XDECREF(packobj);
383 494 return NULL;
384 495 }
385 496
386 497 /*
387 498 * A base-16 trie for fast node->rev mapping.
388 499 *
389 500 * Positive value is index of the next node in the trie
390 501 * Negative value is a leaf: -(rev + 1)
391 502 * Zero is empty
392 503 */
393 504 typedef struct {
394 505 int children[16];
395 506 } nodetree;
396 507
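
A sketch of the lookup this encoding supports (illustrative name; the real nt_find/nt_insert code also handles prefix collisions and populates the trie lazily):

    def nt_lookup(nodes, hexnode):
        # nodes is a list of 16-slot arrays; per the comment above:
        # 0 = empty, > 0 = index of the child node, < 0 = leaf -(rev + 1)
        v = 0                       # start at the root node
        for c in hexnode:
            v = nodes[v][int(c, 16)]
            if v == 0:
                return None         # not present
            if v < 0:
                return -(v + 1)     # decode the revision from the leaf
        return None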
397 508 /*
398 509 * This class has two behaviours.
399 510 *
400 511 * When used in a list-like way (with integer keys), we decode an
401 512 * entry in a RevlogNG index file on demand. Our last entry is a
402 513 * sentinel, always a nullid. We have limited support for
403 514 * integer-keyed insert and delete, only at elements right before the
404 515 * sentinel.
405 516 *
406 517 * With string keys, we lazily perform a reverse mapping from node to
407 518 * rev, using a base-16 trie.
408 519 */
409 520 typedef struct {
410 521 PyObject_HEAD
411 522 /* Type-specific fields go here. */
412 523 PyObject *data; /* raw bytes of index */
413 524 PyObject **cache; /* cached tuples */
414 525 const char **offsets; /* populated on demand */
415 526 Py_ssize_t raw_length; /* original number of elements */
416 527 Py_ssize_t length; /* current number of elements */
417 528 PyObject *added; /* populated on demand */
418 529 PyObject *headrevs; /* cache, invalidated on changes */
419 530 nodetree *nt; /* base-16 trie */
420 531 int ntlength; /* # nodes in use */
421 532 int ntcapacity; /* # nodes allocated */
422 533 int ntdepth; /* maximum depth of tree */
423 534 int ntsplits; /* # splits performed */
424 535 int ntrev; /* last rev scanned */
425 536 int ntlookups; /* # lookups */
426 537 int ntmisses; /* # lookups that miss the cache */
427 538 int inlined;
428 539 } indexObject;
429 540
430 541 static Py_ssize_t index_length(const indexObject *self)
431 542 {
432 543 if (self->added == NULL)
433 544 return self->length;
434 545 return self->length + PyList_GET_SIZE(self->added);
435 546 }
436 547
437 548 static PyObject *nullentry;
438 549 static const char nullid[20];
439 550
440 551 static long inline_scan(indexObject *self, const char **offsets);
441 552
442 553 #if LONG_MAX == 0x7fffffffL
443 554 static char *tuple_format = "Kiiiiiis#";
444 555 #else
445 556 static char *tuple_format = "kiiiiiis#";
446 557 #endif
447 558
448 559 /* A RevlogNG v1 index entry is 64 bytes long. */
449 560 static const long v1_hdrsize = 64;
450 561
451 562 /*
452 563 * Return a pointer to the beginning of a RevlogNG record.
453 564 */
454 565 static const char *index_deref(indexObject *self, Py_ssize_t pos)
455 566 {
456 567 if (self->inlined && pos > 0) {
457 568 if (self->offsets == NULL) {
458 569 self->offsets = malloc(self->raw_length *
459 570 sizeof(*self->offsets));
460 571 if (self->offsets == NULL)
461 572 return (const char *)PyErr_NoMemory();
462 573 inline_scan(self, self->offsets);
463 574 }
464 575 return self->offsets[pos];
465 576 }
466 577
467 578 return PyString_AS_STRING(self->data) + pos * v1_hdrsize;
468 579 }
469 580
470 581 /*
471 582 * RevlogNG format (all in big endian, data may be inlined):
472 583 * 6 bytes: offset
473 584 * 2 bytes: flags
474 585 * 4 bytes: compressed length
475 586 * 4 bytes: uncompressed length
476 587 * 4 bytes: base revision
477 588 * 4 bytes: link revision
478 589 * 4 bytes: parent 1 revision
479 590 * 4 bytes: parent 2 revision
480 591 * 32 bytes: nodeid (only 20 bytes used)
481 592 */
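
Equivalently, one 64-byte entry can be unpacked in Python; this is a sketch under the layout just described, where the first 8 bytes hold the 6-byte offset and 2-byte flags together and the last 12 bytes of the node field are unused padding:

    import struct

    def parseindexentry(data):
        offset_flags, comp, uncomp, base, link, p1, p2, node = \
            struct.unpack(">Qiiiiii20s", data[:52])
        return (offset_flags >> 16, offset_flags & 0xffff,
                comp, uncomp, base, link, p1, p2, node)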
482 593 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
483 594 {
484 595 uint64_t offset_flags;
485 596 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
486 597 const char *c_node_id;
487 598 const char *data;
488 599 Py_ssize_t length = index_length(self);
489 600 PyObject *entry;
490 601
491 602 if (pos < 0)
492 603 pos += length;
493 604
494 605 if (pos < 0 || pos >= length) {
495 606 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
496 607 return NULL;
497 608 }
498 609
499 610 if (pos == length - 1) {
500 611 Py_INCREF(nullentry);
501 612 return nullentry;
502 613 }
503 614
504 615 if (pos >= self->length - 1) {
505 616 PyObject *obj;
506 617 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
507 618 Py_INCREF(obj);
508 619 return obj;
509 620 }
510 621
511 622 if (self->cache) {
512 623 if (self->cache[pos]) {
513 624 Py_INCREF(self->cache[pos]);
514 625 return self->cache[pos];
515 626 }
516 627 } else {
517 628 self->cache = calloc(self->raw_length, sizeof(PyObject *));
518 629 if (self->cache == NULL)
519 630 return PyErr_NoMemory();
520 631 }
521 632
522 633 data = index_deref(self, pos);
523 634 if (data == NULL)
524 635 return NULL;
525 636
526 637 offset_flags = getbe32(data + 4);
527 638 if (pos == 0) /* mask out version number for the first entry */
528 639 offset_flags &= 0xFFFF;
529 640 else {
530 641 uint32_t offset_high = getbe32(data);
531 642 offset_flags |= ((uint64_t)offset_high) << 32;
532 643 }
533 644
534 645 comp_len = getbe32(data + 8);
535 646 uncomp_len = getbe32(data + 12);
536 647 base_rev = getbe32(data + 16);
537 648 link_rev = getbe32(data + 20);
538 649 parent_1 = getbe32(data + 24);
539 650 parent_2 = getbe32(data + 28);
540 651 c_node_id = data + 32;
541 652
542 653 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
543 654 uncomp_len, base_rev, link_rev,
544 655 parent_1, parent_2, c_node_id, 20);
545 656
546 657 if (entry) {
547 658 PyObject_GC_UnTrack(entry);
548 659 Py_INCREF(entry);
549 660 }
550 661
551 662 self->cache[pos] = entry;
552 663
553 664 return entry;
554 665 }
555 666
556 667 /*
557 668 * Return the 20-byte SHA of the node corresponding to the given rev.
558 669 */
559 670 static const char *index_node(indexObject *self, Py_ssize_t pos)
560 671 {
561 672 Py_ssize_t length = index_length(self);
562 673 const char *data;
563 674
564 675 if (pos == length - 1 || pos == INT_MAX)
565 676 return nullid;
566 677
567 678 if (pos >= length)
568 679 return NULL;
569 680
570 681 if (pos >= self->length - 1) {
571 682 PyObject *tuple, *str;
572 683 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
573 684 str = PyTuple_GetItem(tuple, 7);
574 685 return str ? PyString_AS_STRING(str) : NULL;
575 686 }
576 687
577 688 data = index_deref(self, pos);
578 689 return data ? data + 32 : NULL;
579 690 }
580 691
581 692 static int nt_insert(indexObject *self, const char *node, int rev);
582 693
583 694 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
584 695 {
585 696 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
586 697 return -1;
587 698 if (*nodelen == 20)
588 699 return 0;
589 700 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
590 701 return -1;
591 702 }
592 703
593 704 static PyObject *index_insert(indexObject *self, PyObject *args)
594 705 {
595 706 PyObject *obj;
596 707 char *node;
597 708 long offset;
598 709 Py_ssize_t len, nodelen;
599 710
600 711 if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
601 712 return NULL;
602 713
603 714 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
604 715 PyErr_SetString(PyExc_TypeError, "8-tuple required");
605 716 return NULL;
606 717 }
607 718
608 719 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
609 720 return NULL;
610 721
611 722 len = index_length(self);
612 723
613 724 if (offset < 0)
614 725 offset += len;
615 726
616 727 if (offset != len - 1) {
617 728 PyErr_SetString(PyExc_IndexError,
618 729 "insert only supported at index -1");
619 730 return NULL;
620 731 }
621 732
622 733 if (offset > INT_MAX) {
623 734 PyErr_SetString(PyExc_ValueError,
624 735 "currently only 2**31 revs supported");
625 736 return NULL;
626 737 }
627 738
628 739 if (self->added == NULL) {
629 740 self->added = PyList_New(0);
630 741 if (self->added == NULL)
631 742 return NULL;
632 743 }
633 744
634 745 if (PyList_Append(self->added, obj) == -1)
635 746 return NULL;
636 747
637 748 if (self->nt)
638 749 nt_insert(self, node, (int)offset);
639 750
640 751 Py_CLEAR(self->headrevs);
641 752 Py_RETURN_NONE;
642 753 }
643 754
644 755 static void _index_clearcaches(indexObject *self)
645 756 {
646 757 if (self->cache) {
647 758 Py_ssize_t i;
648 759
649 760 for (i = 0; i < self->raw_length; i++)
650 761 Py_CLEAR(self->cache[i]);
651 762 free(self->cache);
652 763 self->cache = NULL;
653 764 }
654 765 if (self->offsets) {
655 766 free(self->offsets);
656 767 self->offsets = NULL;
657 768 }
658 769 if (self->nt) {
659 770 free(self->nt);
660 771 self->nt = NULL;
661 772 }
662 773 Py_CLEAR(self->headrevs);
663 774 }
664 775
665 776 static PyObject *index_clearcaches(indexObject *self)
666 777 {
667 778 _index_clearcaches(self);
668 779 self->ntlength = self->ntcapacity = 0;
669 780 self->ntdepth = self->ntsplits = 0;
670 781 self->ntrev = -1;
671 782 self->ntlookups = self->ntmisses = 0;
672 783 Py_RETURN_NONE;
673 784 }
674 785
675 786 static PyObject *index_stats(indexObject *self)
676 787 {
677 788 PyObject *obj = PyDict_New();
678 789
679 790 if (obj == NULL)
680 791 return NULL;
681 792
682 793 #define istat(__n, __d) \
683 794 if (PyDict_SetItemString(obj, __d, PyInt_FromSsize_t(self->__n)) == -1) \
684 795 goto bail;
685 796
686 797 if (self->added) {
687 798 Py_ssize_t len = PyList_GET_SIZE(self->added);
688 799 if (PyDict_SetItemString(obj, "index entries added",
689 800 PyInt_FromSsize_t(len)) == -1)
690 801 goto bail;
691 802 }
692 803
693 804 if (self->raw_length != self->length - 1)
694 805 istat(raw_length, "revs on disk");
695 806 istat(length, "revs in memory");
696 807 istat(ntcapacity, "node trie capacity");
697 808 istat(ntdepth, "node trie depth");
698 809 istat(ntlength, "node trie count");
699 810 istat(ntlookups, "node trie lookups");
700 811 istat(ntmisses, "node trie misses");
701 812 istat(ntrev, "node trie last rev scanned");
702 813 istat(ntsplits, "node trie splits");
703 814
704 815 #undef istat
705 816
706 817 return obj;
707 818
708 819 bail:
709 820 Py_XDECREF(obj);
710 821 return NULL;
711 822 }
712 823
713 824 /*
714 825 * When we cache a list, we want to be sure the caller can't mutate
715 826 * the cached copy.
716 827 */
717 828 static PyObject *list_copy(PyObject *list)
718 829 {
719 830 Py_ssize_t len = PyList_GET_SIZE(list);
720 831 PyObject *newlist = PyList_New(len);
721 832 Py_ssize_t i;
722 833
723 834 if (newlist == NULL)
724 835 return NULL;
725 836
726 837 for (i = 0; i < len; i++) {
727 838 PyObject *obj = PyList_GET_ITEM(list, i);
728 839 Py_INCREF(obj);
729 840 PyList_SET_ITEM(newlist, i, obj);
730 841 }
731 842
732 843 return newlist;
733 844 }
734 845
735 846 static PyObject *index_headrevs(indexObject *self)
736 847 {
737 848 Py_ssize_t i, len, addlen;
738 849 char *nothead = NULL;
739 850 PyObject *heads;
740 851
741 852 if (self->headrevs)
742 853 return list_copy(self->headrevs);
743 854
744 855 len = index_length(self) - 1;
745 856 heads = PyList_New(0);
746 857 if (heads == NULL)
747 858 goto bail;
748 859 if (len == 0) {
749 860 PyObject *nullid = PyInt_FromLong(-1);
750 861 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
751 862 Py_XDECREF(nullid);
752 863 goto bail;
753 864 }
754 865 goto done;
755 866 }
756 867
757 868 nothead = calloc(len, 1);
758 869 if (nothead == NULL)
759 870 goto bail;
760 871
761 872 for (i = 0; i < self->raw_length; i++) {
762 873 const char *data = index_deref(self, i);
763 874 int parent_1 = getbe32(data + 24);
764 875 int parent_2 = getbe32(data + 28);
765 876 if (parent_1 >= 0)
766 877 nothead[parent_1] = 1;
767 878 if (parent_2 >= 0)
768 879 nothead[parent_2] = 1;
769 880 }
770 881
771 882 addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
772 883
773 884 for (i = 0; i < addlen; i++) {
774 885 PyObject *rev = PyList_GET_ITEM(self->added, i);
775 886 PyObject *p1 = PyTuple_GET_ITEM(rev, 5);
776 887 PyObject *p2 = PyTuple_GET_ITEM(rev, 6);
777 888 long parent_1, parent_2;
778 889
779 890 if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
780 891 PyErr_SetString(PyExc_TypeError,
781 892 "revlog parents are invalid");
782 893 goto bail;
783 894 }
784 895 parent_1 = PyInt_AS_LONG(p1);
785 896 parent_2 = PyInt_AS_LONG(p2);
786 897 if (parent_1 >= 0)
787 898 nothead[parent_1] = 1;
788 899 if (parent_2 >= 0)
789 900 nothead[parent_2] = 1;
790 901 }
791 902
792 903 for (i = 0; i < len; i++) {
793 904 PyObject *head;
794 905
795 906 if (nothead[i])
796 907 continue;
797 908 head = PyInt_FromLong(i);
798 909 if (head == NULL || PyList_Append(heads, head) == -1) {
799 910 Py_XDECREF(head);
800 911 goto bail;
801 912 }
802 913 }
803 914
804 915 done:
805 916 self->headrevs = heads;
806 917 free(nothead);
807 918 return list_copy(self->headrevs);
808 919 bail:
809 920 Py_XDECREF(heads);
810 921 free(nothead);
811 922 return NULL;
812 923 }
813 924
814 925 static inline int nt_level(const char *node, Py_ssize_t level)
815 926 {
816 927 int v = node[level>>1];
817 928 if (!(level & 1))
818 929 v >>= 4;
819 930 return v & 0xf;
820 931 }
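
/*
 * Illustrative sketch (not in the original patch): nt_level() reads one
 * hex nibble per level, high nibble first, so a node whose first byte is
 * 0xab yields 0xa at level 0 and 0xb at level 1.
 */
static int example_nt_level_check(void)
{
        const char node[1] = { (char)0xab };

        return nt_level(node, 0) == 0xa && nt_level(node, 1) == 0xb;
}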
821 932
822 933 /*
823 934 * Return values:
824 935 *
825 936 * -4: match is ambiguous (multiple candidates)
826 937 * -2: not found
827 938 * rest: valid rev
828 939 */
829 940 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
830 941 int hex)
831 942 {
832 943 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
833 944 int level, maxlevel, off;
834 945
835 946 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
836 947 return -1;
837 948
838 949 if (self->nt == NULL)
839 950 return -2;
840 951
841 952 if (hex)
842 953 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
843 954 else
844 955 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
845 956
846 957 for (level = off = 0; level < maxlevel; level++) {
847 958 int k = getnybble(node, level);
848 959 nodetree *n = &self->nt[off];
849 960 int v = n->children[k];
850 961
851 962 if (v < 0) {
852 963 const char *n;
853 964 Py_ssize_t i;
854 965
855 966 v = -v - 1;
856 967 n = index_node(self, v);
857 968 if (n == NULL)
858 969 return -2;
859 970 for (i = level; i < maxlevel; i++)
860 971 if (getnybble(node, i) != nt_level(n, i))
861 972 return -2;
862 973 return v;
863 974 }
864 975 if (v == 0)
865 976 return -2;
866 977 off = v;
867 978 }
868 979 /* multiple matches against an ambiguous prefix */
869 980 return -4;
870 981 }
871 982
872 983 static int nt_new(indexObject *self)
873 984 {
874 985 if (self->ntlength == self->ntcapacity) {
875 986 self->ntcapacity *= 2;
876 987 self->nt = realloc(self->nt,
877 988 self->ntcapacity * sizeof(nodetree));
878 989 if (self->nt == NULL) {
879 990 PyErr_SetString(PyExc_MemoryError, "out of memory");
880 991 return -1;
881 992 }
882 993 memset(&self->nt[self->ntlength], 0,
883 994 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
884 995 }
885 996 return self->ntlength++;
886 997 }
887 998
888 999 static int nt_insert(indexObject *self, const char *node, int rev)
889 1000 {
890 1001 int level = 0;
891 1002 int off = 0;
892 1003
893 1004 while (level < 40) {
894 1005 int k = nt_level(node, level);
895 1006 nodetree *n;
896 1007 int v;
897 1008
898 1009 n = &self->nt[off];
899 1010 v = n->children[k];
900 1011
901 1012 if (v == 0) {
902 1013 n->children[k] = -rev - 1;
903 1014 return 0;
904 1015 }
905 1016 if (v < 0) {
906 1017 const char *oldnode = index_node(self, -v - 1);
907 1018 int noff;
908 1019
909 1020 if (!oldnode || !memcmp(oldnode, node, 20)) {
910 1021 n->children[k] = -rev - 1;
911 1022 return 0;
912 1023 }
913 1024 noff = nt_new(self);
914 1025 if (noff == -1)
915 1026 return -1;
916 1027 /* self->nt may have been changed by realloc */
917 1028 self->nt[off].children[k] = noff;
918 1029 off = noff;
919 1030 n = &self->nt[off];
920 1031 n->children[nt_level(oldnode, ++level)] = v;
921 1032 if (level > self->ntdepth)
922 1033 self->ntdepth = level;
923 1034 self->ntsplits += 1;
924 1035 } else {
925 1036 level += 1;
926 1037 off = v;
927 1038 }
928 1039 }
929 1040
930 1041 return -1;
931 1042 }
932 1043
933 1044 static int nt_init(indexObject *self)
934 1045 {
935 1046 if (self->nt == NULL) {
936 1047 if (self->raw_length > INT_MAX) {
937 1048 PyErr_SetString(PyExc_ValueError, "overflow in nt_init");
938 1049 return -1;
939 1050 }
940 1051 self->ntcapacity = self->raw_length < 4
941 1052 ? 4 : (int)self->raw_length / 2;
942 1053
943 1054 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
944 1055 if (self->nt == NULL) {
945 1056 PyErr_NoMemory();
946 1057 return -1;
947 1058 }
948 1059 self->ntlength = 1;
949 1060 self->ntrev = (int)index_length(self) - 1;
950 1061 self->ntlookups = 1;
951 1062 self->ntmisses = 0;
952 1063 if (nt_insert(self, nullid, INT_MAX) == -1)
953 1064 return -1;
954 1065 }
955 1066 return 0;
956 1067 }
957 1068
958 1069 /*
959 1070 * Return values:
960 1071 *
961 1072 * -3: error (exception set)
962 1073 * -2: not found (no exception set)
963 1074 * rest: valid rev
964 1075 */
965 1076 static int index_find_node(indexObject *self,
966 1077 const char *node, Py_ssize_t nodelen)
967 1078 {
968 1079 int rev;
969 1080
970 1081 self->ntlookups++;
971 1082 rev = nt_find(self, node, nodelen, 0);
972 1083 if (rev >= -1)
973 1084 return rev;
974 1085
975 1086 if (nt_init(self) == -1)
976 1087 return -3;
977 1088
978 1089 /*
979 1090 * For the first handful of lookups, we scan the entire index,
980 1091 * and cache only the matching nodes. This optimizes for cases
981 1092 * like "hg tip", where only a few nodes are accessed.
982 1093 *
983 1094 * After that, we cache every node we visit, using a single
984 1095 * scan amortized over multiple lookups. This gives the best
985 1096 * bulk performance, e.g. for "hg log".
986 1097 */
987 1098 if (self->ntmisses++ < 4) {
988 1099 for (rev = self->ntrev - 1; rev >= 0; rev--) {
989 1100 const char *n = index_node(self, rev);
990 1101 if (n == NULL)
991 1102 return -2;
992 1103 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
993 1104 if (nt_insert(self, n, rev) == -1)
994 1105 return -3;
995 1106 break;
996 1107 }
997 1108 }
998 1109 } else {
999 1110 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1000 1111 const char *n = index_node(self, rev);
1001 1112 if (n == NULL) {
1002 1113 self->ntrev = rev + 1;
1003 1114 return -2;
1004 1115 }
1005 1116 if (nt_insert(self, n, rev) == -1) {
1006 1117 self->ntrev = rev + 1;
1007 1118 return -3;
1008 1119 }
1009 1120 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1010 1121 break;
1011 1122 }
1012 1123 }
1013 1124 self->ntrev = rev;
1014 1125 }
1015 1126
1016 1127 if (rev >= 0)
1017 1128 return rev;
1018 1129 return -2;
1019 1130 }
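
/*
 * Illustrative sketch (not part of this change): how a caller in this
 * file can use the return convention above; -3 means an exception is
 * already set, -2 means the node is unknown, anything else is a valid
 * rev (with -1 standing for nullid).
 */
static PyObject *example_rev_for_node(indexObject *self, const char *node)
{
        int rev = index_find_node(self, node, 20);

        if (rev == -3)
                return NULL;            /* exception already set */
        if (rev == -2)
                Py_RETURN_NONE;         /* unknown node */
        return PyInt_FromLong(rev);     /* valid rev, or -1 for nullid */
}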
1020 1131
1021 1132 static PyObject *raise_revlog_error(void)
1022 1133 {
1023 1134 static PyObject *errclass;
1024 1135 PyObject *mod = NULL, *errobj;
1025 1136
1026 1137 if (errclass == NULL) {
1027 1138 PyObject *dict;
1028 1139
1029 1140 mod = PyImport_ImportModule("mercurial.error");
1030 1141 if (mod == NULL)
1031 1142 goto classfail;
1032 1143
1033 1144 dict = PyModule_GetDict(mod);
1034 1145 if (dict == NULL)
1035 1146 goto classfail;
1036 1147
1037 1148 errclass = PyDict_GetItemString(dict, "RevlogError");
1038 1149 if (errclass == NULL) {
1039 1150 PyErr_SetString(PyExc_SystemError,
1040 1151 "could not find RevlogError");
1041 1152 goto classfail;
1042 1153 }
1043 1154 Py_INCREF(errclass);
1044 1155 }
1045 1156
1046 1157 errobj = PyObject_CallFunction(errclass, NULL);
1047 1158 if (errobj == NULL)
1048 1159 return NULL;
1049 1160 PyErr_SetObject(errclass, errobj);
1050 1161 return errobj;
1051 1162
1052 1163 classfail:
1053 1164 Py_XDECREF(mod);
1054 1165 return NULL;
1055 1166 }
1056 1167
1057 1168 static PyObject *index_getitem(indexObject *self, PyObject *value)
1058 1169 {
1059 1170 char *node;
1060 1171 Py_ssize_t nodelen;
1061 1172 int rev;
1062 1173
1063 1174 if (PyInt_Check(value))
1064 1175 return index_get(self, PyInt_AS_LONG(value));
1065 1176
1066 1177 if (node_check(value, &node, &nodelen) == -1)
1067 1178 return NULL;
1068 1179 rev = index_find_node(self, node, nodelen);
1069 1180 if (rev >= -1)
1070 1181 return PyInt_FromLong(rev);
1071 1182 if (rev == -2)
1072 1183 raise_revlog_error();
1073 1184 return NULL;
1074 1185 }
1075 1186
1076 1187 static int nt_partialmatch(indexObject *self, const char *node,
1077 1188 Py_ssize_t nodelen)
1078 1189 {
1079 1190 int rev;
1080 1191
1081 1192 if (nt_init(self) == -1)
1082 1193 return -3;
1083 1194
1084 1195 if (self->ntrev > 0) {
1085 1196 /* ensure that the radix tree is fully populated */
1086 1197 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1087 1198 const char *n = index_node(self, rev);
1088 1199 if (n == NULL)
1089 1200 return -2;
1090 1201 if (nt_insert(self, n, rev) == -1)
1091 1202 return -3;
1092 1203 }
1093 1204 self->ntrev = rev;
1094 1205 }
1095 1206
1096 1207 return nt_find(self, node, nodelen, 1);
1097 1208 }
1098 1209
1099 1210 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1100 1211 {
1101 1212 const char *fullnode;
1102 1213 int nodelen;
1103 1214 char *node;
1104 1215 int rev, i;
1105 1216
1106 1217 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
1107 1218 return NULL;
1108 1219
1109 1220 if (nodelen < 4) {
1110 1221 PyErr_SetString(PyExc_ValueError, "key too short");
1111 1222 return NULL;
1112 1223 }
1113 1224
1114 1225 if (nodelen > 40) {
1115 1226 PyErr_SetString(PyExc_ValueError, "key too long");
1116 1227 return NULL;
1117 1228 }
1118 1229
1119 1230 for (i = 0; i < nodelen; i++)
1120 1231 hexdigit(node, i);
1121 1232 if (PyErr_Occurred()) {
1122 1233 /* input contains non-hex characters */
1123 1234 PyErr_Clear();
1124 1235 Py_RETURN_NONE;
1125 1236 }
1126 1237
1127 1238 rev = nt_partialmatch(self, node, nodelen);
1128 1239
1129 1240 switch (rev) {
1130 1241 case -4:
1131 1242 raise_revlog_error();
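/* fall through: the exception is already set */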
1132 1243 case -3:
1133 1244 return NULL;
1134 1245 case -2:
1135 1246 Py_RETURN_NONE;
1136 1247 case -1:
1137 1248 return PyString_FromStringAndSize(nullid, 20);
1138 1249 }
1139 1250
1140 1251 fullnode = index_node(self, rev);
1141 1252 if (fullnode == NULL) {
1142 1253 PyErr_Format(PyExc_IndexError,
1143 1254 "could not access rev %d", rev);
1144 1255 return NULL;
1145 1256 }
1146 1257 return PyString_FromStringAndSize(fullnode, 20);
1147 1258 }
1148 1259
1149 1260 static PyObject *index_m_get(indexObject *self, PyObject *args)
1150 1261 {
1151 1262 Py_ssize_t nodelen;
1152 1263 PyObject *val;
1153 1264 char *node;
1154 1265 int rev;
1155 1266
1156 1267 if (!PyArg_ParseTuple(args, "O", &val))
1157 1268 return NULL;
1158 1269 if (node_check(val, &node, &nodelen) == -1)
1159 1270 return NULL;
1160 1271 rev = index_find_node(self, node, nodelen);
1161 1272 if (rev == -3)
1162 1273 return NULL;
1163 1274 if (rev == -2)
1164 1275 Py_RETURN_NONE;
1165 1276 return PyInt_FromLong(rev);
1166 1277 }
1167 1278
1168 1279 static int index_contains(indexObject *self, PyObject *value)
1169 1280 {
1170 1281 char *node;
1171 1282 Py_ssize_t nodelen;
1172 1283
1173 1284 if (PyInt_Check(value)) {
1174 1285 long rev = PyInt_AS_LONG(value);
1175 1286 return rev >= -1 && rev < index_length(self);
1176 1287 }
1177 1288
1178 1289 if (node_check(value, &node, &nodelen) == -1)
1179 1290 return -1;
1180 1291
1181 1292 switch (index_find_node(self, node, nodelen)) {
1182 1293 case -3:
1183 1294 return -1;
1184 1295 case -2:
1185 1296 return 0;
1186 1297 default:
1187 1298 return 1;
1188 1299 }
1189 1300 }
1190 1301
1191 1302 static inline void index_get_parents(indexObject *self, int rev, int *ps)
1192 1303 {
1193 1304 if (rev >= self->length - 1) {
1194 1305 PyObject *tuple = PyList_GET_ITEM(self->added,
1195 1306 rev - self->length + 1);
1196 1307 ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5));
1197 1308 ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6));
1198 1309 } else {
1199 1310 const char *data = index_deref(self, rev);
1200 1311 ps[0] = getbe32(data + 24);
1201 1312 ps[1] = getbe32(data + 28);
1202 1313 }
1203 1314 }
1204 1315
1205 1316 typedef uint64_t bitmask;
1206 1317
1207 1318 /*
1208 1319 * Given a disjoint set of revs, return all candidates for the
1209 1320 * greatest common ancestor. In revset notation, this is the set
1210 1321 * "heads(::a and ::b and ...)"
1211 1322 */
1212 1323 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
1213 1324 int revcount)
1214 1325 {
1215 1326 const bitmask allseen = (1ull << revcount) - 1;
1216 1327 const bitmask poison = 1ull << revcount;
1217 1328 PyObject *gca = PyList_New(0);
1218 1329 int i, v, interesting;
1219 1330 int maxrev = -1;
1220 1331 long sp;
1221 1332 bitmask *seen;
1222 1333
1223 1334 if (gca == NULL)
1224 1335 return PyErr_NoMemory();
1225 1336
1226 1337 for (i = 0; i < revcount; i++) {
1227 1338 if (revs[i] > maxrev)
1228 1339 maxrev = revs[i];
1229 1340 }
1230 1341
1231 1342 seen = calloc(sizeof(*seen), maxrev + 1);
1232 1343 if (seen == NULL) {
1233 1344 Py_DECREF(gca);
1234 1345 return PyErr_NoMemory();
1235 1346 }
1236 1347
1237 1348 for (i = 0; i < revcount; i++)
1238 1349 seen[revs[i]] = 1ull << i;
1239 1350
1240 1351 interesting = revcount;
1241 1352
1242 1353 for (v = maxrev; v >= 0 && interesting; v--) {
1243 1354 long sv = seen[v];
1244 1355 int parents[2];
1245 1356
1246 1357 if (!sv)
1247 1358 continue;
1248 1359
1249 1360 if (sv < poison) {
1250 1361 interesting -= 1;
1251 1362 if (sv == allseen) {
1252 1363 PyObject *obj = PyInt_FromLong(v);
1253 1364 if (obj == NULL)
1254 1365 goto bail;
1255 1366 if (PyList_Append(gca, obj) == -1) {
1256 1367 Py_DECREF(obj);
1257 1368 goto bail;
1258 1369 }
1259 1370 sv |= poison;
1260 1371 for (i = 0; i < revcount; i++) {
1261 1372 if (revs[i] == v)
1262 1373 goto done;
1263 1374 }
1264 1375 }
1265 1376 }
1266 1377 index_get_parents(self, v, parents);
1267 1378
1268 1379 for (i = 0; i < 2; i++) {
1269 1380 int p = parents[i];
1270 1381 if (p == -1)
1271 1382 continue;
1272 1383 sp = seen[p];
1273 1384 if (sv < poison) {
1274 1385 if (sp == 0) {
1275 1386 seen[p] = sv;
1276 1387 interesting++;
1277 1388 }
1278 1389 else if (sp != sv)
1279 1390 seen[p] |= sv;
1280 1391 } else {
1281 1392 if (sp && sp < poison)
1282 1393 interesting--;
1283 1394 seen[p] = sv;
1284 1395 }
1285 1396 }
1286 1397 }
1287 1398
1288 1399 done:
1289 1400 free(seen);
1290 1401 return gca;
1291 1402 bail:
1292 1403 free(seen);
1293 1404 Py_XDECREF(gca);
1294 1405 return NULL;
1295 1406 }
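
/*
 * Illustrative sketch (not in the original patch): a caller passes a
 * small array of valid revs, exactly as index_ancestors() does below
 * after validating its arguments; the returned candidate list still has
 * to be narrowed to the deepest entries by find_deepest().
 */
static PyObject *example_gca_of_two(indexObject *self, int a, int b)
{
        int revs[2];

        revs[0] = a;
        revs[1] = b;
        return find_gca_candidates(self, revs, 2);
}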
1296 1407
1297 1408 /*
1298 1409 * Given a disjoint set of revs, return the subset with the longest
1299 1410 * path to the root.
1300 1411 */
1301 1412 static PyObject *find_deepest(indexObject *self, PyObject *revs)
1302 1413 {
1303 1414 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
1304 1415 static const Py_ssize_t capacity = 24;
1305 1416 int *depth, *interesting = NULL;
1306 1417 int i, j, v, ninteresting;
1307 1418 PyObject *dict = NULL, *keys = NULL;
1308 1419 long *seen = NULL;
1309 1420 int maxrev = -1;
1310 1421 long final;
1311 1422
1312 1423 if (revcount > capacity) {
1313 1424 PyErr_Format(PyExc_OverflowError,
1314 1425 "bitset size (%ld) > capacity (%ld)",
1315 1426 (long)revcount, (long)capacity);
1316 1427 return NULL;
1317 1428 }
1318 1429
1319 1430 for (i = 0; i < revcount; i++) {
1320 1431 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1321 1432 if (n > maxrev)
1322 1433 maxrev = n;
1323 1434 }
1324 1435
1325 1436 depth = calloc(sizeof(*depth), maxrev + 1);
1326 1437 if (depth == NULL)
1327 1438 return PyErr_NoMemory();
1328 1439
1329 1440 seen = calloc(sizeof(*seen), maxrev + 1);
1330 1441 if (seen == NULL) {
1331 1442 PyErr_NoMemory();
1332 1443 goto bail;
1333 1444 }
1334 1445
1335 1446 interesting = calloc(sizeof(*interesting), 2 << revcount);
1336 1447 if (interesting == NULL) {
1337 1448 PyErr_NoMemory();
1338 1449 goto bail;
1339 1450 }
1340 1451
1341 1452 if (PyList_Sort(revs) == -1)
1342 1453 goto bail;
1343 1454
1344 1455 for (i = 0; i < revcount; i++) {
1345 1456 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1346 1457 long b = 1l << i;
1347 1458 depth[n] = 1;
1348 1459 seen[n] = b;
1349 1460 interesting[b] = 1;
1350 1461 }
1351 1462
1352 1463 ninteresting = (int)revcount;
1353 1464
1354 1465 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
1355 1466 int dv = depth[v];
1356 1467 int parents[2];
1357 1468 long sv;
1358 1469
1359 1470 if (dv == 0)
1360 1471 continue;
1361 1472
1362 1473 sv = seen[v];
1363 1474 index_get_parents(self, v, parents);
1364 1475
1365 1476 for (i = 0; i < 2; i++) {
1366 1477 int p = parents[i];
1367 1478 long nsp, sp;
1368 1479 int dp;
1369 1480
1370 1481 if (p == -1)
1371 1482 continue;
1372 1483
1373 1484 dp = depth[p];
1374 1485 nsp = sp = seen[p];
1375 1486 if (dp <= dv) {
1376 1487 depth[p] = dv + 1;
1377 1488 if (sp != sv) {
1378 1489 interesting[sv] += 1;
1379 1490 nsp = seen[p] = sv;
1380 1491 if (sp) {
1381 1492 interesting[sp] -= 1;
1382 1493 if (interesting[sp] == 0)
1383 1494 ninteresting -= 1;
1384 1495 }
1385 1496 }
1386 1497 }
1387 1498 else if (dv == dp - 1) {
1388 1499 nsp = sp | sv;
1389 1500 if (nsp == sp)
1390 1501 continue;
1391 1502 seen[p] = nsp;
1392 1503 interesting[sp] -= 1;
1393 1504 if (interesting[sp] == 0 && interesting[nsp] > 0)
1394 1505 ninteresting -= 1;
1395 1506 interesting[nsp] += 1;
1396 1507 }
1397 1508 }
1398 1509 interesting[sv] -= 1;
1399 1510 if (interesting[sv] == 0)
1400 1511 ninteresting -= 1;
1401 1512 }
1402 1513
1403 1514 final = 0;
1404 1515 j = ninteresting;
1405 1516 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
1406 1517 if (interesting[i] == 0)
1407 1518 continue;
1408 1519 final |= i;
1409 1520 j -= 1;
1410 1521 }
1411 1522 if (final == 0) {
1412 1523 keys = PyList_New(0);
1413 1524 goto bail;
1414 1525 }
1415 1526
1416 1527 dict = PyDict_New();
1417 1528 if (dict == NULL)
1418 1529 goto bail;
1419 1530
1420 1531 for (i = 0; i < revcount; i++) {
1421 1532 PyObject *key;
1422 1533
1423 1534 if ((final & (1 << i)) == 0)
1424 1535 continue;
1425 1536
1426 1537 key = PyList_GET_ITEM(revs, i);
1427 1538 Py_INCREF(key);
1428 1539 Py_INCREF(Py_None);
1429 1540 if (PyDict_SetItem(dict, key, Py_None) == -1) {
1430 1541 Py_DECREF(key);
1431 1542 Py_DECREF(Py_None);
1432 1543 goto bail;
1433 1544 }
1434 1545 }
1435 1546
1436 1547 keys = PyDict_Keys(dict);
1437 1548
1438 1549 bail:
1439 1550 free(depth);
1440 1551 free(seen);
1441 1552 free(interesting);
1442 1553 Py_XDECREF(dict);
1443 1554
1444 1555 return keys;
1445 1556 }
1446 1557
1447 1558 /*
1448 1559 * Given a (possibly overlapping) set of revs, return the greatest
1449 1560 * common ancestors: those with the longest path to the root.
1450 1561 */
1451 1562 static PyObject *index_ancestors(indexObject *self, PyObject *args)
1452 1563 {
1453 1564 PyObject *ret = NULL, *gca = NULL;
1454 1565 Py_ssize_t argcount, i, len;
1455 1566 bitmask repeat = 0;
1456 1567 int revcount = 0;
1457 1568 int *revs;
1458 1569
1459 1570 argcount = PySequence_Length(args);
1460 1571 revs = malloc(argcount * sizeof(*revs));
1461 1572 if (argcount > 0 && revs == NULL)
1462 1573 return PyErr_NoMemory();
1463 1574 len = index_length(self) - 1;
1464 1575
1465 1576 for (i = 0; i < argcount; i++) {
1466 1577 static const int capacity = 24;
1467 1578 PyObject *obj = PySequence_GetItem(args, i);
1468 1579 bitmask x;
1469 1580 long val;
1470 1581
1471 1582 if (!PyInt_Check(obj)) {
1472 1583 PyErr_SetString(PyExc_TypeError,
1473 1584 "arguments must all be ints");
1474 1585 goto bail;
1475 1586 }
1476 1587 val = PyInt_AsLong(obj);
1477 1588 if (val == -1) {
1478 1589 ret = PyList_New(0);
1479 1590 goto done;
1480 1591 }
1481 1592 if (val < 0 || val >= len) {
1482 1593 PyErr_SetString(PyExc_IndexError,
1483 1594 "index out of range");
1484 1595 goto bail;
1485 1596 }
1486 1597 /* this cheesy bloom filter lets us avoid some more
1487 1598 * expensive duplicate checks in the common set-is-disjoint
1488 1599 * case */
1489 1600 x = 1ull << (val & 0x3f);
1490 1601 if (repeat & x) {
1491 1602 int k;
1492 1603 for (k = 0; k < revcount; k++) {
1493 1604 if (val == revs[k])
1494 1605 goto duplicate;
1495 1606 }
1496 1607 }
1497 1608 else repeat |= x;
1498 1609 if (revcount >= capacity) {
1499 1610 PyErr_Format(PyExc_OverflowError,
1500 1611 "bitset size (%d) > capacity (%d)",
1501 1612 revcount, capacity);
1502 1613 goto bail;
1503 1614 }
1504 1615 revs[revcount++] = (int)val;
1505 1616 duplicate:;
1506 1617 }
1507 1618
1508 1619 if (revcount == 0) {
1509 1620 ret = PyList_New(0);
1510 1621 goto done;
1511 1622 }
1512 1623 if (revcount == 1) {
1513 1624 PyObject *obj;
1514 1625 ret = PyList_New(1);
1515 1626 if (ret == NULL)
1516 1627 goto bail;
1517 1628 obj = PyInt_FromLong(revs[0]);
1518 1629 if (obj == NULL)
1519 1630 goto bail;
1520 1631 PyList_SET_ITEM(ret, 0, obj);
1521 1632 goto done;
1522 1633 }
1523 1634
1524 1635 gca = find_gca_candidates(self, revs, revcount);
1525 1636 if (gca == NULL)
1526 1637 goto bail;
1527 1638
1528 1639 if (PyList_GET_SIZE(gca) <= 1) {
1529 1640 ret = gca;
1530 1641 Py_INCREF(gca);
1531 1642 }
1532 1643 else ret = find_deepest(self, gca);
1533 1644
1534 1645 done:
1535 1646 free(revs);
1536 1647 Py_XDECREF(gca);
1537 1648
1538 1649 return ret;
1539 1650
1540 1651 bail:
1541 1652 free(revs);
1542 1653 Py_XDECREF(gca);
1543 1654 Py_XDECREF(ret);
1544 1655 return NULL;
1545 1656 }
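
/*
 * Illustrative sketch (not part of this change): the "cheesy bloom
 * filter" above keys on the low six bits of each rev, so distinct revs
 * can collide; a hit in `repeat` therefore only triggers the exact
 * duplicate scan, it never discards a rev on its own.
 */
static int example_may_be_duplicate(bitmask repeat, long val)
{
        return (repeat & (1ull << (val & 0x3f))) != 0;
}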
1546 1657
1547 1658 /*
1548 1659 * Given a (possibly overlapping) set of revs, return all the
1549 1660 * common ancestor heads: heads(::args[0] and ::args[1] and ...)
1550 1661 */
1551 1662 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
1552 1663 {
1553 1664 PyObject *ret = NULL;
1554 1665 Py_ssize_t argcount, i, len;
1555 1666 bitmask repeat = 0;
1556 1667 int revcount = 0;
1557 1668 int *revs;
1558 1669
1559 1670 argcount = PySequence_Length(args);
1560 1671 revs = malloc(argcount * sizeof(*revs));
1561 1672 if (argcount > 0 && revs == NULL)
1562 1673 return PyErr_NoMemory();
1563 1674 len = index_length(self) - 1;
1564 1675
1565 1676 for (i = 0; i < argcount; i++) {
1566 1677 static const int capacity = 24;
1567 1678 PyObject *obj = PySequence_GetItem(args, i);
1568 1679 bitmask x;
1569 1680 long val;
1570 1681
1571 1682 if (!PyInt_Check(obj)) {
1572 1683 PyErr_SetString(PyExc_TypeError,
1573 1684 "arguments must all be ints");
1574 1685 goto bail;
1575 1686 }
1576 1687 val = PyInt_AsLong(obj);
1577 1688 if (val == -1) {
1578 1689 ret = PyList_New(0);
1579 1690 goto done;
1580 1691 }
1581 1692 if (val < 0 || val >= len) {
1582 1693 PyErr_SetString(PyExc_IndexError,
1583 1694 "index out of range");
1584 1695 goto bail;
1585 1696 }
1586 1697 /* this cheesy bloom filter lets us avoid some more
1587 1698 * expensive duplicate checks in the common set-is-disjoint
1588 1699 * case */
1589 1700 x = 1ull << (val & 0x3f);
1590 1701 if (repeat & x) {
1591 1702 int k;
1592 1703 for (k = 0; k < revcount; k++) {
1593 1704 if (val == revs[k])
1594 1705 goto duplicate;
1595 1706 }
1596 1707 }
1597 1708 else repeat |= x;
1598 1709 if (revcount >= capacity) {
1599 1710 PyErr_Format(PyExc_OverflowError,
1600 1711 "bitset size (%d) > capacity (%d)",
1601 1712 revcount, capacity);
1602 1713 goto bail;
1603 1714 }
1604 1715 revs[revcount++] = (int)val;
1605 1716 duplicate:;
1606 1717 }
1607 1718
1608 1719 if (revcount == 0) {
1609 1720 ret = PyList_New(0);
1610 1721 goto done;
1611 1722 }
1612 1723 if (revcount == 1) {
1613 1724 PyObject *obj;
1614 1725 ret = PyList_New(1);
1615 1726 if (ret == NULL)
1616 1727 goto bail;
1617 1728 obj = PyInt_FromLong(revs[0]);
1618 1729 if (obj == NULL)
1619 1730 goto bail;
1620 1731 PyList_SET_ITEM(ret, 0, obj);
1621 1732 goto done;
1622 1733 }
1623 1734
1624 1735 ret = find_gca_candidates(self, revs, revcount);
1625 1736 if (ret == NULL)
1626 1737 goto bail;
1627 1738
1628 1739 done:
1629 1740 free(revs);
1630 1741 return ret;
1631 1742
1632 1743 bail:
1633 1744 free(revs);
1634 1745 Py_XDECREF(ret);
1635 1746 return NULL;
1636 1747 }
1637 1748
1638 1749 /*
1639 1750 * Invalidate any trie entries introduced by added revs.
1640 1751 */
1641 1752 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
1642 1753 {
1643 1754 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
1644 1755
1645 1756 for (i = start; i < len; i++) {
1646 1757 PyObject *tuple = PyList_GET_ITEM(self->added, i);
1647 1758 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
1648 1759
1649 1760 nt_insert(self, PyString_AS_STRING(node), -1);
1650 1761 }
1651 1762
1652 1763 if (start == 0)
1653 1764 Py_CLEAR(self->added);
1654 1765 }
1655 1766
1656 1767 /*
1657 1768 * Delete a numeric range of revs; the range must lie at the end of the
1658 1769 * index and must not include the sentinel nullid entry.
1659 1770 */
1660 1771 static int index_slice_del(indexObject *self, PyObject *item)
1661 1772 {
1662 1773 Py_ssize_t start, stop, step, slicelength;
1663 1774 Py_ssize_t length = index_length(self);
1664 1775 int ret = 0;
1665 1776
1666 1777 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
1667 1778 &start, &stop, &step, &slicelength) < 0)
1668 1779 return -1;
1669 1780
1670 1781 if (slicelength <= 0)
1671 1782 return 0;
1672 1783
1673 1784 if ((step < 0 && start < stop) || (step > 0 && start > stop))
1674 1785 stop = start;
1675 1786
1676 1787 if (step < 0) {
1677 1788 stop = start + 1;
1678 1789 start = stop + step*(slicelength - 1) - 1;
1679 1790 step = -step;
1680 1791 }
1681 1792
1682 1793 if (step != 1) {
1683 1794 PyErr_SetString(PyExc_ValueError,
1684 1795 "revlog index delete requires step size of 1");
1685 1796 return -1;
1686 1797 }
1687 1798
1688 1799 if (stop != length - 1) {
1689 1800 PyErr_SetString(PyExc_IndexError,
1690 1801 "revlog index deletion indices are invalid");
1691 1802 return -1;
1692 1803 }
1693 1804
1694 1805 if (start < self->length - 1) {
1695 1806 if (self->nt) {
1696 1807 Py_ssize_t i;
1697 1808
1698 1809 for (i = start + 1; i < self->length - 1; i++) {
1699 1810 const char *node = index_node(self, i);
1700 1811
1701 1812 if (node)
1702 1813 nt_insert(self, node, -1);
1703 1814 }
1704 1815 if (self->added)
1705 1816 nt_invalidate_added(self, 0);
1706 1817 if (self->ntrev > start)
1707 1818 self->ntrev = (int)start;
1708 1819 }
1709 1820 self->length = start + 1;
1710 1821 if (start < self->raw_length) {
1711 1822 if (self->cache) {
1712 1823 Py_ssize_t i;
1713 1824 for (i = start; i < self->raw_length; i++)
1714 1825 Py_CLEAR(self->cache[i]);
1715 1826 }
1716 1827 self->raw_length = start;
1717 1828 }
1718 1829 goto done;
1719 1830 }
1720 1831
1721 1832 if (self->nt) {
1722 1833 nt_invalidate_added(self, start - self->length + 1);
1723 1834 if (self->ntrev > start)
1724 1835 self->ntrev = (int)start;
1725 1836 }
1726 1837 if (self->added)
1727 1838 ret = PyList_SetSlice(self->added, start - self->length + 1,
1728 1839 PyList_GET_SIZE(self->added), NULL);
1729 1840 done:
1730 1841 Py_CLEAR(self->headrevs);
1731 1842 return ret;
1732 1843 }
1733 1844
1734 1845 /*
1735 1846 * Supported ops:
1736 1847 *
1737 1848 * slice deletion
1738 1849 * string assignment (extend node->rev mapping)
1739 1850 * string deletion (shrink node->rev mapping)
1740 1851 */
1741 1852 static int index_assign_subscript(indexObject *self, PyObject *item,
1742 1853 PyObject *value)
1743 1854 {
1744 1855 char *node;
1745 1856 Py_ssize_t nodelen;
1746 1857 long rev;
1747 1858
1748 1859 if (PySlice_Check(item) && value == NULL)
1749 1860 return index_slice_del(self, item);
1750 1861
1751 1862 if (node_check(item, &node, &nodelen) == -1)
1752 1863 return -1;
1753 1864
1754 1865 if (value == NULL)
1755 1866 return self->nt ? nt_insert(self, node, -1) : 0;
1756 1867 rev = PyInt_AsLong(value);
1757 1868 if (rev > INT_MAX || rev < 0) {
1758 1869 if (!PyErr_Occurred())
1759 1870 PyErr_SetString(PyExc_ValueError, "rev out of range");
1760 1871 return -1;
1761 1872 }
1762 1873 return nt_insert(self, node, (int)rev);
1763 1874 }
1764 1875
1765 1876 /*
1766 1877 * Find all RevlogNG entries in an index that has inline data. Update
1767 1878 * the optional "offsets" table with those entries.
1768 1879 */
1769 1880 static long inline_scan(indexObject *self, const char **offsets)
1770 1881 {
1771 1882 const char *data = PyString_AS_STRING(self->data);
1772 1883 Py_ssize_t pos = 0;
1773 1884 Py_ssize_t end = PyString_GET_SIZE(self->data);
1774 1885 long incr = v1_hdrsize;
1775 1886 Py_ssize_t len = 0;
1776 1887
1777 1888 while (pos + v1_hdrsize <= end && pos >= 0) {
1778 1889 uint32_t comp_len;
1779 1890 /* 3rd element of header is length of compressed inline data */
1780 1891 comp_len = getbe32(data + pos + 8);
1781 1892 incr = v1_hdrsize + comp_len;
1782 1893 if (offsets)
1783 1894 offsets[len] = data + pos;
1784 1895 len++;
1785 1896 pos += incr;
1786 1897 }
1787 1898
1788 1899 if (pos != end) {
1789 1900 if (!PyErr_Occurred())
1790 1901 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1791 1902 return -1;
1792 1903 }
1793 1904
1794 1905 return len;
1795 1906 }
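
/*
 * Illustrative sketch (not in the original patch): with inline data the
 * next record header follows the current header plus its compressed
 * chunk, which is exactly the stride inline_scan() records in the
 * offsets table consumed by index_deref().
 */
static const char *example_next_inline_record(const char *rec)
{
        uint32_t comp_len = getbe32(rec + 8);

        return rec + v1_hdrsize + comp_len;
}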
1796 1907
1797 1908 static int index_init(indexObject *self, PyObject *args)
1798 1909 {
1799 1910 PyObject *data_obj, *inlined_obj;
1800 1911 Py_ssize_t size;
1801 1912
1802 1913 /* Initialize before argument-checking to avoid index_dealloc() crash. */
1803 1914 self->raw_length = 0;
1804 1915 self->added = NULL;
1805 1916 self->cache = NULL;
1806 1917 self->data = NULL;
1807 1918 self->headrevs = NULL;
1808 1919 self->nt = NULL;
1809 1920 self->offsets = NULL;
1810 1921
1811 1922 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1812 1923 return -1;
1813 1924 if (!PyString_Check(data_obj)) {
1814 1925 PyErr_SetString(PyExc_TypeError, "data is not a string");
1815 1926 return -1;
1816 1927 }
1817 1928 size = PyString_GET_SIZE(data_obj);
1818 1929
1819 1930 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1820 1931 self->data = data_obj;
1821 1932
1822 1933 self->ntlength = self->ntcapacity = 0;
1823 1934 self->ntdepth = self->ntsplits = 0;
1824 1935 self->ntlookups = self->ntmisses = 0;
1825 1936 self->ntrev = -1;
1826 1937 Py_INCREF(self->data);
1827 1938
1828 1939 if (self->inlined) {
1829 1940 long len = inline_scan(self, NULL);
1830 1941 if (len == -1)
1831 1942 goto bail;
1832 1943 self->raw_length = len;
1833 1944 self->length = len + 1;
1834 1945 } else {
1835 1946 if (size % v1_hdrsize) {
1836 1947 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1837 1948 goto bail;
1838 1949 }
1839 1950 self->raw_length = size / v1_hdrsize;
1840 1951 self->length = self->raw_length + 1;
1841 1952 }
1842 1953
1843 1954 return 0;
1844 1955 bail:
1845 1956 return -1;
1846 1957 }
1847 1958
1848 1959 static PyObject *index_nodemap(indexObject *self)
1849 1960 {
1850 1961 Py_INCREF(self);
1851 1962 return (PyObject *)self;
1852 1963 }
1853 1964
1854 1965 static void index_dealloc(indexObject *self)
1855 1966 {
1856 1967 _index_clearcaches(self);
1857 1968 Py_XDECREF(self->data);
1858 1969 Py_XDECREF(self->added);
1859 1970 PyObject_Del(self);
1860 1971 }
1861 1972
1862 1973 static PySequenceMethods index_sequence_methods = {
1863 1974 (lenfunc)index_length, /* sq_length */
1864 1975 0, /* sq_concat */
1865 1976 0, /* sq_repeat */
1866 1977 (ssizeargfunc)index_get, /* sq_item */
1867 1978 0, /* sq_slice */
1868 1979 0, /* sq_ass_item */
1869 1980 0, /* sq_ass_slice */
1870 1981 (objobjproc)index_contains, /* sq_contains */
1871 1982 };
1872 1983
1873 1984 static PyMappingMethods index_mapping_methods = {
1874 1985 (lenfunc)index_length, /* mp_length */
1875 1986 (binaryfunc)index_getitem, /* mp_subscript */
1876 1987 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1877 1988 };
1878 1989
1879 1990 static PyMethodDef index_methods[] = {
1880 1991 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
1881 1992 "return the gca set of the given revs"},
1882 1993 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
1883 1994 METH_VARARGS,
1884 1995 "return the heads of the common ancestors of the given revs"},
1885 1996 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1886 1997 "clear the index caches"},
1887 1998 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1888 1999 "get an index entry"},
1889 2000 {"headrevs", (PyCFunction)index_headrevs, METH_NOARGS,
1890 2001 "get head revisions"},
1891 2002 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1892 2003 "insert an index entry"},
1893 2004 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
1894 2005 "match a potentially ambiguous node ID"},
1895 2006 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1896 2007 "stats for the index"},
1897 2008 {NULL} /* Sentinel */
1898 2009 };
1899 2010
1900 2011 static PyGetSetDef index_getset[] = {
1901 2012 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
1902 2013 {NULL} /* Sentinel */
1903 2014 };
1904 2015
1905 2016 static PyTypeObject indexType = {
1906 2017 PyObject_HEAD_INIT(NULL)
1907 2018 0, /* ob_size */
1908 2019 "parsers.index", /* tp_name */
1909 2020 sizeof(indexObject), /* tp_basicsize */
1910 2021 0, /* tp_itemsize */
1911 2022 (destructor)index_dealloc, /* tp_dealloc */
1912 2023 0, /* tp_print */
1913 2024 0, /* tp_getattr */
1914 2025 0, /* tp_setattr */
1915 2026 0, /* tp_compare */
1916 2027 0, /* tp_repr */
1917 2028 0, /* tp_as_number */
1918 2029 &index_sequence_methods, /* tp_as_sequence */
1919 2030 &index_mapping_methods, /* tp_as_mapping */
1920 2031 0, /* tp_hash */
1921 2032 0, /* tp_call */
1922 2033 0, /* tp_str */
1923 2034 0, /* tp_getattro */
1924 2035 0, /* tp_setattro */
1925 2036 0, /* tp_as_buffer */
1926 2037 Py_TPFLAGS_DEFAULT, /* tp_flags */
1927 2038 "revlog index", /* tp_doc */
1928 2039 0, /* tp_traverse */
1929 2040 0, /* tp_clear */
1930 2041 0, /* tp_richcompare */
1931 2042 0, /* tp_weaklistoffset */
1932 2043 0, /* tp_iter */
1933 2044 0, /* tp_iternext */
1934 2045 index_methods, /* tp_methods */
1935 2046 0, /* tp_members */
1936 2047 index_getset, /* tp_getset */
1937 2048 0, /* tp_base */
1938 2049 0, /* tp_dict */
1939 2050 0, /* tp_descr_get */
1940 2051 0, /* tp_descr_set */
1941 2052 0, /* tp_dictoffset */
1942 2053 (initproc)index_init, /* tp_init */
1943 2054 0, /* tp_alloc */
1944 2055 };
1945 2056
1946 2057 /*
1947 2058 * returns a tuple of the form (index, cache) with elements as
1948 2059 * follows:
1949 2060 *
1950 2061 * index: an index object that lazily parses RevlogNG records
1951 2062 * cache: if data is inlined, a tuple (0, index_file_content), else None
1952 2063 *
1953 2064 * added complications are for backwards compatibility
1954 2065 */
1955 2066 static PyObject *parse_index2(PyObject *self, PyObject *args)
1956 2067 {
1957 2068 PyObject *tuple = NULL, *cache = NULL;
1958 2069 indexObject *idx;
1959 2070 int ret;
1960 2071
1961 2072 idx = PyObject_New(indexObject, &indexType);
1962 2073 if (idx == NULL)
1963 2074 goto bail;
1964 2075
1965 2076 ret = index_init(idx, args);
1966 2077 if (ret == -1)
1967 2078 goto bail;
1968 2079
1969 2080 if (idx->inlined) {
1970 2081 cache = Py_BuildValue("iO", 0, idx->data);
1971 2082 if (cache == NULL)
1972 2083 goto bail;
1973 2084 } else {
1974 2085 cache = Py_None;
1975 2086 Py_INCREF(cache);
1976 2087 }
1977 2088
1978 2089 tuple = Py_BuildValue("NN", idx, cache);
1979 2090 if (!tuple)
1980 2091 goto bail;
1981 2092 return tuple;
1982 2093
1983 2094 bail:
1984 2095 Py_XDECREF(idx);
1985 2096 Py_XDECREF(cache);
1986 2097 Py_XDECREF(tuple);
1987 2098 return NULL;
1988 2099 }
1989 2100
1990 2101 static char parsers_doc[] = "Efficient content parsing.";
1991 2102
1992 2103 PyObject *encodedir(PyObject *self, PyObject *args);
1993 2104 PyObject *pathencode(PyObject *self, PyObject *args);
1994 2105 PyObject *lowerencode(PyObject *self, PyObject *args);
1995 2106
1996 2107 static PyMethodDef methods[] = {
1997 2108 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
1998 2109 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
1999 2110 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
2000 2111 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
2001 2112 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
2002 2113 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
2003 2114 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
2004 2115 {NULL, NULL}
2005 2116 };
2006 2117
2007 2118 void dirs_module_init(PyObject *mod);
2008 2119
2009 2120 static void module_init(PyObject *mod)
2010 2121 {
2011 2122 /* This module constant has two purposes. First, it lets us unit test
2012 2123 * the ImportError raised without hard-coding any error text. This
2013 2124 * means we can change the text in the future without breaking tests,
2014 2125 * even across changesets without a recompile. Second, its presence
2015 2126 * can be used to determine whether the version-checking logic is
2016 2127 * present, which also helps in testing across changesets without a
2017 2128 * recompile. Note that this means the pure-Python version of parsers
2018 2129 * should not have this module constant. */
2019 2130 PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
2020 2131
2021 2132 dirs_module_init(mod);
2022 2133
2023 2134 indexType.tp_new = PyType_GenericNew;
2024 if (PyType_Ready(&indexType) < 0)
2135 if (PyType_Ready(&indexType) < 0 ||
2136 PyType_Ready(&dirstateTupleType) < 0)
2025 2137 return;
2026 2138 Py_INCREF(&indexType);
2027
2028 2139 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
2140 Py_INCREF(&dirstateTupleType);
2141 PyModule_AddObject(mod, "dirstatetuple",
2142 (PyObject *)&dirstateTupleType);
2029 2143
2030 2144 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
2031 2145 -1, -1, -1, -1, nullid, 20);
2032 2146 if (nullentry)
2033 2147 PyObject_GC_UnTrack(nullentry);
2034 2148 }
2035 2149
2036 2150 static int check_python_version(void)
2037 2151 {
2038 2152 PyObject *sys = PyImport_ImportModule("sys");
2039 2153 long hexversion = PyInt_AsLong(PyObject_GetAttrString(sys, "hexversion"));
2040 2154 /* sys.hexversion is a 32-bit number by default, so the -1 case
2041 2155 * should only occur in unusual circumstances (e.g. if sys.hexversion
2042 2156 * is manually set to an invalid value). */
2043 2157 if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
2044 2158 PyErr_Format(PyExc_ImportError, "%s: The Mercurial extension "
2045 2159 "modules were compiled with Python " PY_VERSION ", but "
2046 2160 "Mercurial is currently using Python with sys.hexversion=%ld: "
2047 2161 "Python %s\n at: %s", versionerrortext, hexversion,
2048 2162 Py_GetVersion(), Py_GetProgramFullPath());
2049 2163 return -1;
2050 2164 }
2051 2165 return 0;
2052 2166 }
2053 2167
2054 2168 #ifdef IS_PY3K
2055 2169 static struct PyModuleDef parsers_module = {
2056 2170 PyModuleDef_HEAD_INIT,
2057 2171 "parsers",
2058 2172 parsers_doc,
2059 2173 -1,
2060 2174 methods
2061 2175 };
2062 2176
2063 2177 PyMODINIT_FUNC PyInit_parsers(void)
2064 2178 {
2065 2179 PyObject *mod;
2066 2180
2067 2181 if (check_python_version() == -1)
2068 2182 return;
2069 2183 mod = PyModule_Create(&parsers_module);
2070 2184 module_init(mod);
2071 2185 return mod;
2072 2186 }
2073 2187 #else
2074 2188 PyMODINIT_FUNC initparsers(void)
2075 2189 {
2076 2190 PyObject *mod;
2077 2191
2078 2192 if (check_python_version() == -1)
2079 2193 return;
2080 2194 mod = Py_InitModule3("parsers", methods, parsers_doc);
2081 2195 module_init(mod);
2082 2196 }
2083 2197 #endif
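
A short usage sketch of the module built from this file (assumptions: Python 2,
a non-empty, non-inline revlog index whose raw bytes are already in `data`, an
unambiguous 12-character prefix, and no error handling):

    from mercurial import parsers

    # parse_index2 returns (index, cache); cache is None for non-inline data
    index, cache = parsers.parse_index2(data, False)
    tip = len(index) - 2              # last real rev; the final row is nullid
    node = index[tip][7]              # 20-byte binary node of tip
    assert index.partialmatch(node.encode('hex')[:12]) == node
    heads = index.headrevs()          # revs without children
    gca = index.ancestors(0, tip)     # deepest common ancestors of 0 and tip
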
@@ -1,115 +1,121 b''
1 1 # parsers.py - Python implementation of parsers.c
2 2 #
3 3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from mercurial.node import bin, nullid
9 9 from mercurial import util
10 10 import struct, zlib, cStringIO
11 11
12 12 _pack = struct.pack
13 13 _unpack = struct.unpack
14 14 _compress = zlib.compress
15 15 _decompress = zlib.decompress
16 16 _sha = util.sha1
17 17
18 # Some code below makes tuples directly because it's more convenient. However,
19 # code outside this module should always use dirstatetuple.
20 def dirstatetuple(*x):
21 # x is a tuple
22 return x
23
18 24 def parse_manifest(mfdict, fdict, lines):
19 25 for l in lines.splitlines():
20 26 f, n = l.split('\0')
21 27 if len(n) > 40:
22 28 fdict[f] = n[40:]
23 29 mfdict[f] = bin(n[:40])
24 30 else:
25 31 mfdict[f] = bin(n)
26 32
27 33 def parse_index2(data, inline):
28 34 def gettype(q):
29 35 return int(q & 0xFFFF)
30 36
31 37 def offset_type(offset, type):
32 38 return long(long(offset) << 16 | type)
33 39
34 40 indexformatng = ">Qiiiiii20s12x"
35 41
36 42 s = struct.calcsize(indexformatng)
37 43 index = []
38 44 cache = None
39 45 off = 0
40 46
41 47 l = len(data) - s
42 48 append = index.append
43 49 if inline:
44 50 cache = (0, data)
45 51 while off <= l:
46 52 e = _unpack(indexformatng, data[off:off + s])
47 53 append(e)
48 54 if e[1] < 0:
49 55 break
50 56 off += e[1] + s
51 57 else:
52 58 while off <= l:
53 59 e = _unpack(indexformatng, data[off:off + s])
54 60 append(e)
55 61 off += s
56 62
57 63 if off != len(data):
58 64 raise ValueError('corrupt index file')
59 65
60 66 if index:
61 67 e = list(index[0])
62 68 type = gettype(e[0])
63 69 e[0] = offset_type(0, type)
64 70 index[0] = tuple(e)
65 71
66 72 # add the magic null revision at -1
67 73 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
68 74
69 75 return index, cache
70 76
71 77 def parse_dirstate(dmap, copymap, st):
72 78 parents = [st[:20], st[20: 40]]
73 79 # dereference fields so they will be local in loop
74 80 format = ">cllll"
75 81 e_size = struct.calcsize(format)
76 82 pos1 = 40
77 83 l = len(st)
78 84
79 85 # the inner loop
80 86 while pos1 < l:
81 87 pos2 = pos1 + e_size
82 88 e = _unpack(">cllll", st[pos1:pos2]) # a literal here is faster
83 89 pos1 = pos2 + e[4]
84 90 f = st[pos2:pos1]
85 91 if '\0' in f:
86 92 f, c = f.split('\0')
87 93 copymap[f] = c
88 94 dmap[f] = e[:4]
89 95 return parents
90 96
91 97 def pack_dirstate(dmap, copymap, pl, now):
92 98 now = int(now)
93 99 cs = cStringIO.StringIO()
94 100 write = cs.write
95 101 write("".join(pl))
96 102 for f, e in dmap.iteritems():
97 103 if e[0] == 'n' and e[3] == now:
98 104 # The file was last modified "simultaneously" with the current
99 105 # write to dirstate (i.e. within the same second for file-
100 106 # systems with a granularity of 1 sec). This commonly happens
101 107 # for at least a couple of files on 'update'.
102 108 # The user could change the file without changing its size
103 109 # within the same second. Invalidate the file's mtime in
104 110 # dirstate, forcing future 'status' calls to compare the
105 111 # contents of the file if the size is the same. This prevents
106 112 # mistakenly treating such files as clean.
107 e = (e[0], e[1], e[2], -1)
113 e = dirstatetuple(e[0], e[1], e[2], -1)
108 114 dmap[f] = e
109 115
110 116 if f in copymap:
111 117 f = "%s\0%s" % (f, copymap[f])
112 118 e = _pack(">cllll", e[0], e[1], e[2], e[3], len(f))
113 119 write(e)
114 120 write(f)
115 121 return cs.getvalue()
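
A small round-trip sketch for the pure-Python dirstate helpers above
(assumptions: it runs inside this module, and the filename and stat values are
invented for the example):

    pl = ['\0' * 20, '\0' * 20]                  # parent nodes (both null)
    dmap, copymap = {}, {}
    dmap['a.txt'] = dirstatetuple('n', 0644, 5, 0)
    raw = pack_dirstate(dmap, copymap, pl, 1)    # now=1, so no mtime is cleared
    dmap2, copymap2 = {}, {}
    assert parse_dirstate(dmap2, copymap2, raw) == pl
    assert dmap2['a.txt'] == ('n', 0644, 5, 0)
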
@@ -1,172 +1,183 b''
1 1 /*
2 2 util.h - utility functions for interfacing with the various python APIs.
3 3
4 4 This software may be used and distributed according to the terms of
5 5 the GNU General Public License, incorporated herein by reference.
6 6 */
7 7
8 8 #ifndef _HG_UTIL_H_
9 9 #define _HG_UTIL_H_
10 10
11 11 #if PY_MAJOR_VERSION >= 3
12 12
13 13 #define IS_PY3K
14 14 #define PyInt_FromLong PyLong_FromLong
15 15 #define PyInt_AsLong PyLong_AsLong
16 16
17 17 /*
18 18 Mapping of some of the python < 2.x PyString* functions to py3k's PyUnicode.
19 19
20 20 The commented names below represent those that are present in the PyBytes
21 21 definitions for python < 2.6 (below in this file) that don't have a direct
22 22 implementation.
23 23 */
24 24
25 25 #define PyStringObject PyUnicodeObject
26 26 #define PyString_Type PyUnicode_Type
27 27
28 28 #define PyString_Check PyUnicode_Check
29 29 #define PyString_CheckExact PyUnicode_CheckExact
30 30 #define PyString_CHECK_INTERNED PyUnicode_CHECK_INTERNED
31 31 #define PyString_AS_STRING PyUnicode_AsLatin1String
32 32 #define PyString_GET_SIZE PyUnicode_GET_SIZE
33 33
34 34 #define PyString_FromStringAndSize PyUnicode_FromStringAndSize
35 35 #define PyString_FromString PyUnicode_FromString
36 36 #define PyString_FromFormatV PyUnicode_FromFormatV
37 37 #define PyString_FromFormat PyUnicode_FromFormat
38 38 /* #define PyString_Size PyUnicode_GET_SIZE */
39 39 /* #define PyString_AsString */
40 40 /* #define PyString_Repr */
41 41 #define PyString_Concat PyUnicode_Concat
42 42 #define PyString_ConcatAndDel PyUnicode_AppendAndDel
43 43 #define _PyString_Resize PyUnicode_Resize
44 44 /* #define _PyString_Eq */
45 45 #define PyString_Format PyUnicode_Format
46 46 /* #define _PyString_FormatLong */
47 47 /* #define PyString_DecodeEscape */
48 48 #define _PyString_Join PyUnicode_Join
49 49 #define PyString_Decode PyUnicode_Decode
50 50 #define PyString_Encode PyUnicode_Encode
51 51 #define PyString_AsEncodedObject PyUnicode_AsEncodedObject
52 52 #define PyString_AsEncodedString PyUnicode_AsEncodedString
53 53 #define PyString_AsDecodedObject PyUnicode_AsDecodedObject
54 54 #define PyString_AsDecodedString PyUnicode_AsDecodedUnicode
55 55 /* #define PyString_AsStringAndSize */
56 56 #define _PyString_InsertThousandsGrouping _PyUnicode_InsertThousandsGrouping
57 57
58 58 #endif /* PY_MAJOR_VERSION */
59 59
60 60 /* Backports from 2.6 */
61 61 #if PY_VERSION_HEX < 0x02060000
62 62
63 63 #define Py_TYPE(ob) (ob)->ob_type
64 64 #define Py_SIZE(ob) (ob)->ob_size
65 65 #define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size,
66 66
67 67 /* Shamelessly stolen from bytesobject.h */
68 68 #define PyBytesObject PyStringObject
69 69 #define PyBytes_Type PyString_Type
70 70
71 71 #define PyBytes_Check PyString_Check
72 72 #define PyBytes_CheckExact PyString_CheckExact
73 73 #define PyBytes_CHECK_INTERNED PyString_CHECK_INTERNED
74 74 #define PyBytes_AS_STRING PyString_AS_STRING
75 75 #define PyBytes_GET_SIZE PyString_GET_SIZE
76 76 #define Py_TPFLAGS_BYTES_SUBCLASS Py_TPFLAGS_STRING_SUBCLASS
77 77
78 78 #define PyBytes_FromStringAndSize PyString_FromStringAndSize
79 79 #define PyBytes_FromString PyString_FromString
80 80 #define PyBytes_FromFormatV PyString_FromFormatV
81 81 #define PyBytes_FromFormat PyString_FromFormat
82 82 #define PyBytes_Size PyString_Size
83 83 #define PyBytes_AsString PyString_AsString
84 84 #define PyBytes_Repr PyString_Repr
85 85 #define PyBytes_Concat PyString_Concat
86 86 #define PyBytes_ConcatAndDel PyString_ConcatAndDel
87 87 #define _PyBytes_Resize _PyString_Resize
88 88 #define _PyBytes_Eq _PyString_Eq
89 89 #define PyBytes_Format PyString_Format
90 90 #define _PyBytes_FormatLong _PyString_FormatLong
91 91 #define PyBytes_DecodeEscape PyString_DecodeEscape
92 92 #define _PyBytes_Join _PyString_Join
93 93 #define PyBytes_Decode PyString_Decode
94 94 #define PyBytes_Encode PyString_Encode
95 95 #define PyBytes_AsEncodedObject PyString_AsEncodedObject
96 96 #define PyBytes_AsEncodedString PyString_AsEncodedString
97 97 #define PyBytes_AsDecodedObject PyString_AsDecodedObject
98 98 #define PyBytes_AsDecodedString PyString_AsDecodedString
99 99 #define PyBytes_AsStringAndSize PyString_AsStringAndSize
100 100 #define _PyBytes_InsertThousandsGrouping _PyString_InsertThousandsGrouping
101 101
102 102 #endif /* PY_VERSION_HEX */
103 103
104 104 #if (PY_VERSION_HEX < 0x02050000)
105 105 /* Definitions to get compatibility with python 2.4 and earlier which
106 106 does not have Py_ssize_t. See also PEP 353.
107 107 Note: msvc (8 or earlier) does not have ssize_t, so we use Py_ssize_t.
108 108 */
109 109 typedef int Py_ssize_t;
110 110 typedef Py_ssize_t (*lenfunc)(PyObject *);
111 111 typedef PyObject *(*ssizeargfunc)(PyObject *, Py_ssize_t);
112 112 #define PyInt_FromSsize_t PyInt_FromLong
113 113
114 114 #if !defined(PY_SSIZE_T_MIN)
115 115 #define PY_SSIZE_T_MAX INT_MAX
116 116 #define PY_SSIZE_T_MIN INT_MIN
117 117 #endif
118 118 #endif
119 119
120 120 #ifdef _WIN32
121 121 #ifdef _MSC_VER
122 122 /* msvc 6.0 has problems */
123 123 #define inline __inline
124 124 typedef signed char int8_t;
125 125 typedef short int16_t;
126 126 typedef long int32_t;
127 127 typedef __int64 int64_t;
128 128 typedef unsigned char uint8_t;
129 129 typedef unsigned short uint16_t;
130 130 typedef unsigned long uint32_t;
131 131 typedef unsigned __int64 uint64_t;
132 132 #else
133 133 #include <stdint.h>
134 134 #endif
135 135 #else
136 136 /* not windows */
137 137 #include <sys/types.h>
138 138 #if defined __BEOS__ && !defined __HAIKU__
139 139 #include <ByteOrder.h>
140 140 #else
141 141 #include <arpa/inet.h>
142 142 #endif
143 143 #include <inttypes.h>
144 144 #endif
145 145
146 146 #if defined __hpux || defined __SUNPRO_C || defined _AIX
147 147 #define inline
148 148 #endif
149 149
150 150 #ifdef __linux
151 151 #define inline __inline
152 152 #endif
153 153
154 typedef struct {
155 PyObject_HEAD
156 char state;
157 int mode;
158 int size;
159 int mtime;
160 } dirstateTupleObject;
161
162 PyTypeObject dirstateTupleType;
163 #define dirstate_tuple_check(op) (Py_TYPE(op) == &dirstateTupleType)
164
154 165 static inline uint32_t getbe32(const char *c)
155 166 {
156 167 const unsigned char *d = (const unsigned char *)c;
157 168
158 169 return ((d[0] << 24) |
159 170 (d[1] << 16) |
160 171 (d[2] << 8) |
161 172 (d[3]));
162 173 }
163 174
164 175 static inline void putbe32(uint32_t x, char *c)
165 176 {
166 177 c[0] = (x >> 24) & 0xff;
167 178 c[1] = (x >> 16) & 0xff;
168 179 c[2] = (x >> 8) & 0xff;
169 180 c[3] = (x) & 0xff;
170 181 }
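
/*
 * Illustrative sketch (not part of this change): putbe32()/getbe32()
 * round-trip a 32-bit value through big-endian byte order regardless of
 * the host's endianness.
 */
static inline int example_be32_roundtrip(void)
{
        char buf[4];

        putbe32(0x01020304u, buf);
        return getbe32(buf) == 0x01020304u && buf[0] == 1 && buf[3] == 4;
}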
171 182
172 183 #endif /* _HG_UTIL_H_ */