##// END OF EJS Templates
ancestors: remove unnecessary handling of 'left'...
Mads Kiilerich -
r20555:4add4386 default
parent child Browse files
Show More
@@ -1,387 +1,385 b''
1 1 # ancestor.py - generic DAG ancestor algorithm for mercurial
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import heapq
9 9 import util
10 10 from node import nullrev
11 11
12 12 def ancestors(pfunc, *orignodes):
13 13 """
14 14 Returns the common ancestors of a and b that are furthest from a
15 15 root (as measured by longest path).
16 16
17 17 pfunc must return a list of parent vertices for a given vertex.
18 18 """
19 19 if not isinstance(orignodes, set):
20 20 orignodes = set(orignodes)
21 21 if nullrev in orignodes:
22 22 return set()
23 23 if len(orignodes) <= 1:
24 24 return orignodes
25 25
26 26 def candidates(nodes):
27 27 allseen = (1 << len(nodes)) - 1
28 28 seen = [0] * (max(nodes) + 1)
29 29 for i, n in enumerate(nodes):
30 30 seen[n] = 1 << i
31 31 poison = 1 << (i + 1)
32 32
33 33 gca = set()
34 interesting = left = len(nodes)
34 interesting = len(nodes)
35 35 nv = len(seen) - 1
36 36 while nv >= 0 and interesting:
37 37 v = nv
38 38 nv -= 1
39 39 if not seen[v]:
40 40 continue
41 41 sv = seen[v]
42 42 if sv < poison:
43 43 interesting -= 1
44 44 if sv == allseen:
45 45 gca.add(v)
46 46 sv |= poison
47 47 if v in nodes:
48 left -= 1
49 if left <= 1:
50 # history is linear
51 return set([v])
48 # history is linear
49 return set([v])
52 50 if sv < poison:
53 51 for p in pfunc(v):
54 52 sp = seen[p]
55 53 if p == nullrev:
56 54 continue
57 55 if sp == 0:
58 56 seen[p] = sv
59 57 interesting += 1
60 58 elif sp != sv:
61 59 seen[p] |= sv
62 60 else:
63 61 for p in pfunc(v):
64 62 if p == nullrev:
65 63 continue
66 64 sp = seen[p]
67 65 if sp and sp < poison:
68 66 interesting -= 1
69 67 seen[p] = sv
70 68 return gca
71 69
72 70 def deepest(nodes):
73 71 interesting = {}
74 72 count = max(nodes) + 1
75 73 depth = [0] * count
76 74 seen = [0] * count
77 75 mapping = []
78 76 for (i, n) in enumerate(sorted(nodes)):
79 77 depth[n] = 1
80 78 b = 1 << i
81 79 seen[n] = b
82 80 interesting[b] = 1
83 81 mapping.append((b, n))
84 82 nv = count - 1
85 83 while nv >= 0 and len(interesting) > 1:
86 84 v = nv
87 85 nv -= 1
88 86 dv = depth[v]
89 87 if dv == 0:
90 88 continue
91 89 sv = seen[v]
92 90 for p in pfunc(v):
93 91 if p == nullrev:
94 92 continue
95 93 dp = depth[p]
96 94 nsp = sp = seen[p]
97 95 if dp <= dv:
98 96 depth[p] = dv + 1
99 97 if sp != sv:
100 98 interesting[sv] += 1
101 99 nsp = seen[p] = sv
102 100 if sp:
103 101 interesting[sp] -= 1
104 102 if interesting[sp] == 0:
105 103 del interesting[sp]
106 104 elif dv == dp - 1:
107 105 nsp = sp | sv
108 106 if nsp == sp:
109 107 continue
110 108 seen[p] = nsp
111 109 interesting.setdefault(nsp, 0)
112 110 interesting[nsp] += 1
113 111 interesting[sp] -= 1
114 112 if interesting[sp] == 0:
115 113 del interesting[sp]
116 114 interesting[sv] -= 1
117 115 if interesting[sv] == 0:
118 116 del interesting[sv]
119 117
120 118 if len(interesting) != 1:
121 119 return []
122 120
123 121 k = 0
124 122 for i in interesting:
125 123 k |= i
126 124 return set(n for (i, n) in mapping if k & i)
127 125
128 126 gca = candidates(orignodes)
129 127
130 128 if len(gca) <= 1:
131 129 return gca
132 130 return deepest(gca)
133 131
134 132 def genericancestor(a, b, pfunc):
135 133 """
136 134 Returns the common ancestor of a and b that is furthest from a
137 135 root (as measured by longest path) or None if no ancestor is
138 136 found. If there are multiple common ancestors at the same
139 137 distance, the first one found is returned.
140 138
141 139 pfunc must return a list of parent vertices for a given vertex
142 140 """
143 141
144 142 if a == b:
145 143 return a
146 144
147 145 a, b = sorted([a, b])
148 146
149 147 # find depth from root of all ancestors
150 148 # depth is stored as a negative for heapq
151 149 parentcache = {}
152 150 visit = [a, b]
153 151 depth = {}
154 152 while visit:
155 153 vertex = visit[-1]
156 154 pl = [p for p in pfunc(vertex) if p != nullrev]
157 155 parentcache[vertex] = pl
158 156 if not pl:
159 157 depth[vertex] = 0
160 158 visit.pop()
161 159 else:
162 160 for p in pl:
163 161 if p == a or p == b: # did we find a or b as a parent?
164 162 return p # we're done
165 163 if p not in depth:
166 164 visit.append(p)
167 165 if visit[-1] == vertex:
168 166 # -(maximum distance of parents + 1)
169 167 depth[vertex] = min([depth[p] for p in pl]) - 1
170 168 visit.pop()
171 169
172 170 # traverse ancestors in order of decreasing distance from root
173 171 def ancestors(vertex):
174 172 h = [(depth[vertex], vertex)]
175 173 seen = set()
176 174 while h:
177 175 d, n = heapq.heappop(h)
178 176 if n not in seen:
179 177 seen.add(n)
180 178 yield (d, n)
181 179 for p in parentcache[n]:
182 180 heapq.heappush(h, (depth[p], p))
183 181
184 182 def generations(vertex):
185 183 sg, s = None, set()
186 184 for g, v in ancestors(vertex):
187 185 if g != sg:
188 186 if sg:
189 187 yield sg, s
190 188 sg, s = g, set((v,))
191 189 else:
192 190 s.add(v)
193 191 yield sg, s
194 192
195 193 x = generations(a)
196 194 y = generations(b)
197 195 gx = x.next()
198 196 gy = y.next()
199 197
200 198 # increment each ancestor list until it is closer to root than
201 199 # the other, or they match
202 200 try:
203 201 while True:
204 202 if gx[0] == gy[0]:
205 203 for v in gx[1]:
206 204 if v in gy[1]:
207 205 return v
208 206 gy = y.next()
209 207 gx = x.next()
210 208 elif gx[0] > gy[0]:
211 209 gy = y.next()
212 210 else:
213 211 gx = x.next()
214 212 except StopIteration:
215 213 return None
216 214
217 215 def missingancestors(revs, bases, pfunc):
218 216 """Return all the ancestors of revs that are not ancestors of bases.
219 217
220 218 This may include elements from revs.
221 219
222 220 Equivalent to the revset (::revs - ::bases). Revs are returned in
223 221 revision number order, which is a topological order.
224 222
225 223 revs and bases should both be iterables. pfunc must return a list of
226 224 parent revs for a given revs.
227 225 """
228 226
229 227 revsvisit = set(revs)
230 228 basesvisit = set(bases)
231 229 if not revsvisit:
232 230 return []
233 231 if not basesvisit:
234 232 basesvisit.add(nullrev)
235 233 start = max(max(revsvisit), max(basesvisit))
236 234 bothvisit = revsvisit.intersection(basesvisit)
237 235 revsvisit.difference_update(bothvisit)
238 236 basesvisit.difference_update(bothvisit)
239 237 # At this point, we hold the invariants that:
240 238 # - revsvisit is the set of nodes we know are an ancestor of at least one
241 239 # of the nodes in revs
242 240 # - basesvisit is the same for bases
243 241 # - bothvisit is the set of nodes we know are ancestors of at least one of
244 242 # the nodes in revs and one of the nodes in bases
245 243 # - a node may be in none or one, but not more, of revsvisit, basesvisit
246 244 # and bothvisit at any given time
247 245 # Now we walk down in reverse topo order, adding parents of nodes already
248 246 # visited to the sets while maintaining the invariants. When a node is
249 247 # found in both revsvisit and basesvisit, it is removed from them and
250 248 # added to bothvisit instead. When revsvisit becomes empty, there are no
251 249 # more ancestors of revs that aren't also ancestors of bases, so exit.
252 250
253 251 missing = []
254 252 for curr in xrange(start, nullrev, -1):
255 253 if not revsvisit:
256 254 break
257 255
258 256 if curr in bothvisit:
259 257 bothvisit.remove(curr)
260 258 # curr's parents might have made it into revsvisit or basesvisit
261 259 # through another path
262 260 for p in pfunc(curr):
263 261 revsvisit.discard(p)
264 262 basesvisit.discard(p)
265 263 bothvisit.add(p)
266 264 continue
267 265
268 266 # curr will never be in both revsvisit and basesvisit, since if it
269 267 # were it'd have been pushed to bothvisit
270 268 if curr in revsvisit:
271 269 missing.append(curr)
272 270 thisvisit = revsvisit
273 271 othervisit = basesvisit
274 272 elif curr in basesvisit:
275 273 thisvisit = basesvisit
276 274 othervisit = revsvisit
277 275 else:
278 276 # not an ancestor of revs or bases: ignore
279 277 continue
280 278
281 279 thisvisit.remove(curr)
282 280 for p in pfunc(curr):
283 281 if p == nullrev:
284 282 pass
285 283 elif p in othervisit or p in bothvisit:
286 284 # p is implicitly in thisvisit. This means p is or should be
287 285 # in bothvisit
288 286 revsvisit.discard(p)
289 287 basesvisit.discard(p)
290 288 bothvisit.add(p)
291 289 else:
292 290 # visit later
293 291 thisvisit.add(p)
294 292
295 293 missing.reverse()
296 294 return missing
297 295
298 296 class lazyancestors(object):
299 297 def __init__(self, cl, revs, stoprev=0, inclusive=False):
300 298 """Create a new object generating ancestors for the given revs. Does
301 299 not generate revs lower than stoprev.
302 300
303 301 This is computed lazily starting from revs. The object supports
304 302 iteration and membership.
305 303
306 304 cl should be a changelog and revs should be an iterable. inclusive is
307 305 a boolean that indicates whether revs should be included. Revs lower
308 306 than stoprev will not be generated.
309 307
310 308 Result does not include the null revision."""
311 309 self._parentrevs = cl.parentrevs
312 310 self._initrevs = revs
313 311 self._stoprev = stoprev
314 312 self._inclusive = inclusive
315 313
316 314 # Initialize data structures for __contains__.
317 315 # For __contains__, we use a heap rather than a deque because
318 316 # (a) it minimizes the number of parentrevs calls made
319 317 # (b) it makes the loop termination condition obvious
320 318 # Python's heap is a min-heap. Multiply all values by -1 to convert it
321 319 # into a max-heap.
322 320 self._containsvisit = [-rev for rev in revs]
323 321 heapq.heapify(self._containsvisit)
324 322 if inclusive:
325 323 self._containsseen = set(revs)
326 324 else:
327 325 self._containsseen = set()
328 326
329 327 def __iter__(self):
330 328 """Generate the ancestors of _initrevs in reverse topological order.
331 329
332 330 If inclusive is False, yield a sequence of revision numbers starting
333 331 with the parents of each revision in revs, i.e., each revision is *not*
334 332 considered an ancestor of itself. Results are in breadth-first order:
335 333 parents of each rev in revs, then parents of those, etc.
336 334
337 335 If inclusive is True, yield all the revs first (ignoring stoprev),
338 336 then yield all the ancestors of revs as when inclusive is False.
339 337 If an element in revs is an ancestor of a different rev it is not
340 338 yielded again."""
341 339 seen = set()
342 340 revs = self._initrevs
343 341 if self._inclusive:
344 342 for rev in revs:
345 343 yield rev
346 344 seen.update(revs)
347 345
348 346 parentrevs = self._parentrevs
349 347 stoprev = self._stoprev
350 348 visit = util.deque(revs)
351 349
352 350 while visit:
353 351 for parent in parentrevs(visit.popleft()):
354 352 if parent >= stoprev and parent not in seen:
355 353 visit.append(parent)
356 354 seen.add(parent)
357 355 yield parent
358 356
359 357 def __contains__(self, target):
360 358 """Test whether target is an ancestor of self._initrevs."""
361 359 # Trying to do both __iter__ and __contains__ using the same visit
362 360 # heap and seen set is complex enough that it slows down both. Keep
363 361 # them separate.
364 362 seen = self._containsseen
365 363 if target in seen:
366 364 return True
367 365
368 366 parentrevs = self._parentrevs
369 367 visit = self._containsvisit
370 368 stoprev = self._stoprev
371 369 heappop = heapq.heappop
372 370 heappush = heapq.heappush
373 371
374 372 targetseen = False
375 373
376 374 while visit and -visit[0] > target and not targetseen:
377 375 for parent in parentrevs(-heappop(visit)):
378 376 if parent < stoprev or parent in seen:
379 377 continue
380 378 # We need to make sure we push all parents into the heap so
381 379 # that we leave it in a consistent state for future calls.
382 380 heappush(visit, -parent)
383 381 seen.add(parent)
384 382 if parent == target:
385 383 targetseen = True
386 384
387 385 return targetseen
@@ -1,1955 +1,1952 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #include <Python.h>
11 11 #include <ctype.h>
12 12 #include <stddef.h>
13 13 #include <string.h>
14 14
15 15 #include "util.h"
16 16
17 17 static int8_t hextable[256] = {
18 18 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
19 19 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
20 20 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
21 21 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, /* 0-9 */
22 22 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* A-F */
23 23 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
24 24 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* a-f */
25 25 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
26 26 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
27 27 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
28 28 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
29 29 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
30 30 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
31 31 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
32 32 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
33 33 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
34 34 };
35 35
36 36 static inline int hexdigit(const char *p, Py_ssize_t off)
37 37 {
38 38 int8_t val = hextable[(unsigned char)p[off]];
39 39
40 40 if (val >= 0) {
41 41 return val;
42 42 }
43 43
44 44 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
45 45 return 0;
46 46 }
47 47
48 48 /*
49 49 * Turn a hex-encoded string into binary.
50 50 */
51 51 static PyObject *unhexlify(const char *str, int len)
52 52 {
53 53 PyObject *ret;
54 54 char *d;
55 55 int i;
56 56
57 57 ret = PyBytes_FromStringAndSize(NULL, len / 2);
58 58
59 59 if (!ret)
60 60 return NULL;
61 61
62 62 d = PyBytes_AsString(ret);
63 63
64 64 for (i = 0; i < len;) {
65 65 int hi = hexdigit(str, i++);
66 66 int lo = hexdigit(str, i++);
67 67 *d++ = (hi << 4) | lo;
68 68 }
69 69
70 70 return ret;
71 71 }
72 72
73 73 /*
74 74 * This code assumes that a manifest is stitched together with newline
75 75 * ('\n') characters.
76 76 */
77 77 static PyObject *parse_manifest(PyObject *self, PyObject *args)
78 78 {
79 79 PyObject *mfdict, *fdict;
80 80 char *str, *start, *end;
81 81 int len;
82 82
83 83 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
84 84 &PyDict_Type, &mfdict,
85 85 &PyDict_Type, &fdict,
86 86 &str, &len))
87 87 goto quit;
88 88
89 89 start = str;
90 90 end = str + len;
91 91 while (start < end) {
92 92 PyObject *file = NULL, *node = NULL;
93 93 PyObject *flags = NULL;
94 94 char *zero = NULL, *newline = NULL;
95 95 ptrdiff_t nlen;
96 96
97 97 zero = memchr(start, '\0', end - start);
98 98 if (!zero) {
99 99 PyErr_SetString(PyExc_ValueError,
100 100 "manifest entry has no separator");
101 101 goto quit;
102 102 }
103 103
104 104 newline = memchr(zero + 1, '\n', end - (zero + 1));
105 105 if (!newline) {
106 106 PyErr_SetString(PyExc_ValueError,
107 107 "manifest contains trailing garbage");
108 108 goto quit;
109 109 }
110 110
111 111 file = PyBytes_FromStringAndSize(start, zero - start);
112 112
113 113 if (!file)
114 114 goto bail;
115 115
116 116 nlen = newline - zero - 1;
117 117
118 118 node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen);
119 119 if (!node)
120 120 goto bail;
121 121
122 122 if (nlen > 40) {
123 123 flags = PyBytes_FromStringAndSize(zero + 41,
124 124 nlen - 40);
125 125 if (!flags)
126 126 goto bail;
127 127
128 128 if (PyDict_SetItem(fdict, file, flags) == -1)
129 129 goto bail;
130 130 }
131 131
132 132 if (PyDict_SetItem(mfdict, file, node) == -1)
133 133 goto bail;
134 134
135 135 start = newline + 1;
136 136
137 137 Py_XDECREF(flags);
138 138 Py_XDECREF(node);
139 139 Py_XDECREF(file);
140 140 continue;
141 141 bail:
142 142 Py_XDECREF(flags);
143 143 Py_XDECREF(node);
144 144 Py_XDECREF(file);
145 145 goto quit;
146 146 }
147 147
148 148 Py_INCREF(Py_None);
149 149 return Py_None;
150 150 quit:
151 151 return NULL;
152 152 }
153 153
154 154 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
155 155 {
156 156 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
157 157 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
158 158 char state, *cur, *str, *cpos;
159 159 int mode, size, mtime;
160 160 unsigned int flen;
161 161 int len, pos = 40;
162 162
163 163 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
164 164 &PyDict_Type, &dmap,
165 165 &PyDict_Type, &cmap,
166 166 &str, &len))
167 167 goto quit;
168 168
169 169 /* read parents */
170 170 if (len < 40)
171 171 goto quit;
172 172
173 173 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
174 174 if (!parents)
175 175 goto quit;
176 176
177 177 /* read filenames */
178 178 while (pos >= 40 && pos < len) {
179 179 cur = str + pos;
180 180 /* unpack header */
181 181 state = *cur;
182 182 mode = getbe32(cur + 1);
183 183 size = getbe32(cur + 5);
184 184 mtime = getbe32(cur + 9);
185 185 flen = getbe32(cur + 13);
186 186 pos += 17;
187 187 cur += 17;
188 188 if (flen > len - pos) {
189 189 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
190 190 goto quit;
191 191 }
192 192
193 193 entry = Py_BuildValue("ciii", state, mode, size, mtime);
194 194 if (!entry)
195 195 goto quit;
196 196 PyObject_GC_UnTrack(entry); /* don't waste time with this */
197 197
198 198 cpos = memchr(cur, 0, flen);
199 199 if (cpos) {
200 200 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
201 201 cname = PyBytes_FromStringAndSize(cpos + 1,
202 202 flen - (cpos - cur) - 1);
203 203 if (!fname || !cname ||
204 204 PyDict_SetItem(cmap, fname, cname) == -1 ||
205 205 PyDict_SetItem(dmap, fname, entry) == -1)
206 206 goto quit;
207 207 Py_DECREF(cname);
208 208 } else {
209 209 fname = PyBytes_FromStringAndSize(cur, flen);
210 210 if (!fname ||
211 211 PyDict_SetItem(dmap, fname, entry) == -1)
212 212 goto quit;
213 213 }
214 214 Py_DECREF(fname);
215 215 Py_DECREF(entry);
216 216 fname = cname = entry = NULL;
217 217 pos += flen;
218 218 }
219 219
220 220 ret = parents;
221 221 Py_INCREF(ret);
222 222 quit:
223 223 Py_XDECREF(fname);
224 224 Py_XDECREF(cname);
225 225 Py_XDECREF(entry);
226 226 Py_XDECREF(parents);
227 227 return ret;
228 228 }
229 229
230 230 static inline int getintat(PyObject *tuple, int off, uint32_t *v)
231 231 {
232 232 PyObject *o = PyTuple_GET_ITEM(tuple, off);
233 233 long val;
234 234
235 235 if (PyInt_Check(o))
236 236 val = PyInt_AS_LONG(o);
237 237 else if (PyLong_Check(o)) {
238 238 val = PyLong_AsLong(o);
239 239 if (val == -1 && PyErr_Occurred())
240 240 return -1;
241 241 } else {
242 242 PyErr_SetString(PyExc_TypeError, "expected an int or long");
243 243 return -1;
244 244 }
245 245 if (LONG_MAX > INT_MAX && (val > INT_MAX || val < INT_MIN)) {
246 246 PyErr_SetString(PyExc_OverflowError,
247 247 "Python value to large to convert to uint32_t");
248 248 return -1;
249 249 }
250 250 *v = (uint32_t)val;
251 251 return 0;
252 252 }
253 253
254 254 static PyObject *dirstate_unset;
255 255
256 256 /*
257 257 * Efficiently pack a dirstate object into its on-disk format.
258 258 */
259 259 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
260 260 {
261 261 PyObject *packobj = NULL;
262 262 PyObject *map, *copymap, *pl;
263 263 Py_ssize_t nbytes, pos, l;
264 264 PyObject *k, *v, *pn;
265 265 char *p, *s;
266 266 double now;
267 267
268 268 if (!PyArg_ParseTuple(args, "O!O!Od:pack_dirstate",
269 269 &PyDict_Type, &map, &PyDict_Type, &copymap,
270 270 &pl, &now))
271 271 return NULL;
272 272
273 273 if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
274 274 PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
275 275 return NULL;
276 276 }
277 277
278 278 /* Figure out how much we need to allocate. */
279 279 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
280 280 PyObject *c;
281 281 if (!PyString_Check(k)) {
282 282 PyErr_SetString(PyExc_TypeError, "expected string key");
283 283 goto bail;
284 284 }
285 285 nbytes += PyString_GET_SIZE(k) + 17;
286 286 c = PyDict_GetItem(copymap, k);
287 287 if (c) {
288 288 if (!PyString_Check(c)) {
289 289 PyErr_SetString(PyExc_TypeError,
290 290 "expected string key");
291 291 goto bail;
292 292 }
293 293 nbytes += PyString_GET_SIZE(c) + 1;
294 294 }
295 295 }
296 296
297 297 packobj = PyString_FromStringAndSize(NULL, nbytes);
298 298 if (packobj == NULL)
299 299 goto bail;
300 300
301 301 p = PyString_AS_STRING(packobj);
302 302
303 303 pn = PySequence_ITEM(pl, 0);
304 304 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
305 305 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
306 306 goto bail;
307 307 }
308 308 memcpy(p, s, l);
309 309 p += 20;
310 310 pn = PySequence_ITEM(pl, 1);
311 311 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
312 312 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
313 313 goto bail;
314 314 }
315 315 memcpy(p, s, l);
316 316 p += 20;
317 317
318 318 for (pos = 0; PyDict_Next(map, &pos, &k, &v); ) {
319 319 uint32_t mode, size, mtime;
320 320 Py_ssize_t len, l;
321 321 PyObject *o;
322 322 char *s, *t;
323 323
324 324 if (!PyTuple_Check(v) || PyTuple_GET_SIZE(v) != 4) {
325 325 PyErr_SetString(PyExc_TypeError, "expected a 4-tuple");
326 326 goto bail;
327 327 }
328 328 o = PyTuple_GET_ITEM(v, 0);
329 329 if (PyString_AsStringAndSize(o, &s, &l) == -1 || l != 1) {
330 330 PyErr_SetString(PyExc_TypeError, "expected one byte");
331 331 goto bail;
332 332 }
333 333 *p++ = *s;
334 334 if (getintat(v, 1, &mode) == -1)
335 335 goto bail;
336 336 if (getintat(v, 2, &size) == -1)
337 337 goto bail;
338 338 if (getintat(v, 3, &mtime) == -1)
339 339 goto bail;
340 340 if (*s == 'n' && mtime == (uint32_t)now) {
341 341 /* See pure/parsers.py:pack_dirstate for why we do
342 342 * this. */
343 343 if (PyDict_SetItem(map, k, dirstate_unset) == -1)
344 344 goto bail;
345 345 mtime = -1;
346 346 }
347 347 putbe32(mode, p);
348 348 putbe32(size, p + 4);
349 349 putbe32(mtime, p + 8);
350 350 t = p + 12;
351 351 p += 16;
352 352 len = PyString_GET_SIZE(k);
353 353 memcpy(p, PyString_AS_STRING(k), len);
354 354 p += len;
355 355 o = PyDict_GetItem(copymap, k);
356 356 if (o) {
357 357 *p++ = '\0';
358 358 l = PyString_GET_SIZE(o);
359 359 memcpy(p, PyString_AS_STRING(o), l);
360 360 p += l;
361 361 len += l + 1;
362 362 }
363 363 putbe32((uint32_t)len, t);
364 364 }
365 365
366 366 pos = p - PyString_AS_STRING(packobj);
367 367 if (pos != nbytes) {
368 368 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
369 369 (long)pos, (long)nbytes);
370 370 goto bail;
371 371 }
372 372
373 373 return packobj;
374 374 bail:
375 375 Py_XDECREF(packobj);
376 376 return NULL;
377 377 }
378 378
379 379 /*
380 380 * A base-16 trie for fast node->rev mapping.
381 381 *
382 382 * Positive value is index of the next node in the trie
383 383 * Negative value is a leaf: -(rev + 1)
384 384 * Zero is empty
385 385 */
386 386 typedef struct {
387 387 int children[16];
388 388 } nodetree;
389 389
390 390 /*
391 391 * This class has two behaviours.
392 392 *
393 393 * When used in a list-like way (with integer keys), we decode an
394 394 * entry in a RevlogNG index file on demand. Our last entry is a
395 395 * sentinel, always a nullid. We have limited support for
396 396 * integer-keyed insert and delete, only at elements right before the
397 397 * sentinel.
398 398 *
399 399 * With string keys, we lazily perform a reverse mapping from node to
400 400 * rev, using a base-16 trie.
401 401 */
402 402 typedef struct {
403 403 PyObject_HEAD
404 404 /* Type-specific fields go here. */
405 405 PyObject *data; /* raw bytes of index */
406 406 PyObject **cache; /* cached tuples */
407 407 const char **offsets; /* populated on demand */
408 408 Py_ssize_t raw_length; /* original number of elements */
409 409 Py_ssize_t length; /* current number of elements */
410 410 PyObject *added; /* populated on demand */
411 411 PyObject *headrevs; /* cache, invalidated on changes */
412 412 nodetree *nt; /* base-16 trie */
413 413 int ntlength; /* # nodes in use */
414 414 int ntcapacity; /* # nodes allocated */
415 415 int ntdepth; /* maximum depth of tree */
416 416 int ntsplits; /* # splits performed */
417 417 int ntrev; /* last rev scanned */
418 418 int ntlookups; /* # lookups */
419 419 int ntmisses; /* # lookups that miss the cache */
420 420 int inlined;
421 421 } indexObject;
422 422
423 423 static Py_ssize_t index_length(const indexObject *self)
424 424 {
425 425 if (self->added == NULL)
426 426 return self->length;
427 427 return self->length + PyList_GET_SIZE(self->added);
428 428 }
429 429
430 430 static PyObject *nullentry;
431 431 static const char nullid[20];
432 432
433 433 static long inline_scan(indexObject *self, const char **offsets);
434 434
435 435 #if LONG_MAX == 0x7fffffffL
436 436 static char *tuple_format = "Kiiiiiis#";
437 437 #else
438 438 static char *tuple_format = "kiiiiiis#";
439 439 #endif
440 440
441 441 /* A RevlogNG v1 index entry is 64 bytes long. */
442 442 static const long v1_hdrsize = 64;
443 443
444 444 /*
445 445 * Return a pointer to the beginning of a RevlogNG record.
446 446 */
447 447 static const char *index_deref(indexObject *self, Py_ssize_t pos)
448 448 {
449 449 if (self->inlined && pos > 0) {
450 450 if (self->offsets == NULL) {
451 451 self->offsets = malloc(self->raw_length *
452 452 sizeof(*self->offsets));
453 453 if (self->offsets == NULL)
454 454 return (const char *)PyErr_NoMemory();
455 455 inline_scan(self, self->offsets);
456 456 }
457 457 return self->offsets[pos];
458 458 }
459 459
460 460 return PyString_AS_STRING(self->data) + pos * v1_hdrsize;
461 461 }
462 462
463 463 /*
464 464 * RevlogNG format (all in big endian, data may be inlined):
465 465 * 6 bytes: offset
466 466 * 2 bytes: flags
467 467 * 4 bytes: compressed length
468 468 * 4 bytes: uncompressed length
469 469 * 4 bytes: base revision
470 470 * 4 bytes: link revision
471 471 * 4 bytes: parent 1 revision
472 472 * 4 bytes: parent 2 revision
473 473 * 32 bytes: nodeid (only 20 bytes used)
474 474 */
475 475 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
476 476 {
477 477 uint64_t offset_flags;
478 478 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
479 479 const char *c_node_id;
480 480 const char *data;
481 481 Py_ssize_t length = index_length(self);
482 482 PyObject *entry;
483 483
484 484 if (pos < 0)
485 485 pos += length;
486 486
487 487 if (pos < 0 || pos >= length) {
488 488 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
489 489 return NULL;
490 490 }
491 491
492 492 if (pos == length - 1) {
493 493 Py_INCREF(nullentry);
494 494 return nullentry;
495 495 }
496 496
497 497 if (pos >= self->length - 1) {
498 498 PyObject *obj;
499 499 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
500 500 Py_INCREF(obj);
501 501 return obj;
502 502 }
503 503
504 504 if (self->cache) {
505 505 if (self->cache[pos]) {
506 506 Py_INCREF(self->cache[pos]);
507 507 return self->cache[pos];
508 508 }
509 509 } else {
510 510 self->cache = calloc(self->raw_length, sizeof(PyObject *));
511 511 if (self->cache == NULL)
512 512 return PyErr_NoMemory();
513 513 }
514 514
515 515 data = index_deref(self, pos);
516 516 if (data == NULL)
517 517 return NULL;
518 518
519 519 offset_flags = getbe32(data + 4);
520 520 if (pos == 0) /* mask out version number for the first entry */
521 521 offset_flags &= 0xFFFF;
522 522 else {
523 523 uint32_t offset_high = getbe32(data);
524 524 offset_flags |= ((uint64_t)offset_high) << 32;
525 525 }
526 526
527 527 comp_len = getbe32(data + 8);
528 528 uncomp_len = getbe32(data + 12);
529 529 base_rev = getbe32(data + 16);
530 530 link_rev = getbe32(data + 20);
531 531 parent_1 = getbe32(data + 24);
532 532 parent_2 = getbe32(data + 28);
533 533 c_node_id = data + 32;
534 534
535 535 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
536 536 uncomp_len, base_rev, link_rev,
537 537 parent_1, parent_2, c_node_id, 20);
538 538
539 539 if (entry) {
540 540 PyObject_GC_UnTrack(entry);
541 541 Py_INCREF(entry);
542 542 }
543 543
544 544 self->cache[pos] = entry;
545 545
546 546 return entry;
547 547 }
548 548
549 549 /*
550 550 * Return the 20-byte SHA of the node corresponding to the given rev.
551 551 */
552 552 static const char *index_node(indexObject *self, Py_ssize_t pos)
553 553 {
554 554 Py_ssize_t length = index_length(self);
555 555 const char *data;
556 556
557 557 if (pos == length - 1 || pos == INT_MAX)
558 558 return nullid;
559 559
560 560 if (pos >= length)
561 561 return NULL;
562 562
563 563 if (pos >= self->length - 1) {
564 564 PyObject *tuple, *str;
565 565 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
566 566 str = PyTuple_GetItem(tuple, 7);
567 567 return str ? PyString_AS_STRING(str) : NULL;
568 568 }
569 569
570 570 data = index_deref(self, pos);
571 571 return data ? data + 32 : NULL;
572 572 }
573 573
574 574 static int nt_insert(indexObject *self, const char *node, int rev);
575 575
576 576 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
577 577 {
578 578 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
579 579 return -1;
580 580 if (*nodelen == 20)
581 581 return 0;
582 582 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
583 583 return -1;
584 584 }
585 585
586 586 static PyObject *index_insert(indexObject *self, PyObject *args)
587 587 {
588 588 PyObject *obj;
589 589 char *node;
590 590 long offset;
591 591 Py_ssize_t len, nodelen;
592 592
593 593 if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
594 594 return NULL;
595 595
596 596 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
597 597 PyErr_SetString(PyExc_TypeError, "8-tuple required");
598 598 return NULL;
599 599 }
600 600
601 601 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
602 602 return NULL;
603 603
604 604 len = index_length(self);
605 605
606 606 if (offset < 0)
607 607 offset += len;
608 608
609 609 if (offset != len - 1) {
610 610 PyErr_SetString(PyExc_IndexError,
611 611 "insert only supported at index -1");
612 612 return NULL;
613 613 }
614 614
615 615 if (offset > INT_MAX) {
616 616 PyErr_SetString(PyExc_ValueError,
617 617 "currently only 2**31 revs supported");
618 618 return NULL;
619 619 }
620 620
621 621 if (self->added == NULL) {
622 622 self->added = PyList_New(0);
623 623 if (self->added == NULL)
624 624 return NULL;
625 625 }
626 626
627 627 if (PyList_Append(self->added, obj) == -1)
628 628 return NULL;
629 629
630 630 if (self->nt)
631 631 nt_insert(self, node, (int)offset);
632 632
633 633 Py_CLEAR(self->headrevs);
634 634 Py_RETURN_NONE;
635 635 }
636 636
637 637 static void _index_clearcaches(indexObject *self)
638 638 {
639 639 if (self->cache) {
640 640 Py_ssize_t i;
641 641
642 642 for (i = 0; i < self->raw_length; i++)
643 643 Py_CLEAR(self->cache[i]);
644 644 free(self->cache);
645 645 self->cache = NULL;
646 646 }
647 647 if (self->offsets) {
648 648 free(self->offsets);
649 649 self->offsets = NULL;
650 650 }
651 651 if (self->nt) {
652 652 free(self->nt);
653 653 self->nt = NULL;
654 654 }
655 655 Py_CLEAR(self->headrevs);
656 656 }
657 657
658 658 static PyObject *index_clearcaches(indexObject *self)
659 659 {
660 660 _index_clearcaches(self);
661 661 self->ntlength = self->ntcapacity = 0;
662 662 self->ntdepth = self->ntsplits = 0;
663 663 self->ntrev = -1;
664 664 self->ntlookups = self->ntmisses = 0;
665 665 Py_RETURN_NONE;
666 666 }
667 667
668 668 static PyObject *index_stats(indexObject *self)
669 669 {
670 670 PyObject *obj = PyDict_New();
671 671
672 672 if (obj == NULL)
673 673 return NULL;
674 674
675 675 #define istat(__n, __d) \
676 676 if (PyDict_SetItemString(obj, __d, PyInt_FromSsize_t(self->__n)) == -1) \
677 677 goto bail;
678 678
679 679 if (self->added) {
680 680 Py_ssize_t len = PyList_GET_SIZE(self->added);
681 681 if (PyDict_SetItemString(obj, "index entries added",
682 682 PyInt_FromSsize_t(len)) == -1)
683 683 goto bail;
684 684 }
685 685
686 686 if (self->raw_length != self->length - 1)
687 687 istat(raw_length, "revs on disk");
688 688 istat(length, "revs in memory");
689 689 istat(ntcapacity, "node trie capacity");
690 690 istat(ntdepth, "node trie depth");
691 691 istat(ntlength, "node trie count");
692 692 istat(ntlookups, "node trie lookups");
693 693 istat(ntmisses, "node trie misses");
694 694 istat(ntrev, "node trie last rev scanned");
695 695 istat(ntsplits, "node trie splits");
696 696
697 697 #undef istat
698 698
699 699 return obj;
700 700
701 701 bail:
702 702 Py_XDECREF(obj);
703 703 return NULL;
704 704 }
705 705
706 706 /*
707 707 * When we cache a list, we want to be sure the caller can't mutate
708 708 * the cached copy.
709 709 */
710 710 static PyObject *list_copy(PyObject *list)
711 711 {
712 712 Py_ssize_t len = PyList_GET_SIZE(list);
713 713 PyObject *newlist = PyList_New(len);
714 714 Py_ssize_t i;
715 715
716 716 if (newlist == NULL)
717 717 return NULL;
718 718
719 719 for (i = 0; i < len; i++) {
720 720 PyObject *obj = PyList_GET_ITEM(list, i);
721 721 Py_INCREF(obj);
722 722 PyList_SET_ITEM(newlist, i, obj);
723 723 }
724 724
725 725 return newlist;
726 726 }
727 727
728 728 static PyObject *index_headrevs(indexObject *self)
729 729 {
730 730 Py_ssize_t i, len, addlen;
731 731 char *nothead = NULL;
732 732 PyObject *heads;
733 733
734 734 if (self->headrevs)
735 735 return list_copy(self->headrevs);
736 736
737 737 len = index_length(self) - 1;
738 738 heads = PyList_New(0);
739 739 if (heads == NULL)
740 740 goto bail;
741 741 if (len == 0) {
742 742 PyObject *nullid = PyInt_FromLong(-1);
743 743 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
744 744 Py_XDECREF(nullid);
745 745 goto bail;
746 746 }
747 747 goto done;
748 748 }
749 749
750 750 nothead = calloc(len, 1);
751 751 if (nothead == NULL)
752 752 goto bail;
753 753
754 754 for (i = 0; i < self->raw_length; i++) {
755 755 const char *data = index_deref(self, i);
756 756 int parent_1 = getbe32(data + 24);
757 757 int parent_2 = getbe32(data + 28);
758 758 if (parent_1 >= 0)
759 759 nothead[parent_1] = 1;
760 760 if (parent_2 >= 0)
761 761 nothead[parent_2] = 1;
762 762 }
763 763
764 764 addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
765 765
766 766 for (i = 0; i < addlen; i++) {
767 767 PyObject *rev = PyList_GET_ITEM(self->added, i);
768 768 PyObject *p1 = PyTuple_GET_ITEM(rev, 5);
769 769 PyObject *p2 = PyTuple_GET_ITEM(rev, 6);
770 770 long parent_1, parent_2;
771 771
772 772 if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
773 773 PyErr_SetString(PyExc_TypeError,
774 774 "revlog parents are invalid");
775 775 goto bail;
776 776 }
777 777 parent_1 = PyInt_AS_LONG(p1);
778 778 parent_2 = PyInt_AS_LONG(p2);
779 779 if (parent_1 >= 0)
780 780 nothead[parent_1] = 1;
781 781 if (parent_2 >= 0)
782 782 nothead[parent_2] = 1;
783 783 }
784 784
785 785 for (i = 0; i < len; i++) {
786 786 PyObject *head;
787 787
788 788 if (nothead[i])
789 789 continue;
790 790 head = PyInt_FromLong(i);
791 791 if (head == NULL || PyList_Append(heads, head) == -1) {
792 792 Py_XDECREF(head);
793 793 goto bail;
794 794 }
795 795 }
796 796
797 797 done:
798 798 self->headrevs = heads;
799 799 free(nothead);
800 800 return list_copy(self->headrevs);
801 801 bail:
802 802 Py_XDECREF(heads);
803 803 free(nothead);
804 804 return NULL;
805 805 }
806 806
807 807 static inline int nt_level(const char *node, Py_ssize_t level)
808 808 {
809 809 int v = node[level>>1];
810 810 if (!(level & 1))
811 811 v >>= 4;
812 812 return v & 0xf;
813 813 }
814 814
815 815 /*
816 816 * Return values:
817 817 *
818 818 * -4: match is ambiguous (multiple candidates)
819 819 * -2: not found
820 820 * rest: valid rev
821 821 */
822 822 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
823 823 int hex)
824 824 {
825 825 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
826 826 int level, maxlevel, off;
827 827
828 828 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
829 829 return -1;
830 830
831 831 if (self->nt == NULL)
832 832 return -2;
833 833
834 834 if (hex)
835 835 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
836 836 else
837 837 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
838 838
839 839 for (level = off = 0; level < maxlevel; level++) {
840 840 int k = getnybble(node, level);
841 841 nodetree *n = &self->nt[off];
842 842 int v = n->children[k];
843 843
844 844 if (v < 0) {
845 845 const char *n;
846 846 Py_ssize_t i;
847 847
848 848 v = -v - 1;
849 849 n = index_node(self, v);
850 850 if (n == NULL)
851 851 return -2;
852 852 for (i = level; i < maxlevel; i++)
853 853 if (getnybble(node, i) != nt_level(n, i))
854 854 return -2;
855 855 return v;
856 856 }
857 857 if (v == 0)
858 858 return -2;
859 859 off = v;
860 860 }
861 861 /* multiple matches against an ambiguous prefix */
862 862 return -4;
863 863 }
864 864
865 865 static int nt_new(indexObject *self)
866 866 {
867 867 if (self->ntlength == self->ntcapacity) {
868 868 self->ntcapacity *= 2;
869 869 self->nt = realloc(self->nt,
870 870 self->ntcapacity * sizeof(nodetree));
871 871 if (self->nt == NULL) {
872 872 PyErr_SetString(PyExc_MemoryError, "out of memory");
873 873 return -1;
874 874 }
875 875 memset(&self->nt[self->ntlength], 0,
876 876 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
877 877 }
878 878 return self->ntlength++;
879 879 }
880 880
881 881 static int nt_insert(indexObject *self, const char *node, int rev)
882 882 {
883 883 int level = 0;
884 884 int off = 0;
885 885
886 886 while (level < 40) {
887 887 int k = nt_level(node, level);
888 888 nodetree *n;
889 889 int v;
890 890
891 891 n = &self->nt[off];
892 892 v = n->children[k];
893 893
894 894 if (v == 0) {
895 895 n->children[k] = -rev - 1;
896 896 return 0;
897 897 }
898 898 if (v < 0) {
899 899 const char *oldnode = index_node(self, -v - 1);
900 900 int noff;
901 901
902 902 if (!oldnode || !memcmp(oldnode, node, 20)) {
903 903 n->children[k] = -rev - 1;
904 904 return 0;
905 905 }
906 906 noff = nt_new(self);
907 907 if (noff == -1)
908 908 return -1;
909 909 /* self->nt may have been changed by realloc */
910 910 self->nt[off].children[k] = noff;
911 911 off = noff;
912 912 n = &self->nt[off];
913 913 n->children[nt_level(oldnode, ++level)] = v;
914 914 if (level > self->ntdepth)
915 915 self->ntdepth = level;
916 916 self->ntsplits += 1;
917 917 } else {
918 918 level += 1;
919 919 off = v;
920 920 }
921 921 }
922 922
923 923 return -1;
924 924 }
925 925
926 926 static int nt_init(indexObject *self)
927 927 {
928 928 if (self->nt == NULL) {
929 929 if (self->raw_length > INT_MAX) {
930 930 PyErr_SetString(PyExc_ValueError, "overflow in nt_init");
931 931 return -1;
932 932 }
933 933 self->ntcapacity = self->raw_length < 4
934 934 ? 4 : (int)self->raw_length / 2;
935 935
936 936 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
937 937 if (self->nt == NULL) {
938 938 PyErr_NoMemory();
939 939 return -1;
940 940 }
941 941 self->ntlength = 1;
942 942 self->ntrev = (int)index_length(self) - 1;
943 943 self->ntlookups = 1;
944 944 self->ntmisses = 0;
945 945 if (nt_insert(self, nullid, INT_MAX) == -1)
946 946 return -1;
947 947 }
948 948 return 0;
949 949 }
950 950
951 951 /*
952 952 * Return values:
953 953 *
954 954 * -3: error (exception set)
955 955 * -2: not found (no exception set)
956 956 * rest: valid rev
957 957 */
958 958 static int index_find_node(indexObject *self,
959 959 const char *node, Py_ssize_t nodelen)
960 960 {
961 961 int rev;
962 962
963 963 self->ntlookups++;
964 964 rev = nt_find(self, node, nodelen, 0);
965 965 if (rev >= -1)
966 966 return rev;
967 967
968 968 if (nt_init(self) == -1)
969 969 return -3;
970 970
971 971 /*
972 972 * For the first handful of lookups, we scan the entire index,
973 973 * and cache only the matching nodes. This optimizes for cases
974 974 * like "hg tip", where only a few nodes are accessed.
975 975 *
976 976 * After that, we cache every node we visit, using a single
977 977 * scan amortized over multiple lookups. This gives the best
978 978 * bulk performance, e.g. for "hg log".
979 979 */
980 980 if (self->ntmisses++ < 4) {
981 981 for (rev = self->ntrev - 1; rev >= 0; rev--) {
982 982 const char *n = index_node(self, rev);
983 983 if (n == NULL)
984 984 return -2;
985 985 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
986 986 if (nt_insert(self, n, rev) == -1)
987 987 return -3;
988 988 break;
989 989 }
990 990 }
991 991 } else {
992 992 for (rev = self->ntrev - 1; rev >= 0; rev--) {
993 993 const char *n = index_node(self, rev);
994 994 if (n == NULL) {
995 995 self->ntrev = rev + 1;
996 996 return -2;
997 997 }
998 998 if (nt_insert(self, n, rev) == -1) {
999 999 self->ntrev = rev + 1;
1000 1000 return -3;
1001 1001 }
1002 1002 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1003 1003 break;
1004 1004 }
1005 1005 }
1006 1006 self->ntrev = rev;
1007 1007 }
1008 1008
1009 1009 if (rev >= 0)
1010 1010 return rev;
1011 1011 return -2;
1012 1012 }
1013 1013
1014 1014 static PyObject *raise_revlog_error(void)
1015 1015 {
1016 1016 static PyObject *errclass;
1017 1017 PyObject *mod = NULL, *errobj;
1018 1018
1019 1019 if (errclass == NULL) {
1020 1020 PyObject *dict;
1021 1021
1022 1022 mod = PyImport_ImportModule("mercurial.error");
1023 1023 if (mod == NULL)
1024 1024 goto classfail;
1025 1025
1026 1026 dict = PyModule_GetDict(mod);
1027 1027 if (dict == NULL)
1028 1028 goto classfail;
1029 1029
1030 1030 errclass = PyDict_GetItemString(dict, "RevlogError");
1031 1031 if (errclass == NULL) {
1032 1032 PyErr_SetString(PyExc_SystemError,
1033 1033 "could not find RevlogError");
1034 1034 goto classfail;
1035 1035 }
1036 1036 Py_INCREF(errclass);
1037 1037 }
1038 1038
1039 1039 errobj = PyObject_CallFunction(errclass, NULL);
1040 1040 if (errobj == NULL)
1041 1041 return NULL;
1042 1042 PyErr_SetObject(errclass, errobj);
1043 1043 return errobj;
1044 1044
1045 1045 classfail:
1046 1046 Py_XDECREF(mod);
1047 1047 return NULL;
1048 1048 }
1049 1049
1050 1050 static PyObject *index_getitem(indexObject *self, PyObject *value)
1051 1051 {
1052 1052 char *node;
1053 1053 Py_ssize_t nodelen;
1054 1054 int rev;
1055 1055
1056 1056 if (PyInt_Check(value))
1057 1057 return index_get(self, PyInt_AS_LONG(value));
1058 1058
1059 1059 if (node_check(value, &node, &nodelen) == -1)
1060 1060 return NULL;
1061 1061 rev = index_find_node(self, node, nodelen);
1062 1062 if (rev >= -1)
1063 1063 return PyInt_FromLong(rev);
1064 1064 if (rev == -2)
1065 1065 raise_revlog_error();
1066 1066 return NULL;
1067 1067 }
1068 1068
1069 1069 static int nt_partialmatch(indexObject *self, const char *node,
1070 1070 Py_ssize_t nodelen)
1071 1071 {
1072 1072 int rev;
1073 1073
1074 1074 if (nt_init(self) == -1)
1075 1075 return -3;
1076 1076
1077 1077 if (self->ntrev > 0) {
1078 1078 /* ensure that the radix tree is fully populated */
1079 1079 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1080 1080 const char *n = index_node(self, rev);
1081 1081 if (n == NULL)
1082 1082 return -2;
1083 1083 if (nt_insert(self, n, rev) == -1)
1084 1084 return -3;
1085 1085 }
1086 1086 self->ntrev = rev;
1087 1087 }
1088 1088
1089 1089 return nt_find(self, node, nodelen, 1);
1090 1090 }
1091 1091
1092 1092 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1093 1093 {
1094 1094 const char *fullnode;
1095 1095 int nodelen;
1096 1096 char *node;
1097 1097 int rev, i;
1098 1098
1099 1099 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
1100 1100 return NULL;
1101 1101
1102 1102 if (nodelen < 4) {
1103 1103 PyErr_SetString(PyExc_ValueError, "key too short");
1104 1104 return NULL;
1105 1105 }
1106 1106
1107 1107 if (nodelen > 40) {
1108 1108 PyErr_SetString(PyExc_ValueError, "key too long");
1109 1109 return NULL;
1110 1110 }
1111 1111
1112 1112 for (i = 0; i < nodelen; i++)
1113 1113 hexdigit(node, i);
1114 1114 if (PyErr_Occurred()) {
1115 1115 /* input contains non-hex characters */
1116 1116 PyErr_Clear();
1117 1117 Py_RETURN_NONE;
1118 1118 }
1119 1119
1120 1120 rev = nt_partialmatch(self, node, nodelen);
1121 1121
1122 1122 switch (rev) {
1123 1123 case -4:
1124 1124 raise_revlog_error();
1125 1125 case -3:
1126 1126 return NULL;
1127 1127 case -2:
1128 1128 Py_RETURN_NONE;
1129 1129 case -1:
1130 1130 return PyString_FromStringAndSize(nullid, 20);
1131 1131 }
1132 1132
1133 1133 fullnode = index_node(self, rev);
1134 1134 if (fullnode == NULL) {
1135 1135 PyErr_Format(PyExc_IndexError,
1136 1136 "could not access rev %d", rev);
1137 1137 return NULL;
1138 1138 }
1139 1139 return PyString_FromStringAndSize(fullnode, 20);
1140 1140 }
1141 1141
1142 1142 static PyObject *index_m_get(indexObject *self, PyObject *args)
1143 1143 {
1144 1144 Py_ssize_t nodelen;
1145 1145 PyObject *val;
1146 1146 char *node;
1147 1147 int rev;
1148 1148
1149 1149 if (!PyArg_ParseTuple(args, "O", &val))
1150 1150 return NULL;
1151 1151 if (node_check(val, &node, &nodelen) == -1)
1152 1152 return NULL;
1153 1153 rev = index_find_node(self, node, nodelen);
1154 1154 if (rev == -3)
1155 1155 return NULL;
1156 1156 if (rev == -2)
1157 1157 Py_RETURN_NONE;
1158 1158 return PyInt_FromLong(rev);
1159 1159 }
1160 1160
1161 1161 static int index_contains(indexObject *self, PyObject *value)
1162 1162 {
1163 1163 char *node;
1164 1164 Py_ssize_t nodelen;
1165 1165
1166 1166 if (PyInt_Check(value)) {
1167 1167 long rev = PyInt_AS_LONG(value);
1168 1168 return rev >= -1 && rev < index_length(self);
1169 1169 }
1170 1170
1171 1171 if (node_check(value, &node, &nodelen) == -1)
1172 1172 return -1;
1173 1173
1174 1174 switch (index_find_node(self, node, nodelen)) {
1175 1175 case -3:
1176 1176 return -1;
1177 1177 case -2:
1178 1178 return 0;
1179 1179 default:
1180 1180 return 1;
1181 1181 }
1182 1182 }
1183 1183
1184 1184 static inline void index_get_parents(indexObject *self, int rev, int *ps)
1185 1185 {
1186 1186 if (rev >= self->length - 1) {
1187 1187 PyObject *tuple = PyList_GET_ITEM(self->added,
1188 1188 rev - self->length + 1);
1189 1189 ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5));
1190 1190 ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6));
1191 1191 } else {
1192 1192 const char *data = index_deref(self, rev);
1193 1193 ps[0] = getbe32(data + 24);
1194 1194 ps[1] = getbe32(data + 28);
1195 1195 }
1196 1196 }
1197 1197
1198 1198 typedef uint64_t bitmask;
1199 1199
1200 1200 /*
1201 1201 * Given a disjoint set of revs, return all candidates for the
1202 1202 * greatest common ancestor. In revset notation, this is the set
1203 1203 * "heads(::a and ::b and ...)"
1204 1204 */
1205 1205 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
1206 1206 int revcount)
1207 1207 {
1208 1208 const bitmask allseen = (1ull << revcount) - 1;
1209 1209 const bitmask poison = 1ull << revcount;
1210 1210 PyObject *gca = PyList_New(0);
1211 int i, v, interesting, left;
1211 int i, v, interesting;
1212 1212 int maxrev = -1;
1213 1213 long sp;
1214 1214 bitmask *seen;
1215 1215
1216 1216 if (gca == NULL)
1217 1217 return PyErr_NoMemory();
1218 1218
1219 1219 for (i = 0; i < revcount; i++) {
1220 1220 if (revs[i] > maxrev)
1221 1221 maxrev = revs[i];
1222 1222 }
1223 1223
1224 1224 seen = calloc(sizeof(*seen), maxrev + 1);
1225 1225 if (seen == NULL) {
1226 1226 Py_DECREF(gca);
1227 1227 return PyErr_NoMemory();
1228 1228 }
1229 1229
1230 1230 for (i = 0; i < revcount; i++)
1231 1231 seen[revs[i]] = 1ull << i;
1232 1232
1233 interesting = left = revcount;
1233 interesting = revcount;
1234 1234
1235 1235 for (v = maxrev; v >= 0 && interesting; v--) {
1236 1236 long sv = seen[v];
1237 1237 int parents[2];
1238 1238
1239 1239 if (!sv)
1240 1240 continue;
1241 1241
1242 1242 if (sv < poison) {
1243 1243 interesting -= 1;
1244 1244 if (sv == allseen) {
1245 1245 PyObject *obj = PyInt_FromLong(v);
1246 1246 if (obj == NULL)
1247 1247 goto bail;
1248 1248 if (PyList_Append(gca, obj) == -1) {
1249 1249 Py_DECREF(obj);
1250 1250 goto bail;
1251 1251 }
1252 1252 sv |= poison;
1253 1253 for (i = 0; i < revcount; i++) {
1254 if (revs[i] == v) {
1255 if (--left <= 1)
1256 goto done;
1257 break;
1258 }
1254 if (revs[i] == v)
1255 goto done;
1259 1256 }
1260 1257 }
1261 1258 }
1262 1259 index_get_parents(self, v, parents);
1263 1260
1264 1261 for (i = 0; i < 2; i++) {
1265 1262 int p = parents[i];
1266 1263 if (p == -1)
1267 1264 continue;
1268 1265 sp = seen[p];
1269 1266 if (sv < poison) {
1270 1267 if (sp == 0) {
1271 1268 seen[p] = sv;
1272 1269 interesting++;
1273 1270 }
1274 1271 else if (sp != sv)
1275 1272 seen[p] |= sv;
1276 1273 } else {
1277 1274 if (sp && sp < poison)
1278 1275 interesting--;
1279 1276 seen[p] = sv;
1280 1277 }
1281 1278 }
1282 1279 }
1283 1280
1284 1281 done:
1285 1282 free(seen);
1286 1283 return gca;
1287 1284 bail:
1288 1285 free(seen);
1289 1286 Py_XDECREF(gca);
1290 1287 return NULL;
1291 1288 }
1292 1289
1293 1290 /*
1294 1291 * Given a disjoint set of revs, return the subset with the longest
1295 1292 * path to the root.
1296 1293 */
1297 1294 static PyObject *find_deepest(indexObject *self, PyObject *revs)
1298 1295 {
1299 1296 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
1300 1297 static const Py_ssize_t capacity = 24;
1301 1298 int *depth, *interesting = NULL;
1302 1299 int i, j, v, ninteresting;
1303 1300 PyObject *dict = NULL, *keys;
1304 1301 long *seen = NULL;
1305 1302 int maxrev = -1;
1306 1303 long final;
1307 1304
1308 1305 if (revcount > capacity) {
1309 1306 PyErr_Format(PyExc_OverflowError,
1310 1307 "bitset size (%ld) > capacity (%ld)",
1311 1308 (long)revcount, (long)capacity);
1312 1309 return NULL;
1313 1310 }
1314 1311
1315 1312 for (i = 0; i < revcount; i++) {
1316 1313 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1317 1314 if (n > maxrev)
1318 1315 maxrev = n;
1319 1316 }
1320 1317
1321 1318 depth = calloc(sizeof(*depth), maxrev + 1);
1322 1319 if (depth == NULL)
1323 1320 return PyErr_NoMemory();
1324 1321
1325 1322 seen = calloc(sizeof(*seen), maxrev + 1);
1326 1323 if (seen == NULL) {
1327 1324 PyErr_NoMemory();
1328 1325 goto bail;
1329 1326 }
1330 1327
1331 1328 interesting = calloc(sizeof(*interesting), 2 << revcount);
1332 1329 if (interesting == NULL) {
1333 1330 PyErr_NoMemory();
1334 1331 goto bail;
1335 1332 }
1336 1333
1337 1334 if (PyList_Sort(revs) == -1)
1338 1335 goto bail;
1339 1336
1340 1337 for (i = 0; i < revcount; i++) {
1341 1338 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1342 1339 long b = 1l << i;
1343 1340 depth[n] = 1;
1344 1341 seen[n] = b;
1345 1342 interesting[b] = 1;
1346 1343 }
1347 1344
1348 1345 ninteresting = (int)revcount;
1349 1346
1350 1347 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
1351 1348 int dv = depth[v];
1352 1349 int parents[2];
1353 1350 long sv;
1354 1351
1355 1352 if (dv == 0)
1356 1353 continue;
1357 1354
1358 1355 sv = seen[v];
1359 1356 index_get_parents(self, v, parents);
1360 1357
1361 1358 for (i = 0; i < 2; i++) {
1362 1359 int p = parents[i];
1363 1360 long nsp, sp;
1364 1361 int dp;
1365 1362
1366 1363 if (p == -1)
1367 1364 continue;
1368 1365
1369 1366 dp = depth[p];
1370 1367 nsp = sp = seen[p];
1371 1368 if (dp <= dv) {
1372 1369 depth[p] = dv + 1;
1373 1370 if (sp != sv) {
1374 1371 interesting[sv] += 1;
1375 1372 nsp = seen[p] = sv;
1376 1373 if (sp) {
1377 1374 interesting[sp] -= 1;
1378 1375 if (interesting[sp] == 0)
1379 1376 ninteresting -= 1;
1380 1377 }
1381 1378 }
1382 1379 }
1383 1380 else if (dv == dp - 1) {
1384 1381 nsp = sp | sv;
1385 1382 if (nsp == sp)
1386 1383 continue;
1387 1384 seen[p] = nsp;
1388 1385 interesting[sp] -= 1;
1389 1386 if (interesting[sp] == 0 && interesting[nsp] > 0)
1390 1387 ninteresting -= 1;
1391 1388 interesting[nsp] += 1;
1392 1389 }
1393 1390 }
1394 1391 interesting[sv] -= 1;
1395 1392 if (interesting[sv] == 0)
1396 1393 ninteresting -= 1;
1397 1394 }
1398 1395
1399 1396 final = 0;
1400 1397 j = ninteresting;
1401 1398 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
1402 1399 if (interesting[i] == 0)
1403 1400 continue;
1404 1401 final |= i;
1405 1402 j -= 1;
1406 1403 }
1407 1404 if (final == 0)
1408 1405 return PyList_New(0);
1409 1406
1410 1407 dict = PyDict_New();
1411 1408 if (dict == NULL)
1412 1409 goto bail;
1413 1410
1414 1411 for (i = 0; i < revcount; i++) {
1415 1412 PyObject *key;
1416 1413
1417 1414 if ((final & (1 << i)) == 0)
1418 1415 continue;
1419 1416
1420 1417 key = PyList_GET_ITEM(revs, i);
1421 1418 Py_INCREF(key);
1422 1419 Py_INCREF(Py_None);
1423 1420 if (PyDict_SetItem(dict, key, Py_None) == -1) {
1424 1421 Py_DECREF(key);
1425 1422 Py_DECREF(Py_None);
1426 1423 goto bail;
1427 1424 }
1428 1425 }
1429 1426
1430 1427 keys = PyDict_Keys(dict);
1431 1428
1432 1429 free(depth);
1433 1430 free(seen);
1434 1431 free(interesting);
1435 1432 Py_DECREF(dict);
1436 1433
1437 1434 return keys;
1438 1435 bail:
1439 1436 free(depth);
1440 1437 free(seen);
1441 1438 free(interesting);
1442 1439 Py_XDECREF(dict);
1443 1440
1444 1441 return NULL;
1445 1442 }
1446 1443
1447 1444 /*
1448 1445 * Given a (possibly overlapping) set of revs, return the greatest
1449 1446 * common ancestors: those with the longest path to the root.
1450 1447 */
1451 1448 static PyObject *index_ancestors(indexObject *self, PyObject *args)
1452 1449 {
1453 1450 PyObject *ret = NULL, *gca = NULL;
1454 1451 Py_ssize_t argcount, i, len;
1455 1452 bitmask repeat = 0;
1456 1453 int revcount = 0;
1457 1454 int *revs;
1458 1455
1459 1456 argcount = PySequence_Length(args);
1460 1457 revs = malloc(argcount * sizeof(*revs));
1461 1458 if (argcount > 0 && revs == NULL)
1462 1459 return PyErr_NoMemory();
1463 1460 len = index_length(self) - 1;
1464 1461
1465 1462 for (i = 0; i < argcount; i++) {
1466 1463 static const int capacity = 24;
1467 1464 PyObject *obj = PySequence_GetItem(args, i);
1468 1465 bitmask x;
1469 1466 long val;
1470 1467
1471 1468 if (!PyInt_Check(obj)) {
1472 1469 PyErr_SetString(PyExc_TypeError,
1473 1470 "arguments must all be ints");
1474 1471 goto bail;
1475 1472 }
1476 1473 val = PyInt_AsLong(obj);
1477 1474 if (val == -1) {
1478 1475 ret = PyList_New(0);
1479 1476 goto done;
1480 1477 }
1481 1478 if (val < 0 || val >= len) {
1482 1479 PyErr_SetString(PyExc_IndexError,
1483 1480 "index out of range");
1484 1481 goto bail;
1485 1482 }
1486 1483 /* this cheesy bloom filter lets us avoid some more
1487 1484 * expensive duplicate checks in the common set-is-disjoint
1488 1485 * case */
1489 1486 x = 1ull << (val & 0x3f);
1490 1487 if (repeat & x) {
1491 1488 int k;
1492 1489 for (k = 0; k < revcount; k++) {
1493 1490 if (val == revs[k])
1494 1491 goto duplicate;
1495 1492 }
1496 1493 }
1497 1494 else repeat |= x;
1498 1495 if (revcount >= capacity) {
1499 1496 PyErr_Format(PyExc_OverflowError,
1500 1497 "bitset size (%d) > capacity (%d)",
1501 1498 revcount, capacity);
1502 1499 goto bail;
1503 1500 }
1504 1501 revs[revcount++] = (int)val;
1505 1502 duplicate:;
1506 1503 }
1507 1504
1508 1505 if (revcount == 0) {
1509 1506 ret = PyList_New(0);
1510 1507 goto done;
1511 1508 }
1512 1509 if (revcount == 1) {
1513 1510 PyObject *obj;
1514 1511 ret = PyList_New(1);
1515 1512 if (ret == NULL)
1516 1513 goto bail;
1517 1514 obj = PyInt_FromLong(revs[0]);
1518 1515 if (obj == NULL)
1519 1516 goto bail;
1520 1517 PyList_SET_ITEM(ret, 0, obj);
1521 1518 goto done;
1522 1519 }
1523 1520
1524 1521 gca = find_gca_candidates(self, revs, revcount);
1525 1522 if (gca == NULL)
1526 1523 goto bail;
1527 1524
1528 1525 if (PyList_GET_SIZE(gca) <= 1) {
1529 1526 ret = gca;
1530 1527 Py_INCREF(gca);
1531 1528 }
1532 1529 else ret = find_deepest(self, gca);
1533 1530
1534 1531 done:
1535 1532 free(revs);
1536 1533 Py_XDECREF(gca);
1537 1534
1538 1535 return ret;
1539 1536
1540 1537 bail:
1541 1538 free(revs);
1542 1539 Py_XDECREF(gca);
1543 1540 Py_XDECREF(ret);
1544 1541 return NULL;
1545 1542 }
1546 1543
1547 1544 /*
1548 1545 * Invalidate any trie entries introduced by added revs.
1549 1546 */
1550 1547 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
1551 1548 {
1552 1549 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
1553 1550
1554 1551 for (i = start; i < len; i++) {
1555 1552 PyObject *tuple = PyList_GET_ITEM(self->added, i);
1556 1553 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
1557 1554
1558 1555 nt_insert(self, PyString_AS_STRING(node), -1);
1559 1556 }
1560 1557
1561 1558 if (start == 0)
1562 1559 Py_CLEAR(self->added);
1563 1560 }
1564 1561
1565 1562 /*
1566 1563 * Delete a numeric range of revs, which must be at the end of the
1567 1564 * range, but exclude the sentinel nullid entry.
1568 1565 */
1569 1566 static int index_slice_del(indexObject *self, PyObject *item)
1570 1567 {
1571 1568 Py_ssize_t start, stop, step, slicelength;
1572 1569 Py_ssize_t length = index_length(self);
1573 1570 int ret = 0;
1574 1571
1575 1572 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
1576 1573 &start, &stop, &step, &slicelength) < 0)
1577 1574 return -1;
1578 1575
1579 1576 if (slicelength <= 0)
1580 1577 return 0;
1581 1578
1582 1579 if ((step < 0 && start < stop) || (step > 0 && start > stop))
1583 1580 stop = start;
1584 1581
1585 1582 if (step < 0) {
1586 1583 stop = start + 1;
1587 1584 start = stop + step*(slicelength - 1) - 1;
1588 1585 step = -step;
1589 1586 }
1590 1587
1591 1588 if (step != 1) {
1592 1589 PyErr_SetString(PyExc_ValueError,
1593 1590 "revlog index delete requires step size of 1");
1594 1591 return -1;
1595 1592 }
1596 1593
1597 1594 if (stop != length - 1) {
1598 1595 PyErr_SetString(PyExc_IndexError,
1599 1596 "revlog index deletion indices are invalid");
1600 1597 return -1;
1601 1598 }
1602 1599
1603 1600 if (start < self->length - 1) {
1604 1601 if (self->nt) {
1605 1602 Py_ssize_t i;
1606 1603
1607 1604 for (i = start + 1; i < self->length - 1; i++) {
1608 1605 const char *node = index_node(self, i);
1609 1606
1610 1607 if (node)
1611 1608 nt_insert(self, node, -1);
1612 1609 }
1613 1610 if (self->added)
1614 1611 nt_invalidate_added(self, 0);
1615 1612 if (self->ntrev > start)
1616 1613 self->ntrev = (int)start;
1617 1614 }
1618 1615 self->length = start + 1;
1619 1616 if (start < self->raw_length) {
1620 1617 if (self->cache) {
1621 1618 Py_ssize_t i;
1622 1619 for (i = start; i < self->raw_length; i++)
1623 1620 Py_CLEAR(self->cache[i]);
1624 1621 }
1625 1622 self->raw_length = start;
1626 1623 }
1627 1624 goto done;
1628 1625 }
1629 1626
1630 1627 if (self->nt) {
1631 1628 nt_invalidate_added(self, start - self->length + 1);
1632 1629 if (self->ntrev > start)
1633 1630 self->ntrev = (int)start;
1634 1631 }
1635 1632 if (self->added)
1636 1633 ret = PyList_SetSlice(self->added, start - self->length + 1,
1637 1634 PyList_GET_SIZE(self->added), NULL);
1638 1635 done:
1639 1636 Py_CLEAR(self->headrevs);
1640 1637 return ret;
1641 1638 }
1642 1639
1643 1640 /*
1644 1641 * Supported ops:
1645 1642 *
1646 1643 * slice deletion
1647 1644 * string assignment (extend node->rev mapping)
1648 1645 * string deletion (shrink node->rev mapping)
1649 1646 */
1650 1647 static int index_assign_subscript(indexObject *self, PyObject *item,
1651 1648 PyObject *value)
1652 1649 {
1653 1650 char *node;
1654 1651 Py_ssize_t nodelen;
1655 1652 long rev;
1656 1653
1657 1654 if (PySlice_Check(item) && value == NULL)
1658 1655 return index_slice_del(self, item);
1659 1656
1660 1657 if (node_check(item, &node, &nodelen) == -1)
1661 1658 return -1;
1662 1659
1663 1660 if (value == NULL)
1664 1661 return self->nt ? nt_insert(self, node, -1) : 0;
1665 1662 rev = PyInt_AsLong(value);
1666 1663 if (rev > INT_MAX || rev < 0) {
1667 1664 if (!PyErr_Occurred())
1668 1665 PyErr_SetString(PyExc_ValueError, "rev out of range");
1669 1666 return -1;
1670 1667 }
1671 1668 return nt_insert(self, node, (int)rev);
1672 1669 }
1673 1670
1674 1671 /*
1675 1672 * Find all RevlogNG entries in an index that has inline data. Update
1676 1673 * the optional "offsets" table with those entries.
1677 1674 */
1678 1675 static long inline_scan(indexObject *self, const char **offsets)
1679 1676 {
1680 1677 const char *data = PyString_AS_STRING(self->data);
1681 1678 Py_ssize_t pos = 0;
1682 1679 Py_ssize_t end = PyString_GET_SIZE(self->data);
1683 1680 long incr = v1_hdrsize;
1684 1681 Py_ssize_t len = 0;
1685 1682
1686 1683 while (pos + v1_hdrsize <= end && pos >= 0) {
1687 1684 uint32_t comp_len;
1688 1685 /* 3rd element of header is length of compressed inline data */
1689 1686 comp_len = getbe32(data + pos + 8);
1690 1687 incr = v1_hdrsize + comp_len;
1691 1688 if (offsets)
1692 1689 offsets[len] = data + pos;
1693 1690 len++;
1694 1691 pos += incr;
1695 1692 }
1696 1693
1697 1694 if (pos != end) {
1698 1695 if (!PyErr_Occurred())
1699 1696 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1700 1697 return -1;
1701 1698 }
1702 1699
1703 1700 return len;
1704 1701 }
1705 1702
1706 1703 static int index_init(indexObject *self, PyObject *args)
1707 1704 {
1708 1705 PyObject *data_obj, *inlined_obj;
1709 1706 Py_ssize_t size;
1710 1707
1711 1708 /* Initialize before argument-checking to avoid index_dealloc() crash. */
1712 1709 self->raw_length = 0;
1713 1710 self->added = NULL;
1714 1711 self->cache = NULL;
1715 1712 self->data = NULL;
1716 1713 self->headrevs = NULL;
1717 1714 self->nt = NULL;
1718 1715 self->offsets = NULL;
1719 1716
1720 1717 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1721 1718 return -1;
1722 1719 if (!PyString_Check(data_obj)) {
1723 1720 PyErr_SetString(PyExc_TypeError, "data is not a string");
1724 1721 return -1;
1725 1722 }
1726 1723 size = PyString_GET_SIZE(data_obj);
1727 1724
1728 1725 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1729 1726 self->data = data_obj;
1730 1727
1731 1728 self->ntlength = self->ntcapacity = 0;
1732 1729 self->ntdepth = self->ntsplits = 0;
1733 1730 self->ntlookups = self->ntmisses = 0;
1734 1731 self->ntrev = -1;
1735 1732 Py_INCREF(self->data);
1736 1733
1737 1734 if (self->inlined) {
1738 1735 long len = inline_scan(self, NULL);
1739 1736 if (len == -1)
1740 1737 goto bail;
1741 1738 self->raw_length = len;
1742 1739 self->length = len + 1;
1743 1740 } else {
1744 1741 if (size % v1_hdrsize) {
1745 1742 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1746 1743 goto bail;
1747 1744 }
1748 1745 self->raw_length = size / v1_hdrsize;
1749 1746 self->length = self->raw_length + 1;
1750 1747 }
1751 1748
1752 1749 return 0;
1753 1750 bail:
1754 1751 return -1;
1755 1752 }
1756 1753
1757 1754 static PyObject *index_nodemap(indexObject *self)
1758 1755 {
1759 1756 Py_INCREF(self);
1760 1757 return (PyObject *)self;
1761 1758 }
1762 1759
1763 1760 static void index_dealloc(indexObject *self)
1764 1761 {
1765 1762 _index_clearcaches(self);
1766 1763 Py_XDECREF(self->data);
1767 1764 Py_XDECREF(self->added);
1768 1765 PyObject_Del(self);
1769 1766 }
1770 1767
1771 1768 static PySequenceMethods index_sequence_methods = {
1772 1769 (lenfunc)index_length, /* sq_length */
1773 1770 0, /* sq_concat */
1774 1771 0, /* sq_repeat */
1775 1772 (ssizeargfunc)index_get, /* sq_item */
1776 1773 0, /* sq_slice */
1777 1774 0, /* sq_ass_item */
1778 1775 0, /* sq_ass_slice */
1779 1776 (objobjproc)index_contains, /* sq_contains */
1780 1777 };
1781 1778
1782 1779 static PyMappingMethods index_mapping_methods = {
1783 1780 (lenfunc)index_length, /* mp_length */
1784 1781 (binaryfunc)index_getitem, /* mp_subscript */
1785 1782 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1786 1783 };
1787 1784
1788 1785 static PyMethodDef index_methods[] = {
1789 1786 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
1790 1787 "return the gca set of the given revs"},
1791 1788 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1792 1789 "clear the index caches"},
1793 1790 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1794 1791 "get an index entry"},
1795 1792 {"headrevs", (PyCFunction)index_headrevs, METH_NOARGS,
1796 1793 "get head revisions"},
1797 1794 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1798 1795 "insert an index entry"},
1799 1796 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
1800 1797 "match a potentially ambiguous node ID"},
1801 1798 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1802 1799 "stats for the index"},
1803 1800 {NULL} /* Sentinel */
1804 1801 };
1805 1802
1806 1803 static PyGetSetDef index_getset[] = {
1807 1804 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
1808 1805 {NULL} /* Sentinel */
1809 1806 };
1810 1807
1811 1808 static PyTypeObject indexType = {
1812 1809 PyObject_HEAD_INIT(NULL)
1813 1810 0, /* ob_size */
1814 1811 "parsers.index", /* tp_name */
1815 1812 sizeof(indexObject), /* tp_basicsize */
1816 1813 0, /* tp_itemsize */
1817 1814 (destructor)index_dealloc, /* tp_dealloc */
1818 1815 0, /* tp_print */
1819 1816 0, /* tp_getattr */
1820 1817 0, /* tp_setattr */
1821 1818 0, /* tp_compare */
1822 1819 0, /* tp_repr */
1823 1820 0, /* tp_as_number */
1824 1821 &index_sequence_methods, /* tp_as_sequence */
1825 1822 &index_mapping_methods, /* tp_as_mapping */
1826 1823 0, /* tp_hash */
1827 1824 0, /* tp_call */
1828 1825 0, /* tp_str */
1829 1826 0, /* tp_getattro */
1830 1827 0, /* tp_setattro */
1831 1828 0, /* tp_as_buffer */
1832 1829 Py_TPFLAGS_DEFAULT, /* tp_flags */
1833 1830 "revlog index", /* tp_doc */
1834 1831 0, /* tp_traverse */
1835 1832 0, /* tp_clear */
1836 1833 0, /* tp_richcompare */
1837 1834 0, /* tp_weaklistoffset */
1838 1835 0, /* tp_iter */
1839 1836 0, /* tp_iternext */
1840 1837 index_methods, /* tp_methods */
1841 1838 0, /* tp_members */
1842 1839 index_getset, /* tp_getset */
1843 1840 0, /* tp_base */
1844 1841 0, /* tp_dict */
1845 1842 0, /* tp_descr_get */
1846 1843 0, /* tp_descr_set */
1847 1844 0, /* tp_dictoffset */
1848 1845 (initproc)index_init, /* tp_init */
1849 1846 0, /* tp_alloc */
1850 1847 };
1851 1848
1852 1849 /*
1853 1850 * returns a tuple of the form (index, index, cache) with elements as
1854 1851 * follows:
1855 1852 *
1856 1853 * index: an index object that lazily parses RevlogNG records
1857 1854 * cache: if data is inlined, a tuple (index_file_content, 0), else None
1858 1855 *
1859 1856 * added complications are for backwards compatibility
1860 1857 */
1861 1858 static PyObject *parse_index2(PyObject *self, PyObject *args)
1862 1859 {
1863 1860 PyObject *tuple = NULL, *cache = NULL;
1864 1861 indexObject *idx;
1865 1862 int ret;
1866 1863
1867 1864 idx = PyObject_New(indexObject, &indexType);
1868 1865 if (idx == NULL)
1869 1866 goto bail;
1870 1867
1871 1868 ret = index_init(idx, args);
1872 1869 if (ret == -1)
1873 1870 goto bail;
1874 1871
1875 1872 if (idx->inlined) {
1876 1873 cache = Py_BuildValue("iO", 0, idx->data);
1877 1874 if (cache == NULL)
1878 1875 goto bail;
1879 1876 } else {
1880 1877 cache = Py_None;
1881 1878 Py_INCREF(cache);
1882 1879 }
1883 1880
1884 1881 tuple = Py_BuildValue("NN", idx, cache);
1885 1882 if (!tuple)
1886 1883 goto bail;
1887 1884 return tuple;
1888 1885
1889 1886 bail:
1890 1887 Py_XDECREF(idx);
1891 1888 Py_XDECREF(cache);
1892 1889 Py_XDECREF(tuple);
1893 1890 return NULL;
1894 1891 }
1895 1892
1896 1893 static char parsers_doc[] = "Efficient content parsing.";
1897 1894
1898 1895 PyObject *encodedir(PyObject *self, PyObject *args);
1899 1896 PyObject *pathencode(PyObject *self, PyObject *args);
1900 1897 PyObject *lowerencode(PyObject *self, PyObject *args);
1901 1898
1902 1899 static PyMethodDef methods[] = {
1903 1900 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
1904 1901 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
1905 1902 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1906 1903 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
1907 1904 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
1908 1905 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
1909 1906 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
1910 1907 {NULL, NULL}
1911 1908 };
1912 1909
1913 1910 void dirs_module_init(PyObject *mod);
1914 1911
1915 1912 static void module_init(PyObject *mod)
1916 1913 {
1917 1914 dirs_module_init(mod);
1918 1915
1919 1916 indexType.tp_new = PyType_GenericNew;
1920 1917 if (PyType_Ready(&indexType) < 0)
1921 1918 return;
1922 1919 Py_INCREF(&indexType);
1923 1920
1924 1921 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
1925 1922
1926 1923 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
1927 1924 -1, -1, -1, -1, nullid, 20);
1928 1925 if (nullentry)
1929 1926 PyObject_GC_UnTrack(nullentry);
1930 1927
1931 1928 dirstate_unset = Py_BuildValue("ciii", 'n', 0, -1, -1);
1932 1929 }
1933 1930
1934 1931 #ifdef IS_PY3K
1935 1932 static struct PyModuleDef parsers_module = {
1936 1933 PyModuleDef_HEAD_INIT,
1937 1934 "parsers",
1938 1935 parsers_doc,
1939 1936 -1,
1940 1937 methods
1941 1938 };
1942 1939
1943 1940 PyMODINIT_FUNC PyInit_parsers(void)
1944 1941 {
1945 1942 PyObject *mod = PyModule_Create(&parsers_module);
1946 1943 module_init(mod);
1947 1944 return mod;
1948 1945 }
1949 1946 #else
1950 1947 PyMODINIT_FUNC initparsers(void)
1951 1948 {
1952 1949 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
1953 1950 module_init(mod);
1954 1951 }
1955 1952 #endif
General Comments 0
You need to be logged in to leave comments. Login now