parsers: use base-16 trie for faster node->rev mapping...
Bryan O'Sullivan
r16414:e8d37b78 default
@@ -1,206 +1,227 b''
1 1 # perf.py - performance test routines
2 2 '''helper extension to measure performance'''
3 3
4 from mercurial import cmdutil, scmutil, match, commands
4 from mercurial import cmdutil, scmutil, util, match, commands
5 5 import time, os, sys
6 6
7 7 def timer(func, title=None):
8 8 results = []
9 9 begin = time.time()
10 10 count = 0
11 11 while True:
12 12 ostart = os.times()
13 13 cstart = time.time()
14 14 r = func()
15 15 cstop = time.time()
16 16 ostop = os.times()
17 17 count += 1
18 18 a, b = ostart, ostop
19 19 results.append((cstop - cstart, b[0] - a[0], b[1] - a[1]))
20 20 if cstop - begin > 3 and count >= 100:
21 21 break
22 22 if cstop - begin > 10 and count >= 3:
23 23 break
24 24 if title:
25 25 sys.stderr.write("! %s\n" % title)
26 26 if r:
27 27 sys.stderr.write("! result: %s\n" % r)
28 28 m = min(results)
29 29 sys.stderr.write("! wall %f comb %f user %f sys %f (best of %d)\n"
30 30 % (m[0], m[1] + m[2], m[1], m[2], count))
31 31
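As a usage sketch (hypothetical command, not part of the patch): timer() takes a zero-argument callable, reports best-of-N wall/CPU times on stderr, and echoes a truthy return value.

    def perfexample(ui, repo):
        # best-of-N timing goes to stderr; the title labels the run
        timer(lambda: len(repo.changelog), title='len(changelog)')
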
32 32 def perfwalk(ui, repo, *pats):
33 33 try:
34 34 m = scmutil.match(repo[None], pats, {})
35 35 timer(lambda: len(list(repo.dirstate.walk(m, [], True, False))))
36 36 except:
37 37 try:
38 38 m = scmutil.match(repo[None], pats, {})
39 39 timer(lambda: len([b for a, b, c in repo.dirstate.statwalk([], m)]))
40 40 except:
41 41 timer(lambda: len(list(cmdutil.walk(repo, pats, {}))))
42 42
43 43 def perfstatus(ui, repo, *pats):
44 44 #m = match.always(repo.root, repo.getcwd())
45 45 #timer(lambda: sum(map(len, repo.dirstate.status(m, [], False, False, False))))
46 46 timer(lambda: sum(map(len, repo.status())))
47 47
48 48 def perfheads(ui, repo):
49 49 timer(lambda: len(repo.changelog.headrevs()))
50 50
51 51 def perftags(ui, repo):
52 52 import mercurial.changelog, mercurial.manifest
53 53 def t():
54 54 repo.changelog = mercurial.changelog.changelog(repo.sopener)
55 55 repo.manifest = mercurial.manifest.manifest(repo.sopener)
56 56 repo._tags = None
57 57 return len(repo.tags())
58 58 timer(t)
59 59
60 60 def perfdirstate(ui, repo):
61 61 "a" in repo.dirstate
62 62 def d():
63 63 repo.dirstate.invalidate()
64 64 "a" in repo.dirstate
65 65 timer(d)
66 66
67 67 def perfdirstatedirs(ui, repo):
68 68 "a" in repo.dirstate
69 69 def d():
70 70 "a" in repo.dirstate._dirs
71 71 del repo.dirstate._dirs
72 72 timer(d)
73 73
74 74 def perfmanifest(ui, repo):
75 75 def d():
76 76 t = repo.manifest.tip()
77 77 m = repo.manifest.read(t)
78 78 repo.manifest.mapcache = None
79 79 repo.manifest._cache = None
80 80 timer(d)
81 81
82 82 def perfchangeset(ui, repo, rev):
83 83 n = repo[rev].node()
84 84 def d():
85 85 c = repo.changelog.read(n)
86 86 #repo.changelog._cache = None
87 87 timer(d)
88 88
89 89 def perfindex(ui, repo):
90 90 import mercurial.revlog
91 91 mercurial.revlog._prereadsize = 2**24 # disable lazy parser in old hg
92 92 n = repo["tip"].node()
93 93 def d():
94 94 cl = mercurial.revlog.revlog(repo.sopener, "00changelog.i")
95 95 cl.rev(n)
96 96 timer(d)
97 97
98 98 def perfstartup(ui, repo):
99 99 cmd = sys.argv[0]
100 100 def d():
101 101 os.system("HGRCPATH= %s version -q > /dev/null" % cmd)
102 102 timer(d)
103 103
104 104 def perfparents(ui, repo):
105 105 nl = [repo.changelog.node(i) for i in xrange(1000)]
106 106 def d():
107 107 for n in nl:
108 108 repo.changelog.parents(n)
109 109 timer(d)
110 110
111 111 def perflookup(ui, repo, rev):
112 112 timer(lambda: len(repo.lookup(rev)))
113 113
114 114 def perfnodelookup(ui, repo, rev):
115 115 import mercurial.revlog
116 116 mercurial.revlog._prereadsize = 2**24 # disable lazy parser in old hg
117 117 n = repo[rev].node()
118 118 def d():
119 119 cl = mercurial.revlog.revlog(repo.sopener, "00changelog.i")
120 120 cl.rev(n)
121 121 timer(d)
122 122
123 def perfnodelookup(ui, repo, rev):
124 import mercurial.revlog
125 mercurial.revlog._prereadsize = 2**24 # disable lazy parser in old hg
126 n = repo[rev].node()
127 cl = mercurial.revlog.revlog(repo.sopener, "00changelog.i")
128 # behave somewhat consistently across internal API changes
129 if util.safehasattr(cl, 'clearcaches'):
130 clearcaches = cl.clearcaches
131 elif util.safehasattr(cl, '_nodecache'):
132 from mercurial.node import nullid, nullrev
133 def clearcaches():
134 cl._nodecache = {nullid: nullrev}
135 cl._nodepos = None
136 else:
137 def clearcaches():
138 pass
139 def d():
140 cl.rev(n)
141 clearcaches()
142 timer(d)
143
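The probe-and-fallback shape above generalizes to any internal API that may or may not exist in the running hg; a generic sketch (helper name is illustrative):

    def cacheclearer(obj):
        # prefer a native clearcaches() when this hg version has one
        if util.safehasattr(obj, 'clearcaches'):
            return obj.clearcaches
        return lambda: None  # nothing to clear on old versions
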
123 144 def perflog(ui, repo, **opts):
124 145 ui.pushbuffer()
125 146 timer(lambda: commands.log(ui, repo, rev=[], date='', user='',
126 147 copies=opts.get('rename')))
127 148 ui.popbuffer()
128 149
129 150 def perftemplating(ui, repo):
130 151 ui.pushbuffer()
131 152 timer(lambda: commands.log(ui, repo, rev=[], date='', user='',
132 153 template='{date|shortdate} [{rev}:{node|short}]'
133 154 ' {author|person}: {desc|firstline}\n'))
134 155 ui.popbuffer()
135 156
136 157 def perfcca(ui, repo):
137 158 timer(lambda: scmutil.casecollisionauditor(ui, False, repo[None]))
138 159
139 160 def perffncacheload(ui, repo):
140 161 from mercurial import scmutil, store
141 162 s = store.store(set(['store','fncache']), repo.path, scmutil.opener)
142 163 def d():
143 164 s.fncache._load()
144 165 timer(d)
145 166
146 167 def perffncachewrite(ui, repo):
147 168 from mercurial import scmutil, store
148 169 s = store.store(set(['store','fncache']), repo.path, scmutil.opener)
149 170 s.fncache._load()
150 171 def d():
151 172 s.fncache._dirty = True
152 173 s.fncache.write()
153 174 timer(d)
154 175
155 176 def perfdiffwd(ui, repo):
156 177 """Profile diff of working directory changes"""
157 178 options = {
158 179 'w': 'ignore_all_space',
159 180 'b': 'ignore_space_change',
160 181 'B': 'ignore_blank_lines',
161 182 }
162 183
163 184 for diffopt in ('', 'w', 'b', 'B', 'wB'):
164 185 opts = dict((options[c], '1') for c in diffopt)
165 186 def d():
166 187 ui.pushbuffer()
167 188 commands.diff(ui, repo, **opts)
168 189 ui.popbuffer()
169 190 title = 'diffopts: %s' % (diffopt and ('-' + diffopt) or 'none')
170 191 timer(d, title)
171 192
172 193 def perfrevlog(ui, repo, file_, **opts):
173 194 from mercurial import revlog
174 195 dist = opts['dist']
175 196 def d():
176 197 r = revlog.revlog(lambda fn: open(fn, 'rb'), file_)
177 198 for x in xrange(0, len(r), dist):
178 199 r.revision(r.node(x))
179 200
180 201 timer(d)
181 202
182 203 cmdtable = {
183 204 'perfcca': (perfcca, []),
184 205 'perffncacheload': (perffncacheload, []),
185 206 'perffncachewrite': (perffncachewrite, []),
186 207 'perflookup': (perflookup, []),
187 208 'perfnodelookup': (perfnodelookup, []),
188 209 'perfparents': (perfparents, []),
189 210 'perfstartup': (perfstartup, []),
190 211 'perfstatus': (perfstatus, []),
191 212 'perfwalk': (perfwalk, []),
192 213 'perfmanifest': (perfmanifest, []),
193 214 'perfchangeset': (perfchangeset, []),
194 215 'perfindex': (perfindex, []),
195 216 'perfheads': (perfheads, []),
196 217 'perftags': (perftags, []),
197 218 'perfdirstate': (perfdirstate, []),
198 219 'perfdirstatedirs': (perfdirstatedirs, []),
199 220 'perflog': (perflog,
200 221 [('', 'rename', False, 'ask log to follow renames')]),
201 222 'perftemplating': (perftemplating, []),
202 223 'perfdiffwd': (perfdiffwd, []),
203 224 'perfrevlog': (perfrevlog,
204 225 [('d', 'dist', 100, 'distance between the revisions')],
205 226 "[INDEXFILE]"),
206 227 }
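Extending the table follows the same pattern; a hypothetical no-op benchmark would be:

    def perfnoop(ui, repo):
        timer(lambda: None)

    cmdtable['perfnoop'] = (perfnoop, [])
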
@@ -1,713 +1,1200 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #include <Python.h>
11 11 #include <ctype.h>
12 12 #include <string.h>
13 13
14 14 #include "util.h"
15 15
16 16 static int hexdigit(char c)
17 17 {
18 18 if (c >= '0' && c <= '9')
19 19 return c - '0';
20 20 if (c >= 'a' && c <= 'f')
21 21 return c - 'a' + 10;
22 22 if (c >= 'A' && c <= 'F')
23 23 return c - 'A' + 10;
24 24
25 25 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
26 26 return 0;
27 27 }
28 28
29 29 /*
30 30 * Turn a hex-encoded string into binary.
31 31 */
32 32 static PyObject *unhexlify(const char *str, int len)
33 33 {
34 34 PyObject *ret;
35 35 const char *c;
36 36 char *d;
37 37
38 38 ret = PyBytes_FromStringAndSize(NULL, len / 2);
39 39
40 40 if (!ret)
41 41 return NULL;
42 42
43 43 d = PyBytes_AsString(ret);
44 44
45 45 for (c = str; c < str + len;) {
46 46 int hi = hexdigit(*c++);
47 47 int lo = hexdigit(*c++);
48 48 *d++ = (hi << 4) | lo;
49 49 }
50 50
51 51 return ret;
52 52 }
53 53
54 54 /*
55 55 * This code assumes that a manifest is stitched together with newline
56 56 * ('\n') characters.
57 57 */
58 58 static PyObject *parse_manifest(PyObject *self, PyObject *args)
59 59 {
60 60 PyObject *mfdict, *fdict;
61 61 char *str, *cur, *start, *zero;
62 62 int len;
63 63
64 64 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
65 65 &PyDict_Type, &mfdict,
66 66 &PyDict_Type, &fdict,
67 67 &str, &len))
68 68 goto quit;
69 69
70 70 for (start = cur = str, zero = NULL; cur < str + len; cur++) {
71 71 PyObject *file = NULL, *node = NULL;
72 72 PyObject *flags = NULL;
73 73 int nlen;
74 74
75 75 if (!*cur) {
76 76 zero = cur;
77 77 continue;
78 78 }
79 79 else if (*cur != '\n')
80 80 continue;
81 81
82 82 if (!zero) {
83 83 PyErr_SetString(PyExc_ValueError,
84 84 "manifest entry has no separator");
85 85 goto quit;
86 86 }
87 87
88 88 file = PyBytes_FromStringAndSize(start, zero - start);
89 89
90 90 if (!file)
91 91 goto bail;
92 92
93 93 nlen = cur - zero - 1;
94 94
95 95 node = unhexlify(zero + 1, nlen > 40 ? 40 : nlen);
96 96 if (!node)
97 97 goto bail;
98 98
99 99 if (nlen > 40) {
100 100 flags = PyBytes_FromStringAndSize(zero + 41,
101 101 nlen - 40);
102 102 if (!flags)
103 103 goto bail;
104 104
105 105 if (PyDict_SetItem(fdict, file, flags) == -1)
106 106 goto bail;
107 107 }
108 108
109 109 if (PyDict_SetItem(mfdict, file, node) == -1)
110 110 goto bail;
111 111
112 112 start = cur + 1;
113 113 zero = NULL;
114 114
115 115 Py_XDECREF(flags);
116 116 Py_XDECREF(node);
117 117 Py_XDECREF(file);
118 118 continue;
119 119 bail:
120 120 Py_XDECREF(flags);
121 121 Py_XDECREF(node);
122 122 Py_XDECREF(file);
123 123 goto quit;
124 124 }
125 125
126 126 if (len > 0 && *(cur - 1) != '\n') {
127 127 PyErr_SetString(PyExc_ValueError,
128 128 "manifest contains trailing garbage");
129 129 goto quit;
130 130 }
131 131
132 132 Py_INCREF(Py_None);
133 133 return Py_None;
134 134 quit:
135 135 return NULL;
136 136 }
137 137
138 138 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
139 139 {
140 140 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
141 141 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
142 142 char *str, *cur, *end, *cpos;
143 143 int state, mode, size, mtime;
144 144 unsigned int flen;
145 145 int len;
146 146 uint32_t decode[4]; /* for alignment */
147 147
148 148 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
149 149 &PyDict_Type, &dmap,
150 150 &PyDict_Type, &cmap,
151 151 &str, &len))
152 152 goto quit;
153 153
154 154 /* read parents */
155 155 if (len < 40)
156 156 goto quit;
157 157
158 158 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
159 159 if (!parents)
160 160 goto quit;
161 161
162 162 /* read filenames */
163 163 cur = str + 40;
164 164 end = str + len;
165 165
166 166 while (cur < end - 17) {
167 167 /* unpack header */
168 168 state = *cur;
169 169 memcpy(decode, cur + 1, 16);
170 170 mode = ntohl(decode[0]);
171 171 size = ntohl(decode[1]);
172 172 mtime = ntohl(decode[2]);
173 173 flen = ntohl(decode[3]);
174 174 cur += 17;
175 175 if (cur + flen > end || cur + flen < cur) {
176 176 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
177 177 goto quit;
178 178 }
179 179
180 180 entry = Py_BuildValue("ciii", state, mode, size, mtime);
181 181 if (!entry)
182 182 goto quit;
183 183 PyObject_GC_UnTrack(entry); /* don't waste time with this */
184 184
185 185 cpos = memchr(cur, 0, flen);
186 186 if (cpos) {
187 187 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
188 188 cname = PyBytes_FromStringAndSize(cpos + 1,
189 189 flen - (cpos - cur) - 1);
190 190 if (!fname || !cname ||
191 191 PyDict_SetItem(cmap, fname, cname) == -1 ||
192 192 PyDict_SetItem(dmap, fname, entry) == -1)
193 193 goto quit;
194 194 Py_DECREF(cname);
195 195 } else {
196 196 fname = PyBytes_FromStringAndSize(cur, flen);
197 197 if (!fname ||
198 198 PyDict_SetItem(dmap, fname, entry) == -1)
199 199 goto quit;
200 200 }
201 201 cur += flen;
202 202 Py_DECREF(fname);
203 203 Py_DECREF(entry);
204 204 fname = cname = entry = NULL;
205 205 }
206 206
207 207 ret = parents;
208 208 Py_INCREF(ret);
209 209 quit:
210 210 Py_XDECREF(fname);
211 211 Py_XDECREF(cname);
212 212 Py_XDECREF(entry);
213 213 Py_XDECREF(parents);
214 214 return ret;
215 215 }
216 216
217 217 /*
218 * A list-like object that decodes the contents of a RevlogNG index
219 * file on demand. It has limited support for insert and delete at the
220 * last element before the end. The last entry is always a sentinel
221 * nullid.
218 * A base-16 trie for fast node->rev mapping.
219 *
220 * Positive value is index of the next node in the trie
221 * Negative value is a leaf: -(rev + 1)
222 * Zero is empty
223 */
224 typedef struct {
225 int children[16];
226 } nodetree;
227
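A pure-Python model of this slot encoding (illustrative only, not part of the patch):

    def leaf(rev):               # leaf: rev stored as -(rev + 1)
        return -rev - 1

    def unleaf(v):               # negative slot value back to a rev
        return -v - 1

    nt = [[0] * 16]              # one root node; 0 means empty slot
    nt[0][0xa] = leaf(5)         # first nibble 0xa resolves to rev 5
    assert unleaf(nt[0][0xa]) == 5
    assert leaf(-1) == 0         # rev -1 encodes as empty: a deletion

The last line is why nt_insert(self, node, -1) is used below to invalidate entries.
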
228 /*
229 * This class has two behaviours.
230 *
231 * When used in a list-like way (with integer keys), we decode an
232 * entry in a RevlogNG index file on demand. Our last entry is a
233 * sentinel, always a nullid. We have limited support for
234 * integer-keyed insert and delete, only at elements right before the
235 * sentinel.
236 *
237 * With string keys, we lazily perform a reverse mapping from node to
238 * rev, using a base-16 trie.
222 239 */
223 240 typedef struct {
224 241 PyObject_HEAD
225 242 /* Type-specific fields go here. */
226 243 PyObject *data; /* raw bytes of index */
227 244 PyObject **cache; /* cached tuples */
228 245 const char **offsets; /* populated on demand */
229 246 Py_ssize_t raw_length; /* original number of elements */
230 247 Py_ssize_t length; /* current number of elements */
231 248 PyObject *added; /* populated on demand */
249 nodetree *nt; /* base-16 trie */
250 int ntlength; /* # nodes in use */
251 int ntcapacity; /* # nodes allocated */
252 int ntdepth; /* maximum depth of tree */
253 int ntsplits; /* # splits performed */
254 int ntrev; /* last rev scanned */
255 int ntlookups; /* # lookups */
256 int ntmisses; /* # lookups that miss the cache */
232 257 int inlined;
233 258 } indexObject;
234 259
235 static Py_ssize_t index_length(indexObject *self)
260 static Py_ssize_t index_length(const indexObject *self)
236 261 {
237 262 if (self->added == NULL)
238 263 return self->length;
239 264 return self->length + PyList_GET_SIZE(self->added);
240 265 }
241 266
242 267 static PyObject *nullentry;
268 static const char nullid[20];
243 269
244 270 static long inline_scan(indexObject *self, const char **offsets);
245 271
246 272 #if LONG_MAX == 0x7fffffffL
247 273 static char *tuple_format = "Kiiiiiis#";
248 274 #else
249 275 static char *tuple_format = "kiiiiiis#";
250 276 #endif
251 277
252 /* RevlogNG format (all in big endian, data may be inlined):
278 /*
279 * Return a pointer to the beginning of a RevlogNG record.
280 */
281 static const char *index_deref(indexObject *self, Py_ssize_t pos)
282 {
283 if (self->inlined && pos > 0) {
284 if (self->offsets == NULL) {
285 self->offsets = malloc(self->raw_length *
286 sizeof(*self->offsets));
287 if (self->offsets == NULL)
288 return (const char *)PyErr_NoMemory();
289 inline_scan(self, self->offsets);
290 }
291 return self->offsets[pos];
292 }
293
294 return PyString_AS_STRING(self->data) + pos * 64;
295 }
296
297 /*
298 * RevlogNG format (all in big endian, data may be inlined):
253 299 * 6 bytes: offset
254 300 * 2 bytes: flags
255 301 * 4 bytes: compressed length
256 302 * 4 bytes: uncompressed length
257 303 * 4 bytes: base revision
258 304 * 4 bytes: link revision
259 305 * 4 bytes: parent 1 revision
260 306 * 4 bytes: parent 2 revision
261 307 * 32 bytes: nodeid (only 20 bytes used)
262 308 */
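This is the same 64-byte layout that indexformatng (">Qiiiiii20s12x") describes in revlog.py below; a Python sketch of decoding one non-inlined record:

    import struct

    def decode_record(data, pos):
        # 8-byte offset/flags, six 4-byte ints, 20-byte node,
        # 12 bytes of padding (the 32-byte nodeid field)
        (offset_flags, comp_len, uncomp_len, base_rev,
         link_rev, p1_rev, p2_rev, node) = struct.unpack_from(
            '>Qiiiiii20s12x', data, pos * 64)
        if pos == 0:
            offset_flags &= 0xFFFF  # rev 0 stores the version there
        return offset_flags >> 16, offset_flags & 0xFFFF, node
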
263 309 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
264 310 {
265 311 uint32_t decode[8]; /* to enforce alignment with inline data */
266 312 uint64_t offset_flags;
267 313 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
268 314 const char *c_node_id;
269 315 const char *data;
270 316 Py_ssize_t length = index_length(self);
271 317 PyObject *entry;
272 318
273 if (pos >= length) {
319 if (pos < 0)
320 pos += length;
321
322 if (pos < 0 || pos >= length) {
274 323 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
275 324 return NULL;
276 325 }
277 326
278 327 if (pos == length - 1) {
279 328 Py_INCREF(nullentry);
280 329 return nullentry;
281 330 }
282 331
283 332 if (pos >= self->length - 1) {
284 333 PyObject *obj;
285 334 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
286 335 Py_INCREF(obj);
287 336 return obj;
288 337 }
289 338
290 339 if (self->cache) {
291 340 if (self->cache[pos]) {
292 341 Py_INCREF(self->cache[pos]);
293 342 return self->cache[pos];
294 343 }
295 344 } else {
296 345 self->cache = calloc(self->raw_length, sizeof(PyObject *));
297 346 if (self->cache == NULL)
298 347 return PyErr_NoMemory();
299 348 }
300 349
301 if (self->inlined && pos > 0) {
302 if (self->offsets == NULL) {
303 self->offsets = malloc(self->raw_length *
304 sizeof(*self->offsets));
305 if (self->offsets == NULL)
306 return PyErr_NoMemory();
307 inline_scan(self, self->offsets);
308 }
309 data = self->offsets[pos];
310 } else
311 data = PyString_AS_STRING(self->data) + pos * 64;
350 data = index_deref(self, pos);
351 if (data == NULL)
352 return NULL;
312 353
313 354 memcpy(decode, data, 8 * sizeof(uint32_t));
314 355
315 356 offset_flags = ntohl(decode[1]);
316 357 if (pos == 0) /* mask out version number for the first entry */
317 358 offset_flags &= 0xFFFF;
318 359 else {
319 360 uint32_t offset_high = ntohl(decode[0]);
320 361 offset_flags |= ((uint64_t)offset_high) << 32;
321 362 }
322 363
323 364 comp_len = ntohl(decode[2]);
324 365 uncomp_len = ntohl(decode[3]);
325 366 base_rev = ntohl(decode[4]);
326 367 link_rev = ntohl(decode[5]);
327 368 parent_1 = ntohl(decode[6]);
328 369 parent_2 = ntohl(decode[7]);
329 370 c_node_id = data + 32;
330 371
331 372 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
332 373 uncomp_len, base_rev, link_rev,
333 374 parent_1, parent_2, c_node_id, 20);
334 375
335 376 if (entry)
336 377 PyObject_GC_UnTrack(entry);
337 378
338 379 self->cache[pos] = entry;
339 380 Py_INCREF(entry);
340 381
341 382 return entry;
342 383 }
343 384
385 /*
386 * Return the 20-byte SHA of the node corresponding to the given rev.
387 */
388 static const char *index_node(indexObject *self, Py_ssize_t pos)
389 {
390 Py_ssize_t length = index_length(self);
391 const char *data;
392
393 if (pos == length - 1)
394 return nullid;
395
396 if (pos >= length)
397 return NULL;
398
399 if (pos >= self->length - 1) {
400 PyObject *tuple, *str;
401 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
402 str = PyTuple_GetItem(tuple, 7);
403 return str ? PyString_AS_STRING(str) : NULL;
404 }
405
406 data = index_deref(self, pos);
407 return data ? data + 32 : NULL;
408 }
409
410 static int nt_insert(indexObject *self, const char *node, int rev);
411
412 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
413 {
414 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
415 return -1;
416 if (*nodelen == 20)
417 return 0;
418 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
419 return -1;
420 }
421
344 422 static PyObject *index_insert(indexObject *self, PyObject *args)
345 423 {
346 PyObject *obj, *node;
424 PyObject *obj;
425 char *node;
347 426 long offset;
348 Py_ssize_t len;
427 Py_ssize_t len, nodelen;
349 428
350 429 if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
351 430 return NULL;
352 431
353 432 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
354 PyErr_SetString(PyExc_ValueError, "8-tuple required");
433 PyErr_SetString(PyExc_TypeError, "8-tuple required");
355 434 return NULL;
356 435 }
357 436
358 node = PyTuple_GET_ITEM(obj, 7);
359 if (!PyString_Check(node) || PyString_GET_SIZE(node) != 20) {
360 PyErr_SetString(PyExc_ValueError,
361 "20-byte hash required as last element");
437 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
362 438 return NULL;
363 }
364 439
365 440 len = index_length(self);
366 441
367 442 if (offset < 0)
368 443 offset += len;
369 444
370 445 if (offset != len - 1) {
371 446 PyErr_SetString(PyExc_IndexError,
372 447 "insert only supported at index -1");
373 448 return NULL;
374 449 }
375 450
451 if (offset > INT_MAX) {
452 PyErr_SetString(PyExc_ValueError,
453 "currently only 2**31 revs supported");
454 return NULL;
455 }
456
376 457 if (self->added == NULL) {
377 458 self->added = PyList_New(0);
378 459 if (self->added == NULL)
379 460 return NULL;
380 461 }
381 462
382 463 if (PyList_Append(self->added, obj) == -1)
383 464 return NULL;
384 465
466 if (self->nt)
467 nt_insert(self, node, (int)offset);
468
385 469 Py_RETURN_NONE;
386 470 }
387 471
388 472 static void _index_clearcaches(indexObject *self)
389 473 {
390 474 if (self->cache) {
391 475 Py_ssize_t i;
392 476
393 477 for (i = 0; i < self->raw_length; i++) {
394 478 Py_XDECREF(self->cache[i]);
395 479 self->cache[i] = NULL;
396 480 }
397 481 free(self->cache);
398 482 self->cache = NULL;
399 483 }
400 484 if (self->offsets) {
401 485 free(self->offsets);
402 486 self->offsets = NULL;
403 487 }
488 if (self->nt) {
489 free(self->nt);
490 self->nt = NULL;
491 }
404 492 }
405 493
406 494 static PyObject *index_clearcaches(indexObject *self)
407 495 {
408 496 _index_clearcaches(self);
497 self->ntlength = self->ntcapacity = 0;
498 self->ntdepth = self->ntsplits = 0;
499 self->ntrev = -1;
500 self->ntlookups = self->ntmisses = 0;
409 501 Py_RETURN_NONE;
410 502 }
411 503
412 static int index_assign_subscript(indexObject *self, PyObject *item,
413 PyObject *value)
504 static PyObject *index_stats(indexObject *self)
505 {
506 PyObject *obj = PyDict_New();
507
508 if (obj == NULL)
509 return NULL;
510
511 #define istat(__n, __d) \
512 if (PyDict_SetItemString(obj, __d, PyInt_FromLong(self->__n)) == -1) \
513 goto bail;
514
515 if (self->added) {
516 Py_ssize_t len = PyList_GET_SIZE(self->added);
517 if (PyDict_SetItemString(obj, "index entries added",
518 PyInt_FromLong(len)) == -1)
519 goto bail;
520 }
521
522 if (self->raw_length != self->length - 1)
523 istat(raw_length, "revs on disk");
524 istat(length, "revs in memory");
525 istat(ntcapacity, "node trie capacity");
526 istat(ntdepth, "node trie depth");
527 istat(ntlength, "node trie count");
528 istat(ntlookups, "node trie lookups");
529 istat(ntmisses, "node trie misses");
530 istat(ntrev, "node trie last rev scanned");
531 istat(ntsplits, "node trie splits");
532
533 #undef istat
534
535 return obj;
536
537 bail:
538 Py_XDECREF(obj);
539 return NULL;
540 }
541
542 static inline int nt_level(const char *node, int level)
543 {
544 int v = node[level>>1];
545 if (!(level & 1))
546 v >>= 4;
547 return v & 0xf;
548 }
549
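In Python 2 terms (the target of this era of the codebase), the nibble extraction reads (sketch):

    def nt_level(node, level):
        # node is a 20-byte binary string; even levels take the
        # high nibble of a byte, odd levels the low nibble
        v = ord(node[level >> 1])
        if not (level & 1):
            v >>= 4
        return v & 0xf
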
550 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen)
551 {
552 int level, off;
553
554 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
555 return -1;
556
557 if (self->nt == NULL)
558 return -2;
559
560 for (level = off = 0; level < nodelen; level++) {
561 int k = nt_level(node, level);
562 nodetree *n = &self->nt[off];
563 int v = n->children[k];
564
565 if (v < 0) {
566 const char *n;
567 v = -v - 1;
568 n = index_node(self, v);
569 if (n == NULL)
570 return -2;
571 return memcmp(node, n, nodelen > 20 ? 20 : nodelen)
572 ? -2 : v;
573 }
574 if (v == 0)
575 return -2;
576 off = v;
577 }
578 return -2;
579 }
580
581 static int nt_new(indexObject *self)
582 {
583 if (self->ntlength == self->ntcapacity) {
584 self->ntcapacity *= 2;
585 self->nt = realloc(self->nt,
586 self->ntcapacity * sizeof(nodetree));
587 if (self->nt == NULL) {
588 PyErr_SetString(PyExc_MemoryError, "out of memory");
589 return -1;
590 }
591 memset(&self->nt[self->ntlength], 0,
592 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
593 }
594 return self->ntlength++;
595 }
596
597 static int nt_insert(indexObject *self, const char *node, int rev)
598 {
599 int level = 0;
600 int off = 0;
601
602 while (level < 20) {
603 int k = nt_level(node, level);
604 nodetree *n;
605 int v;
606
607 n = &self->nt[off];
608 v = n->children[k];
609
610 if (v == 0) {
611 n->children[k] = -rev - 1;
612 return 0;
613 }
614 if (v < 0) {
615 const char *oldnode = index_node(self, -v - 1);
616 int noff;
617
618 if (!oldnode || !memcmp(oldnode, node, 20)) {
619 n->children[k] = -rev - 1;
620 return 0;
621 }
622 noff = nt_new(self);
623 if (noff == -1)
624 return -1;
625 /* self->nt may have been changed by realloc */
626 self->nt[off].children[k] = noff;
627 off = noff;
628 n = &self->nt[off];
629 n->children[nt_level(oldnode, ++level)] = v;
630 if (level > self->ntdepth)
631 self->ntdepth = level;
632 self->ntsplits += 1;
633 } else {
634 level += 1;
635 off = v;
636 }
637 }
638
639 return -1;
640 }
641
642 /*
643 * Return values:
644 *
645 * -3: error (exception set)
646 * -2: not found (no exception set)
647 * rest: valid rev
648 */
649 static int index_find_node(indexObject *self,
650 const char *node, Py_ssize_t nodelen)
651 {
652 int rev;
653
654 self->ntlookups++;
655 rev = nt_find(self, node, nodelen);
656 if (rev >= -1)
657 return rev;
658
659 if (self->nt == NULL) {
660 self->ntcapacity = self->raw_length < 4
661 ? 4 : self->raw_length / 2;
662 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
663 if (self->nt == NULL) {
664 PyErr_SetString(PyExc_MemoryError, "out of memory");
665 return -3;
666 }
667 self->ntlength = 1;
668 self->ntrev = (int)index_length(self) - 1;
669 self->ntlookups = 1;
670 self->ntmisses = 0;
671 }
672
673 /*
674 * For the first handful of lookups, we scan the entire index,
675 * and cache only the matching nodes. This optimizes for cases
676 * like "hg tip", where only a few nodes are accessed.
677 *
678 * After that, we cache every node we visit, using a single
679 * scan amortized over multiple lookups. This gives the best
680 * bulk performance, e.g. for "hg log".
681 */
682 if (self->ntmisses++ < 4) {
683 for (rev = self->ntrev - 1; rev >= 0; rev--) {
684 const char *n = index_node(self, rev);
685 if (n == NULL)
686 return -2;
687 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
688 if (nt_insert(self, n, rev) == -1)
689 return -3;
690 break;
691 }
692 }
693 } else {
694 for (rev = self->ntrev - 1; rev >= 0; rev--) {
695 const char *n = index_node(self, rev);
696 if (n == NULL)
697 return -2;
698 if (nt_insert(self, n, rev) == -1)
699 return -3;
700 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
701 break;
702 }
703 }
704 self->ntrev = rev;
705 }
706
707 if (rev >= 0)
708 return rev;
709 return -2;
710 }
711
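Condensed into Python, the two-phase policy reads (field names follow the struct; sketch only):

    def scan(self, node):
        light = self.ntmisses < 4   # first few misses: light mode
        self.ntmisses += 1
        rev = self.ntrev - 1
        while rev >= 0:
            n = self.node(rev)
            if not light:
                self.nt_insert(n, rev)      # cache every node seen
            if n == node:
                if light:
                    self.nt_insert(n, rev)  # cache only the match
                break
            rev -= 1
        if not light:
            self.ntrev = rev    # next bulk scan resumes below here
        return rev if rev >= 0 else -2      # -2: not found
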
712 static PyObject *raise_revlog_error(void)
713 {
714 static PyObject *errclass;
715 PyObject *mod = NULL, *errobj;
716
717 if (errclass == NULL) {
718 PyObject *dict;
719
720 mod = PyImport_ImportModule("mercurial.error");
721 if (mod == NULL)
722 goto classfail;
723
724 dict = PyModule_GetDict(mod);
725 if (dict == NULL)
726 goto classfail;
727
728 errclass = PyDict_GetItemString(dict, "RevlogError");
729 if (errclass == NULL) {
730 PyErr_SetString(PyExc_SystemError,
731 "could not find RevlogError");
732 goto classfail;
733 }
734 Py_INCREF(errclass);
735 }
736
737 errobj = PyObject_CallFunction(errclass, NULL);
738 if (errobj == NULL)
739 return NULL;
740 PyErr_SetObject(errclass, errobj);
741 return errobj;
742
743 classfail:
744 Py_XDECREF(mod);
745 return NULL;
746 }
747
748 static PyObject *index_getitem(indexObject *self, PyObject *value)
749 {
750 char *node;
751 Py_ssize_t nodelen;
752 int rev;
753
754 if (PyInt_Check(value))
755 return index_get(self, PyInt_AS_LONG(value));
756
757 if (PyString_AsStringAndSize(value, &node, &nodelen) == -1)
758 return NULL;
759 rev = index_find_node(self, node, nodelen);
760 if (rev >= -1)
761 return PyInt_FromLong(rev);
762 if (rev == -2)
763 raise_revlog_error();
764 return NULL;
765 }
766
767 static PyObject *index_m_get(indexObject *self, PyObject *args)
768 {
769 char *node;
770 int nodelen, rev;
771
772 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
773 return NULL;
774
775 rev = index_find_node(self, node, nodelen);
776 if (rev == -3)
777 return NULL;
778 if (rev == -2)
779 Py_RETURN_NONE;
780 return PyInt_FromLong(rev);
781 }
782
783 static int index_contains(indexObject *self, PyObject *value)
784 {
785 char *node;
786 Py_ssize_t nodelen;
787
788 if (PyInt_Check(value)) {
789 long rev = PyInt_AS_LONG(value);
790 return rev >= -1 && rev < index_length(self);
791 }
792
793 if (!PyString_Check(value))
794 return 0;
795
796 node = PyString_AS_STRING(value);
797 nodelen = PyString_GET_SIZE(value);
798
799 switch (index_find_node(self, node, nodelen)) {
800 case -3:
801 return -1;
802 case -2:
803 return 0;
804 default:
805 return 1;
806 }
807 }
808
809 /*
810 * Invalidate any trie entries introduced by added revs.
811 */
812 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
813 {
814 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
815
816 for (i = start; i < len; i++) {
817 PyObject *tuple = PyList_GET_ITEM(self->added, i);
818 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
819
820 nt_insert(self, PyString_AS_STRING(node), -1);
821 }
822
823 if (start == 0) {
824 Py_DECREF(self->added);
825 self->added = NULL;
826 }
827 }
828
829 /*
830 * Delete a numeric range of revs, which must lie at the end of
831 * the index; the sentinel nullid entry is never deleted.
832 */
833 static int index_slice_del(indexObject *self, PyObject *item)
414 834 {
415 835 Py_ssize_t start, stop, step, slicelength;
416 836 Py_ssize_t length = index_length(self);
417 837
418 if (!PySlice_Check(item) || value != NULL) {
419 PyErr_SetString(PyExc_TypeError,
420 "revlog index only supports slice deletion");
421 return -1;
422 }
423
424 838 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
425 839 &start, &stop, &step, &slicelength) < 0)
426 840 return -1;
427 841
428 842 if (slicelength <= 0)
429 843 return 0;
430 844
431 845 if ((step < 0 && start < stop) || (step > 0 && start > stop))
432 846 stop = start;
433 847
434 848 if (step < 0) {
435 849 stop = start + 1;
436 850 start = stop + step*(slicelength - 1) - 1;
437 851 step = -step;
438 852 }
439 853
440 854 if (step != 1) {
441 855 PyErr_SetString(PyExc_ValueError,
442 856 "revlog index delete requires step size of 1");
443 857 return -1;
444 858 }
445 859
446 860 if (stop != length - 1) {
447 861 PyErr_SetString(PyExc_IndexError,
448 862 "revlog index deletion indices are invalid");
449 863 return -1;
450 864 }
451 865
452 if (start < self->length) {
866 if (start < self->length - 1) {
867 if (self->nt) {
868 Py_ssize_t i;
869
870 for (i = start + 1; i < self->length - 1; i++) {
871 const char *node = index_node(self, i);
872
873 if (node)
874 nt_insert(self, node, -1);
875 }
876 if (self->added)
877 nt_invalidate_added(self, 0);
878 if (self->ntrev > start)
879 self->ntrev = (int)start;
880 }
453 881 self->length = start + 1;
454 if (self->added) {
455 Py_DECREF(self->added);
456 self->added = NULL;
457 }
458 882 return 0;
459 883 }
460 884
461 return PyList_SetSlice(self->added, start - self->length + 1,
462 PyList_GET_SIZE(self->added),
463 NULL);
885 if (self->nt) {
886 nt_invalidate_added(self, start - self->length + 1);
887 if (self->ntrev > start)
888 self->ntrev = (int)start;
889 }
890 return self->added
891 ? PyList_SetSlice(self->added, start - self->length + 1,
892 PyList_GET_SIZE(self->added), NULL)
893 : 0;
464 894 }
465 895
896 /*
897 * Supported ops:
898 *
899 * slice deletion
900 * string assignment (extend node->rev mapping)
901 * string deletion (shrink node->rev mapping)
902 */
903 static int index_assign_subscript(indexObject *self, PyObject *item,
904 PyObject *value)
905 {
906 char *node;
907 Py_ssize_t nodelen;
908 long rev;
909
910 if (PySlice_Check(item) && value == NULL)
911 return index_slice_del(self, item);
912
913 if (node_check(item, &node, &nodelen) == -1)
914 return -1;
915
916 if (value == NULL)
917 return self->nt ? nt_insert(self, node, -1) : 0;
918 rev = PyInt_AsLong(value);
919 if (rev > INT_MAX || rev < 0) {
920 if (!PyErr_Occurred())
921 PyErr_SetString(PyExc_ValueError, "rev out of range");
922 return -1;
923 }
924 return nt_insert(self, node, (int)rev);
925 }
926
927 /*
928 * Find all RevlogNG entries in an index that has inline data. Update
929 * the optional "offsets" table with those entries.
930 */
466 931 static long inline_scan(indexObject *self, const char **offsets)
467 932 {
468 933 const char *data = PyString_AS_STRING(self->data);
469 934 const char *end = data + PyString_GET_SIZE(self->data);
470 935 const long hdrsize = 64;
471 936 long incr = hdrsize;
472 937 Py_ssize_t len = 0;
473 938
474 939 while (data + hdrsize <= end) {
475 940 uint32_t comp_len;
476 941 const char *old_data;
477 942 /* 3rd element of header is length of compressed inline data */
478 943 memcpy(&comp_len, data + 8, sizeof(uint32_t));
479 944 incr = hdrsize + ntohl(comp_len);
480 945 if (incr < hdrsize)
481 946 break;
482 947 if (offsets)
483 948 offsets[len] = data;
484 949 len++;
485 950 old_data = data;
486 951 data += incr;
487 952 if (data <= old_data)
488 953 break;
489 954 }
490 955
491 956 if (data != end && data + hdrsize != end) {
492 957 if (!PyErr_Occurred())
493 958 PyErr_SetString(PyExc_ValueError, "corrupt index file");
494 959 return -1;
495 960 }
496 961
497 962 return len;
498 963 }
499 964
500 965 static int index_real_init(indexObject *self, const char *data, int size,
501 966 PyObject *inlined_obj, PyObject *data_obj)
502 967 {
503 968 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
504 969 self->data = data_obj;
505 970 self->cache = NULL;
506 971
507 972 self->added = NULL;
508 973 self->offsets = NULL;
974 self->nt = NULL;
975 self->ntlength = self->ntcapacity = 0;
976 self->ntdepth = self->ntsplits = 0;
977 self->ntlookups = self->ntmisses = 0;
978 self->ntrev = -1;
509 979 Py_INCREF(self->data);
510 980
511 981 if (self->inlined) {
512 982 long len = inline_scan(self, NULL);
513 983 if (len == -1)
514 984 goto bail;
515 985 self->raw_length = len;
516 986 self->length = len + 1;
517 987 } else {
518 988 if (size % 64) {
519 989 PyErr_SetString(PyExc_ValueError, "corrupt index file");
520 990 goto bail;
521 991 }
522 992 self->raw_length = size / 64;
523 993 self->length = self->raw_length + 1;
524 994 }
525 995
526 996 return 0;
527 997 bail:
528 998 return -1;
529 999 }
530 1000
531 1001 static int index_init(indexObject *self, PyObject *args, PyObject *kwds)
532 1002 {
533 1003 const char *data;
534 1004 int size;
535 1005 PyObject *inlined_obj;
536 1006
537 1007 if (!PyArg_ParseTuple(args, "s#O", &data, &size, &inlined_obj))
538 1008 return -1;
539 1009
540 1010 return index_real_init(self, data, size, inlined_obj,
541 1011 PyTuple_GET_ITEM(args, 0));
542 1012 }
543 1013
1014 static PyObject *index_nodemap(indexObject *self)
1015 {
1016 return (PyObject *)self;
1017 }
1018
544 1019 static void index_dealloc(indexObject *self)
545 1020 {
546 1021 _index_clearcaches(self);
547 1022 Py_DECREF(self->data);
548 1023 Py_XDECREF(self->added);
549 1024 PyObject_Del(self);
550 1025 }
551 1026
552 1027 static PySequenceMethods index_sequence_methods = {
553 1028 (lenfunc)index_length, /* sq_length */
554 1029 0, /* sq_concat */
555 1030 0, /* sq_repeat */
556 1031 (ssizeargfunc)index_get, /* sq_item */
1032 0, /* sq_slice */
1033 0, /* sq_ass_item */
1034 0, /* sq_ass_slice */
1035 (objobjproc)index_contains, /* sq_contains */
557 1036 };
558 1037
559 1038 static PyMappingMethods index_mapping_methods = {
560 1039 (lenfunc)index_length, /* mp_length */
561 NULL, /* mp_subscript */
1040 (binaryfunc)index_getitem, /* mp_subscript */
562 1041 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
563 1042 };
564 1043
565 1044 static PyMethodDef index_methods[] = {
566 1045 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
567 1046 "clear the index caches"},
1047 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1048 "get an index entry"},
568 1049 {"insert", (PyCFunction)index_insert, METH_VARARGS,
569 1050 "insert an index entry"},
1051 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1052 "stats for the index"},
1053 {NULL} /* Sentinel */
1054 };
1055
1056 static PyGetSetDef index_getset[] = {
1057 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
570 1058 {NULL} /* Sentinel */
571 1059 };
572 1060
573 1061 static PyTypeObject indexType = {
574 1062 PyObject_HEAD_INIT(NULL)
575 1063 0, /* ob_size */
576 1064 "parsers.index", /* tp_name */
577 1065 sizeof(indexObject), /* tp_basicsize */
578 1066 0, /* tp_itemsize */
579 1067 (destructor)index_dealloc, /* tp_dealloc */
580 1068 0, /* tp_print */
581 1069 0, /* tp_getattr */
582 1070 0, /* tp_setattr */
583 1071 0, /* tp_compare */
584 1072 0, /* tp_repr */
585 1073 0, /* tp_as_number */
586 1074 &index_sequence_methods, /* tp_as_sequence */
587 1075 &index_mapping_methods, /* tp_as_mapping */
588 1076 0, /* tp_hash */
589 1077 0, /* tp_call */
590 1078 0, /* tp_str */
591 1079 0, /* tp_getattro */
592 1080 0, /* tp_setattro */
593 1081 0, /* tp_as_buffer */
594 1082 Py_TPFLAGS_DEFAULT, /* tp_flags */
595 1083 "revlog index", /* tp_doc */
596 1084 0, /* tp_traverse */
597 1085 0, /* tp_clear */
598 1086 0, /* tp_richcompare */
599 1087 0, /* tp_weaklistoffset */
600 1088 0, /* tp_iter */
601 1089 0, /* tp_iternext */
602 1090 index_methods, /* tp_methods */
603 1091 0, /* tp_members */
604 0, /* tp_getset */
1092 index_getset, /* tp_getset */
605 1093 0, /* tp_base */
606 1094 0, /* tp_dict */
607 1095 0, /* tp_descr_get */
608 1096 0, /* tp_descr_set */
609 1097 0, /* tp_dictoffset */
610 1098 (initproc)index_init, /* tp_init */
611 1099 0, /* tp_alloc */
612 1100 PyType_GenericNew, /* tp_new */
613 1101 };
614 1102
615 1103 /*
616 * returns a tuple of the form (index, None, cache) with elements as
1104 * returns a tuple of the form (index, index, cache) with elements as
617 1105 * follows:
618 1106 *
619 * index: an index object that lazily parses the RevlogNG records
1107 * index: an index object that lazily parses RevlogNG records
620 1108 * cache: if data is inlined, a tuple (index_file_content, 0), else None
621 1109 *
622 1110 * added complications are for backwards compatibility
623 1111 */
624 1112 static PyObject *parse_index2(PyObject *self, PyObject *args)
625 1113 {
626 1114 const char *data;
627 1115 int size, ret;
628 1116 PyObject *inlined_obj, *tuple = NULL, *cache = NULL;
629 1117 indexObject *idx;
630 1118
631 1119 if (!PyArg_ParseTuple(args, "s#O", &data, &size, &inlined_obj))
632 1120 return NULL;
633 1121
634 1122 idx = PyObject_New(indexObject, &indexType);
635 1123
636 1124 if (idx == NULL)
637 1125 goto bail;
638 1126
639 1127 ret = index_real_init(idx, data, size, inlined_obj,
640 1128 PyTuple_GET_ITEM(args, 0));
641 1129 if (ret)
642 1130 goto bail;
643 1131
644 1132 if (idx->inlined) {
645 1133 Py_INCREF(idx->data);
646 1134 cache = Py_BuildValue("iO", 0, idx->data);
647 1135 if (cache == NULL)
648 1136 goto bail;
649 1137 } else {
650 1138 cache = Py_None;
651 1139 Py_INCREF(cache);
652 1140 }
653 1141
1142 Py_INCREF(idx);
1143
654 1144 tuple = Py_BuildValue("NN", idx, cache);
655 1145 if (!tuple)
656 1146 goto bail;
657 1147 return tuple;
658 1148
659 1149 bail:
660 1150 Py_XDECREF(idx);
661 1151 Py_XDECREF(cache);
662 1152 Py_XDECREF(tuple);
663 1153 return NULL;
664 1154 }
665 1155
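From Python, the entry point is consumed the way revlog.py does below; an illustrative sketch (data is the raw index file contents, inline its inline-data flag, node a 20-byte binary hash):

    from mercurial import parsers

    index, cache = parsers.parse_index2(data, inline)
    entry = index[0]                # sequence protocol: rev -> 8-tuple
    rev = index[node]               # mapping protocol: node -> rev
    assert index.nodemap is index   # the index is its own nodemap
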
666 1156 static char parsers_doc[] = "Efficient content parsing.";
667 1157
668 1158 static PyMethodDef methods[] = {
669 1159 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
670 1160 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
671 1161 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
672 1162 {NULL, NULL}
673 1163 };
674 1164
675 1165 static void module_init(PyObject *mod)
676 1166 {
677 static const char nullid[20];
678
679 1167 if (PyType_Ready(&indexType) < 0)
680 1168 return;
681 1169 Py_INCREF(&indexType);
682 1170
683 1171 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
684 1172
685 1173 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
686 1174 -1, -1, -1, -1, nullid, 20);
687 1175 if (nullentry)
688 1176 PyObject_GC_UnTrack(nullentry);
689 1177 }
690 1178
691 1179 #ifdef IS_PY3K
692 1180 static struct PyModuleDef parsers_module = {
693 1181 PyModuleDef_HEAD_INIT,
694 1182 "parsers",
695 1183 parsers_doc,
696 1184 -1,
697 1185 methods
698 1186 };
699 1187
700 1188 PyMODINIT_FUNC PyInit_parsers(void)
701 1189 {
702 1190 PyObject *mod = PyModule_Create(&parsers_module);
703 1191 module_init(mod);
704 1192 return mod;
705 1193 }
706 1194 #else
707 1195 PyMODINIT_FUNC initparsers(void)
708 1196 {
709 1197 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
710 1198 module_init(mod);
711 1199 }
712 1200 #endif
713
@@ -1,1295 +1,1306 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 # import stuff from node for others to import from revlog
15 15 from node import bin, hex, nullid, nullrev
16 16 from i18n import _
17 17 import ancestor, mdiff, parsers, error, util, dagutil
18 18 import struct, zlib, errno
19 19
20 20 _pack = struct.pack
21 21 _unpack = struct.unpack
22 22 _compress = zlib.compress
23 23 _decompress = zlib.decompress
24 24 _sha = util.sha1
25 25
26 26 # revlog header flags
27 27 REVLOGV0 = 0
28 28 REVLOGNG = 1
29 29 REVLOGNGINLINEDATA = (1 << 16)
30 30 REVLOGGENERALDELTA = (1 << 17)
31 31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
35 35
36 36 # revlog index flags
37 37 REVIDX_KNOWN_FLAGS = 0
38 38
39 39 # max size of revlog with inline data
40 40 _maxinline = 131072
41 41 _chunksize = 1048576
42 42
43 43 RevlogError = error.RevlogError
44 44 LookupError = error.LookupError
45 45
46 46 def getoffset(q):
47 47 return int(q >> 16)
48 48
49 49 def gettype(q):
50 50 return int(q & 0xFFFF)
51 51
52 52 def offset_type(offset, type):
53 53 return long(long(offset) << 16 | type)
54 54
55 55 nullhash = _sha(nullid)
56 56
57 57 def hash(text, p1, p2):
58 58 """generate a hash from the given text and its parent hashes
59 59
60 60 This hash combines both the current file contents and its history
61 61 in a manner that makes it easy to distinguish nodes with the same
62 62 content in the revision graph.
63 63 """
64 64 # As of now, if one of the parent nodes is null, p2 is null
65 65 if p2 == nullid:
66 66 # deep copy of a hash is faster than creating one
67 67 s = nullhash.copy()
68 68 s.update(p1)
69 69 else:
70 70 # none of the parent nodes are nullid
71 71 l = [p1, p2]
72 72 l.sort()
73 73 s = _sha(l[0])
74 74 s.update(l[1])
75 75 s.update(text)
76 76 return s.digest()
77 77
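Equivalently, and ignoring the nullid fast path (which produces the same digest, since nullid sorts first), the node hash is simply (sketch):

    import hashlib

    def simple_hash(text, p1, p2):
        # parents in sorted order, then the revision text
        a, b = sorted([p1, p2])
        return hashlib.sha1(a + b + text).digest()
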
78 78 def compress(text):
79 79 """ generate a possibly-compressed representation of text """
80 80 if not text:
81 81 return ("", text)
82 82 l = len(text)
83 83 bin = None
84 84 if l < 44:
85 85 pass
86 86 elif l > 1000000:
87 87 # zlib makes an internal copy, thus doubling memory usage for
88 88 # large files, so let's do this in pieces
89 89 z = zlib.compressobj()
90 90 p = []
91 91 pos = 0
92 92 while pos < l:
93 93 pos2 = pos + 2**20
94 94 p.append(z.compress(text[pos:pos2]))
95 95 pos = pos2
96 96 p.append(z.flush())
97 97 if sum(map(len, p)) < l:
98 98 bin = "".join(p)
99 99 else:
100 100 bin = _compress(text)
101 101 if bin is None or len(bin) > l:
102 102 if text[0] == '\0':
103 103 return ("", text)
104 104 return ('u', text)
105 105 return ("", bin)
106 106
107 107 def decompress(bin):
108 108 """ decompress the given input """
109 109 if not bin:
110 110 return bin
111 111 t = bin[0]
112 112 if t == '\0':
113 113 return bin
114 114 if t == 'x':
115 115 return _decompress(bin)
116 116 if t == 'u':
117 117 return bin[1:]
118 118 raise RevlogError(_("unknown compression type %r") % t)
119 119
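A quick round-trip of these two helpers (illustrative):

    header, payload = compress("a" * 500)  # compressible: header == ""
    assert decompress(header + payload) == "a" * 500

    header, payload = compress("xy")       # under 44 bytes: stored as-is
    assert (header, payload) == ('u', 'xy')
    assert decompress(header + payload) == "xy"
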
120 120 indexformatv0 = ">4l20s20s20s"
121 121 v0shaoffset = 56
122 122
123 123 class revlogoldio(object):
124 124 def __init__(self):
125 125 self.size = struct.calcsize(indexformatv0)
126 126
127 127 def parseindex(self, data, inline):
128 128 s = self.size
129 129 index = []
130 130 nodemap = {nullid: nullrev}
131 131 n = off = 0
132 132 l = len(data)
133 133 while off + s <= l:
134 134 cur = data[off:off + s]
135 135 off += s
136 136 e = _unpack(indexformatv0, cur)
137 137 # transform to revlogv1 format
138 138 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
139 139 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
140 140 index.append(e2)
141 141 nodemap[e[6]] = n
142 142 n += 1
143 143
144 144 # add the magic null revision at -1
145 145 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
146 146
147 147 return index, nodemap, None
148 148
149 149 def packentry(self, entry, node, version, rev):
150 150 if gettype(entry[0]):
151 151 raise RevlogError(_("index entry flags need RevlogNG"))
152 152 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
153 153 node(entry[5]), node(entry[6]), entry[7])
154 154 return _pack(indexformatv0, *e2)
155 155
156 156 # index ng:
157 157 # 6 bytes: offset
158 158 # 2 bytes: flags
159 159 # 4 bytes: compressed length
160 160 # 4 bytes: uncompressed length
161 161 # 4 bytes: base rev
162 162 # 4 bytes: link rev
163 163 # 4 bytes: parent 1 rev
164 164 # 4 bytes: parent 2 rev
165 165 # 32 bytes: nodeid
166 166 indexformatng = ">Qiiiiii20s12x"
167 167 ngshaoffset = 32
168 168 versionformat = ">I"
169 169
170 170 class revlogio(object):
171 171 def __init__(self):
172 172 self.size = struct.calcsize(indexformatng)
173 173
174 174 def parseindex(self, data, inline):
175 175 # call the C implementation to parse the index data
176 176 index, cache = parsers.parse_index2(data, inline)
177 return index, None, cache
177 return index, getattr(index, 'nodemap', None), cache
178 178
179 179 def packentry(self, entry, node, version, rev):
180 180 p = _pack(indexformatng, *entry)
181 181 if rev == 0:
182 182 p = _pack(versionformat, version) + p[4:]
183 183 return p
184 184
185 185 class revlog(object):
186 186 """
187 187 the underlying revision storage object
188 188
189 189 A revlog consists of two parts, an index and the revision data.
190 190
191 191 The index is a file with a fixed record size containing
192 192 information on each revision, including its nodeid (hash), the
193 193 nodeids of its parents, the position and offset of its data within
194 194 the data file, and the revision it's based on. Finally, each entry
195 195 contains a linkrev entry that can serve as a pointer to external
196 196 data.
197 197
198 198 The revision data itself is a linear collection of data chunks.
199 199 Each chunk represents a revision and is usually represented as a
200 200 delta against the previous chunk. To bound lookup time, runs of
201 201 deltas are limited to about 2 times the length of the original
202 202 version data. This makes retrieval of a version proportional to
203 203 its size, or O(1) relative to the number of revisions.
204 204
205 205 Both pieces of the revlog are written to in an append-only
206 206 fashion, which means we never need to rewrite a file to insert or
207 207 remove data, and can use some simple techniques to avoid the need
208 208 for locking while reading.
209 209 """
210 210 def __init__(self, opener, indexfile):
211 211 """
212 212 create a revlog object
213 213
214 214 opener is a function that abstracts the file opening operation
215 215 and can be used to implement COW semantics or the like.
216 216 """
217 217 self.indexfile = indexfile
218 218 self.datafile = indexfile[:-2] + ".d"
219 219 self.opener = opener
220 220 self._cache = None
221 221 self._basecache = (0, 0)
222 222 self._chunkcache = (0, '')
223 223 self.index = []
224 224 self._pcache = {}
225 225 self._nodecache = {nullid: nullrev}
226 226 self._nodepos = None
227 227
228 228 v = REVLOG_DEFAULT_VERSION
229 229 opts = getattr(opener, 'options', None)
230 230 if opts is not None:
231 231 if 'revlogv1' in opts:
232 232 if 'generaldelta' in opts:
233 233 v |= REVLOGGENERALDELTA
234 234 else:
235 235 v = 0
236 236
237 237 i = ''
238 238 self._initempty = True
239 239 try:
240 240 f = self.opener(self.indexfile)
241 241 i = f.read()
242 242 f.close()
243 243 if len(i) > 0:
244 244 v = struct.unpack(versionformat, i[:4])[0]
245 245 self._initempty = False
246 246 except IOError, inst:
247 247 if inst.errno != errno.ENOENT:
248 248 raise
249 249
250 250 self.version = v
251 251 self._inline = v & REVLOGNGINLINEDATA
252 252 self._generaldelta = v & REVLOGGENERALDELTA
253 253 flags = v & ~0xFFFF
254 254 fmt = v & 0xFFFF
255 255 if fmt == REVLOGV0 and flags:
256 256 raise RevlogError(_("index %s unknown flags %#04x for format v0")
257 257 % (self.indexfile, flags >> 16))
258 258 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
259 259 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
260 260 % (self.indexfile, flags >> 16))
261 261 elif fmt > REVLOGNG:
262 262 raise RevlogError(_("index %s unknown format %d")
263 263 % (self.indexfile, fmt))
264 264
265 265 self._io = revlogio()
266 266 if self.version == REVLOGV0:
267 267 self._io = revlogoldio()
268 268 try:
269 269 d = self._io.parseindex(i, self._inline)
270 270 except (ValueError, IndexError):
271 271 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
272 272 self.index, nodemap, self._chunkcache = d
273 273 if nodemap is not None:
274 274 self.nodemap = self._nodecache = nodemap
275 275 if not self._chunkcache:
276 276 self._chunkclear()
277 277
278 278 def tip(self):
279 279 return self.node(len(self.index) - 2)
280 280 def __len__(self):
281 281 return len(self.index) - 1
282 282 def __iter__(self):
283 283 for i in xrange(len(self)):
284 284 yield i
285 285
286 286 @util.propertycache
287 287 def nodemap(self):
288 288 self.rev(self.node(0))
289 289 return self._nodecache
290 290
291 291 def hasnode(self, node):
292 292 try:
293 293 self.rev(node)
294 294 return True
295 295 except KeyError:
296 296 return False
297 297
298 def clearcaches(self):
299 try:
300 self._nodecache.clearcaches()
301 except AttributeError:
302 self._nodecache = {nullid: nullrev}
303 self._nodepos = None
304
298 305 def rev(self, node):
299 306 try:
300 307 return self._nodecache[node]
308 except RevlogError:
309 # parsers.c radix tree lookup failed
310 raise LookupError(node, self.indexfile, _('no node'))
301 311 except KeyError:
312 # pure python cache lookup failed
302 313 n = self._nodecache
303 314 i = self.index
304 315 p = self._nodepos
305 316 if p is None:
306 317 p = len(i) - 2
307 318 for r in xrange(p, -1, -1):
308 319 v = i[r][7]
309 320 n[v] = r
310 321 if v == node:
311 322 self._nodepos = r - 1
312 323 return r
313 324 raise LookupError(node, self.indexfile, _('no node'))
314 325
315 326 def node(self, rev):
316 327 return self.index[rev][7]
317 328 def linkrev(self, rev):
318 329 return self.index[rev][4]
319 330 def parents(self, node):
320 331 i = self.index
321 332 d = i[self.rev(node)]
322 333 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
323 334 def parentrevs(self, rev):
324 335 return self.index[rev][5:7]
325 336 def start(self, rev):
326 337 return int(self.index[rev][0] >> 16)
327 338 def end(self, rev):
328 339 return self.start(rev) + self.length(rev)
329 340 def length(self, rev):
330 341 return self.index[rev][1]
331 342 def chainbase(self, rev):
332 343 index = self.index
333 344 base = index[rev][3]
334 345 while base != rev:
335 346 rev = base
336 347 base = index[rev][3]
337 348 return base
338 349 def flags(self, rev):
339 350 return self.index[rev][0] & 0xFFFF
340 351 def rawsize(self, rev):
341 352 """return the length of the uncompressed text for a given revision"""
342 353 l = self.index[rev][2]
343 354 if l >= 0:
344 355 return l
345 356
346 357 t = self.revision(self.node(rev))
347 358 return len(t)
348 359 size = rawsize
349 360
350 361 def reachable(self, node, stop=None):
351 362 """return the set of all nodes ancestral to a given node, including
352 363 the node itself, stopping when stop is matched"""
353 364 reachable = set((node,))
354 365 visit = [node]
355 366 if stop:
356 367 stopn = self.rev(stop)
357 368 else:
358 369 stopn = 0
359 370 while visit:
360 371 n = visit.pop(0)
361 372 if n == stop:
362 373 continue
363 374 if n == nullid:
364 375 continue
365 376 for p in self.parents(n):
366 377 if self.rev(p) < stopn:
367 378 continue
368 379 if p not in reachable:
369 380 reachable.add(p)
370 381 visit.append(p)
371 382 return reachable
372 383
373 384 def ancestors(self, *revs):
374 385 """Generate the ancestors of 'revs' in reverse topological order.
375 386
376 387 Yield a sequence of revision numbers starting with the parents
377 388 of each revision in revs, i.e., each revision is *not* considered
378 389 an ancestor of itself. Results are in breadth-first order:
379 390 parents of each rev in revs, then parents of those, etc. Result
380 391 does not include the null revision."""
381 392 visit = list(revs)
382 393 seen = set([nullrev])
383 394 while visit:
384 395 for parent in self.parentrevs(visit.pop(0)):
385 396 if parent not in seen:
386 397 visit.append(parent)
387 398 seen.add(parent)
388 399 yield parent
389 400
390 401 def descendants(self, *revs):
391 402 """Generate the descendants of 'revs' in revision order.
392 403
393 404 Yield a sequence of revision numbers starting with a child of
394 405 some rev in revs, i.e., each revision is *not* considered a
395 406 descendant of itself. Results are ordered by revision number (a
396 407 topological sort)."""
397 408 first = min(revs)
398 409 if first == nullrev:
399 410 for i in self:
400 411 yield i
401 412 return
402 413
403 414 seen = set(revs)
404 415 for i in xrange(first + 1, len(self)):
405 416 for x in self.parentrevs(i):
406 417 if x != nullrev and x in seen:
407 418 seen.add(i)
408 419 yield i
409 420 break
410 421
411 422 def findcommonmissing(self, common=None, heads=None):
412 423 """Return a tuple of the ancestors of common and the ancestors of heads
413 424 that are not ancestors of common. In revset terminology, we return the
414 425 tuple:
415 426
416 427 ::common, (::heads) - (::common)
417 428
418 429 The list is sorted by revision number, meaning it is
419 430 topologically sorted.
420 431
421 432 'heads' and 'common' are both lists of node IDs. If heads is
422 433 not supplied, uses all of the revlog's heads. If common is not
423 434 supplied, uses nullid."""
424 435 if common is None:
425 436 common = [nullid]
426 437 if heads is None:
427 438 heads = self.heads()
428 439
429 440 common = [self.rev(n) for n in common]
430 441 heads = [self.rev(n) for n in heads]
431 442
432 443 # we want the ancestors, but inclusive
433 444 has = set(self.ancestors(*common))
434 445 has.add(nullrev)
435 446 has.update(common)
436 447
437 448 # take all ancestors from heads that aren't in has
438 449 missing = set()
439 450 visit = [r for r in heads if r not in has]
440 451 while visit:
441 452 r = visit.pop(0)
442 453 if r in missing:
443 454 continue
444 455 else:
445 456 missing.add(r)
446 457 for p in self.parentrevs(r):
447 458 if p not in has:
448 459 visit.append(p)
449 460 missing = list(missing)
450 461 missing.sort()
451 462 return has, [self.node(r) for r in missing]
452 463
453 464 def findmissing(self, common=None, heads=None):
454 465 """Return the ancestors of heads that are not ancestors of common.
455 466
456 467 More specifically, return a list of nodes N such that every N
457 468 satisfies the following constraints:
458 469
459 470 1. N is an ancestor of some node in 'heads'
460 471 2. N is not an ancestor of any node in 'common'
461 472
462 473 The list is sorted by revision number, meaning it is
463 474 topologically sorted.
464 475
465 476 'heads' and 'common' are both lists of node IDs. If heads is
466 477 not supplied, uses all of the revlog's heads. If common is not
467 478 supplied, uses nullid."""
468 479 _common, missing = self.findcommonmissing(common, heads)
469 480 return missing
470 481
471 482 def nodesbetween(self, roots=None, heads=None):
472 483 """Return a topological path from 'roots' to 'heads'.
473 484
474 485 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
475 486 topologically sorted list of all nodes N that satisfy both of
476 487 these constraints:
477 488
478 489 1. N is a descendant of some node in 'roots'
479 490 2. N is an ancestor of some node in 'heads'
480 491
481 492 Every node is considered to be both a descendant and an ancestor
482 493 of itself, so every reachable node in 'roots' and 'heads' will be
483 494 included in 'nodes'.
484 495
485 496 'outroots' is the list of reachable nodes in 'roots', i.e., the
486 497 subset of 'roots' that is returned in 'nodes'. Likewise,
487 498 'outheads' is the subset of 'heads' that is also in 'nodes'.
488 499
489 500 'roots' and 'heads' are both lists of node IDs. If 'roots' is
490 501 unspecified, uses nullid as the only root. If 'heads' is
491 502 unspecified, uses list of all of the revlog's heads."""
492 503 nonodes = ([], [], [])
493 504 if roots is not None:
494 505 roots = list(roots)
495 506 if not roots:
496 507 return nonodes
497 508 lowestrev = min([self.rev(n) for n in roots])
498 509 else:
499 510 roots = [nullid] # Everybody's a descendant of nullid
500 511 lowestrev = nullrev
501 512 if (lowestrev == nullrev) and (heads is None):
502 513 # We want _all_ the nodes!
503 514 return ([self.node(r) for r in self], [nullid], list(self.heads()))
504 515 if heads is None:
505 516 # All nodes are ancestors, so the latest ancestor is the last
506 517 # node.
507 518 highestrev = len(self) - 1
508 519 # Set ancestors to None to signal that every node is an ancestor.
509 520 ancestors = None
510 521 # Set heads to an empty dictionary for later discovery of heads
511 522 heads = {}
512 523 else:
513 524 heads = list(heads)
514 525 if not heads:
515 526 return nonodes
516 527 ancestors = set()
517 528 # Turn heads into a dictionary so we can remove 'fake' heads.
518 529 # Also, later we will be using it to filter out the heads we can't
519 530 # find from roots.
520 531 heads = dict.fromkeys(heads, False)
521 532 # Start at the top and keep marking parents until we're done.
522 533 nodestotag = set(heads)
523 534 # Remember where the top was so we can use it as a limit later.
524 535 highestrev = max([self.rev(n) for n in nodestotag])
525 536 while nodestotag:
526 537 # grab a node to tag
527 538 n = nodestotag.pop()
528 539 # Never tag nullid
529 540 if n == nullid:
530 541 continue
531 542 # A node's revision number represents its place in a
532 543 # topologically sorted list of nodes.
533 544 r = self.rev(n)
534 545 if r >= lowestrev:
535 546 if n not in ancestors:
536 547 # If we are possibly a descendant of one of the roots
537 548 # and we haven't already been marked as an ancestor
538 549 ancestors.add(n) # Mark as ancestor
539 550 # Add non-nullid parents to list of nodes to tag.
540 551 nodestotag.update([p for p in self.parents(n) if
541 552 p != nullid])
542 553 elif n in heads: # We've seen it before, is it a fake head?
543 554 # So it is; real heads should not be the ancestors of
544 555 # any other heads.
545 556 heads.pop(n)
546 557 if not ancestors:
547 558 return nonodes
548 559 # Now that we have our set of ancestors, we want to remove any
549 560 # roots that are not ancestors.
550 561
551 562 # If one of the roots was nullid, everything is included anyway.
552 563 if lowestrev > nullrev:
553 564 # But, since we weren't, let's recompute the lowest rev to not
554 565 # include roots that aren't ancestors.
555 566
556 567 # Filter out roots that aren't ancestors of heads
557 568 roots = [n for n in roots if n in ancestors]
558 569 # Recompute the lowest revision
559 570 if roots:
560 571 lowestrev = min([self.rev(n) for n in roots])
561 572 else:
562 573 # No more roots? Return empty list
563 574 return nonodes
564 575 else:
565 576 # We are descending from nullid, and don't need to care about
566 577 # any other roots.
567 578 lowestrev = nullrev
568 579 roots = [nullid]
569 580 # Transform our roots list into a set.
570 581 descendants = set(roots)
571 582 # Also, keep the original roots so we can filter out roots that aren't
572 583 # 'real' roots (i.e. are descended from other roots).
573 584 roots = descendants.copy()
574 585 # Our topologically sorted list of output nodes.
575 586 orderedout = []
576 587 # Don't start at nullid since we don't want nullid in our output list,
577 588 # and if nullid shows up in descedents, empty parents will look like
578 589 # they're descendants.
579 590 for r in xrange(max(lowestrev, 0), highestrev + 1):
580 591 n = self.node(r)
581 592 isdescendant = False
582 593 if lowestrev == nullrev: # Everybody is a descendant of nullid
583 594 isdescendant = True
584 595 elif n in descendants:
585 596 # n is already a descendant
586 597 isdescendant = True
587 598 # This check only needs to be done here because all the roots
588 599 # will start being marked as descendants before the loop.
589 600 if n in roots:
590 601 # If n was a root, check if it's a 'real' root.
591 602 p = tuple(self.parents(n))
592 603 # If any of its parents are descendants, it's not a root.
593 604 if (p[0] in descendants) or (p[1] in descendants):
594 605 roots.remove(n)
595 606 else:
596 607 p = tuple(self.parents(n))
597 608 # A node is a descendant if either of its parents are
598 609 # descendants. (We seeded the descendants set with the roots
599 610 # up there, remember?)
600 611 if (p[0] in descendants) or (p[1] in descendants):
601 612 descendants.add(n)
602 613 isdescendant = True
603 614 if isdescendant and ((ancestors is None) or (n in ancestors)):
604 615 # Only include nodes that are both descendants and ancestors.
605 616 orderedout.append(n)
606 617 if (ancestors is not None) and (n in heads):
607 618 # We're trying to figure out which heads are reachable
608 619 # from roots.
609 620 # Mark this head as having been reached
610 621 heads[n] = True
611 622 elif ancestors is None:
612 623 # Otherwise, we're trying to discover the heads.
613 624 # Assume this is a head because if it isn't, the next step
614 625 # will eventually remove it.
615 626 heads[n] = True
616 627 # But, obviously its parents aren't.
617 628 for p in self.parents(n):
618 629 heads.pop(p, None)
619 630 heads = [n for n, flag in heads.iteritems() if flag]
620 631 roots = list(roots)
621 632 assert orderedout
622 633 assert roots
623 634 assert heads
624 635 return (orderedout, roots, heads)
625 636
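# A hedged usage sketch (assumes a populated revlog 'rl'; illustrative,
# not runnable on its own). On a linear history 0-1-2-3:
#
#   nodes, outroots, outheads = rl.nodesbetween([rl.node(1)], [rl.node(2)])
#   # nodes == [rl.node(1), rl.node(2)] -- both endpoints included
#   # outroots == [rl.node(1)]; outheads == [rl.node(2)]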
626 637 def headrevs(self):
627 638 count = len(self)
628 639 if not count:
629 640 return [nullrev]
630 641 ishead = [1] * (count + 1)
631 642 index = self.index
632 643 for r in xrange(count):
633 644 e = index[r]
634 645 ishead[e[5]] = ishead[e[6]] = 0
635 646 return [r for r in xrange(count) if ishead[r]]
636 647
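# The method above marks both parents of every index entry (e[5] and e[6]
# are the parent revisions) and keeps whatever was never named as a
# parent; the extra slot absorbs nullrev (-1). A standalone sketch over a
# toy rev -> (p1, p2) mapping (hypothetical data):
def toy_headrevs(parentrevs, count):
    ishead = [1] * (count + 1)        # ishead[-1] is the nullrev slot
    for r in xrange(count):
        p1, p2 = parentrevs[r]
        ishead[p1] = ishead[p2] = 0
    return [r for r in xrange(count) if ishead[r]]

_parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (0, -1)}
assert toy_headrevs(_parents, 4) == [2, 3]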
637 648 def heads(self, start=None, stop=None):
638 649 """return the list of all nodes that have no children
639 650
640 651 if start is specified, only heads that are descendants of
641 652 start will be returned
642 653 if stop is specified, it will consider all the revs from stop
643 654 as if they had no children
644 655 """
645 656 if start is None and stop is None:
646 657 if not len(self):
647 658 return [nullid]
648 659 return [self.node(r) for r in self.headrevs()]
649 660
650 661 if start is None:
651 662 start = nullid
652 663 if stop is None:
653 664 stop = []
654 665 stoprevs = set([self.rev(n) for n in stop])
655 666 startrev = self.rev(start)
656 667 reachable = set((startrev,))
657 668 heads = set((startrev,))
658 669
659 670 parentrevs = self.parentrevs
660 671 for r in xrange(startrev + 1, len(self)):
661 672 for p in parentrevs(r):
662 673 if p in reachable:
663 674 if r not in stoprevs:
664 675 reachable.add(r)
665 676 heads.add(r)
666 677 if p in heads and p not in stoprevs:
667 678 heads.remove(p)
668 679
669 680 return [self.node(r) for r in heads]
670 681
671 682 def children(self, node):
672 683 """find the children of a given node"""
673 684 c = []
674 685 p = self.rev(node)
675 686 for r in range(p + 1, len(self)):
676 687 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
677 688 if prevs:
678 689 for pr in prevs:
679 690 if pr == p:
680 691 c.append(self.node(r))
681 692 elif p == nullrev:
682 693 c.append(self.node(r))
683 694 return c
684 695
685 696 def descendant(self, start, end):
686 697 if start == nullrev:
687 698 return True
688 699 for i in self.descendants(start):
689 700 if i == end:
690 701 return True
691 702 elif i > end:
692 703 break
693 704 return False
694 705
695 706 def ancestor(self, a, b):
696 707 """calculate the least common ancestor of nodes a and b"""
697 708
698 709 # fast path, check if it is a descendant
699 710 a, b = self.rev(a), self.rev(b)
700 711 start, end = sorted((a, b))
701 712 if self.descendant(start, end):
702 713 return self.node(start)
703 714
704 715 def parents(rev):
705 716 return [p for p in self.parentrevs(rev) if p != nullrev]
706 717
707 718 c = ancestor.ancestor(a, b, parents)
708 719 if c is None:
709 720 return nullid
710 721
711 722 return self.node(c)
712 723
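# mercurial.ancestor.ancestor does the real work; as a rough standalone
# illustration (hypothetical data), one maximal common ancestor can be
# found by intersecting inclusive ancestor sets and taking the highest
# revision number:
def toy_lca(parentrevs, a, b):
    def anc(r):
        seen, visit = set(), [r]
        while visit:
            n = visit.pop()
            if n != -1 and n not in seen:
                seen.add(n)
                visit.extend(parentrevs[n])
        return seen
    common = anc(a) & anc(b)
    if not common:
        return -1
    return max(common)

# 0 <- 1, then 1 <- 2 and 1 <- 3: the least common ancestor of 2, 3 is 1
_parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (1, -1)}
assert toy_lca(_parents, 2, 3) == 1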
713 724 def _match(self, id):
714 725 if isinstance(id, (long, int)):
715 726 # rev
716 727 return self.node(id)
717 728 if len(id) == 20:
718 729 # possibly a binary node
719 730 # odds of a binary node being all hex in ASCII are 1 in 10**25
720 731 try:
721 732 node = id
722 733 self.rev(node) # quick search the index
723 734 return node
724 735 except LookupError:
725 736 pass # may be partial hex id
726 737 try:
727 738 # str(rev)
728 739 rev = int(id)
729 740 if str(rev) != id:
730 741 raise ValueError
731 742 if rev < 0:
732 743 rev = len(self) + rev
733 744 if rev < 0 or rev >= len(self):
734 745 raise ValueError
735 746 return self.node(rev)
736 747 except (ValueError, OverflowError):
737 748 pass
738 749 if len(id) == 40:
739 750 try:
740 751 # a full hex nodeid?
741 752 node = bin(id)
742 753 self.rev(node)
743 754 return node
744 755 except (TypeError, LookupError):
745 756 pass
746 757
747 758 def _partialmatch(self, id):
748 759 if id in self._pcache:
749 760 return self._pcache[id]
750 761
751 762 if len(id) < 40:
752 763 try:
753 764 # hex(node)[:...]
754 765 l = len(id) // 2 # grab an even number of digits
755 766 prefix = bin(id[:l * 2])
756 767 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
757 768 nl = [n for n in nl if hex(n).startswith(id)]
758 769 if len(nl) > 0:
759 770 if len(nl) == 1:
760 771 self._pcache[id] = nl[0]
761 772 return nl[0]
762 773 raise LookupError(id, self.indexfile,
763 774 _('ambiguous identifier'))
764 775 return None
765 776 except TypeError:
766 777 pass
767 778
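# An odd-length hex prefix cannot be converted to bytes directly, so the
# code above binary-matches on the longest even-length prefix first, then
# refines against the full hex form. A standalone sketch (hypothetical
# data):
import binascii

def toy_partialmatch(nodes, id):
    l = len(id) // 2                  # grab an even number of digits
    prefix = binascii.unhexlify(id[:l * 2])
    nl = [n for n in nodes if n.startswith(prefix)]
    nl = [n for n in nl if binascii.hexlify(n).startswith(id)]
    if len(nl) == 1:
        return nl[0]
    if len(nl) > 1:
        raise ValueError('ambiguous identifier: %s' % id)
    return None

_nodes = [binascii.unhexlify('ab' * 20), binascii.unhexlify('cd' * 20)]
assert toy_partialmatch(_nodes, 'aba') == _nodes[0]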
768 779 def lookup(self, id):
769 780 """locate a node based on:
770 781 - revision number or str(revision number)
771 782 - nodeid or subset of hex nodeid
772 783 """
773 784 n = self._match(id)
774 785 if n is not None:
775 786 return n
776 787 n = self._partialmatch(id)
777 788 if n:
778 789 return n
779 790
780 791 raise LookupError(id, self.indexfile, _('no match found'))
781 792
782 793 def cmp(self, node, text):
783 794 """compare text with a given file revision
784 795
785 796 returns True if text is different than what is stored.
786 797 """
787 798 p1, p2 = self.parents(node)
788 799 return hash(text, p1, p2) != node
789 800
790 801 def _addchunk(self, offset, data):
791 802 o, d = self._chunkcache
792 803 # try to add to existing cache
793 804 if o + len(d) == offset and len(d) + len(data) < _chunksize:
794 805 self._chunkcache = o, d + data
795 806 else:
796 807 self._chunkcache = offset, data
797 808
798 809 def _loadchunk(self, offset, length):
799 810 if self._inline:
800 811 df = self.opener(self.indexfile)
801 812 else:
802 813 df = self.opener(self.datafile)
803 814
804 815 readahead = max(65536, length)
805 816 df.seek(offset)
806 817 d = df.read(readahead)
807 818 df.close()
808 819 self._addchunk(offset, d)
809 820 if readahead > length:
810 821 return d[:length]
811 822 return d
812 823
813 824 def _getchunk(self, offset, length):
814 825 o, d = self._chunkcache
815 826 l = len(d)
816 827
817 828 # is it in the cache?
818 829 cachestart = offset - o
819 830 cacheend = cachestart + length
820 831 if cachestart >= 0 and cacheend <= l:
821 832 if cachestart == 0 and cacheend == l:
822 833 return d # avoid a copy
823 834 return d[cachestart:cacheend]
824 835
825 836 return self._loadchunk(offset, length)
826 837
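# The three methods above implement a single-window cache: one
# (offset, data) pair, extended when reads append to it, replaced
# otherwise, with hits served by slicing. A standalone sketch
# (hypothetical; a str stands in for the data file):
class ToyChunkCache(object):
    def __init__(self, backing):
        self.backing = backing
        self.cache = (0, '')
    def read(self, offset, length):
        o, d = self.cache
        start = offset - o
        if start >= 0 and start + length <= len(d):
            return d[start:start + length]            # cache hit
        data = self.backing[offset:offset + max(64, length)]  # read ahead
        if o + len(d) == offset:
            self.cache = (o, d + data)                # extend on append
        else:
            self.cache = (offset, data)               # replace otherwise
        return data[:length]

_c = ToyChunkCache('abcdefghij' * 10)
assert _c.read(2, 3) == 'cde' and _c.read(2, 3) == 'cde'  # miss, then hit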
827 838 def _chunkraw(self, startrev, endrev):
828 839 start = self.start(startrev)
829 840 length = self.end(endrev) - start
830 841 if self._inline:
831 842 start += (startrev + 1) * self._io.size
832 843 return self._getchunk(start, length)
833 844
834 845 def _chunk(self, rev):
835 846 return decompress(self._chunkraw(rev, rev))
836 847
837 848 def _chunkbase(self, rev):
838 849 return self._chunk(rev)
839 850
840 851 def _chunkclear(self):
841 852 self._chunkcache = (0, '')
842 853
843 854 def deltaparent(self, rev):
844 855 """return deltaparent of the given revision"""
845 856 base = self.index[rev][3]
846 857 if base == rev:
847 858 return nullrev
848 859 elif self._generaldelta:
849 860 return base
850 861 else:
851 862 return rev - 1
852 863
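# A restatement of the three cases above (illustrative): index[rev][3]
# stores the delta base; base == rev marks a full snapshot, generaldelta
# revlogs use the stored base, and classic revlogs always delta against
# the previous revision.
def toy_deltaparent(base, rev, generaldelta):
    if base == rev:
        return -1                     # full text: delta parent is nullrev
    if generaldelta:
        return base
    return rev - 1

assert toy_deltaparent(5, 5, True) == -1
assert toy_deltaparent(3, 7, True) == 3
assert toy_deltaparent(3, 7, False) == 6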
853 864 def revdiff(self, rev1, rev2):
854 865 """return or calculate a delta between two revisions"""
855 866 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
856 867 return self._chunk(rev2)
857 868
858 869 return mdiff.textdiff(self.revision(self.node(rev1)),
859 870 self.revision(self.node(rev2)))
860 871
861 872 def revision(self, nodeorrev):
862 873 """return an uncompressed revision of a given node or"""
863 874 if isinstance(nodeorrev, int):
864 875 rev = nodeorrev
865 876 node = self.node(rev)
866 877 else:
867 878 node = nodeorrev
868 879 rev = None
869 880
870 881 cachedrev = None
871 882 if node == nullid:
872 883 return ""
873 884 if self._cache:
874 885 if self._cache[0] == node:
875 886 return self._cache[2]
876 887 cachedrev = self._cache[1]
877 888
878 889 # look up what we need to read
879 890 text = None
880 891 if rev is None:
881 892 rev = self.rev(node)
882 893
883 894 # check rev flags
884 895 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
885 896 raise RevlogError(_('incompatible revision flag %x') %
886 897 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
887 898
888 899 # build delta chain
889 900 chain = []
890 901 index = self.index # for performance
891 902 generaldelta = self._generaldelta
892 903 iterrev = rev
893 904 e = index[iterrev]
894 905 while iterrev != e[3] and iterrev != cachedrev:
895 906 chain.append(iterrev)
896 907 if generaldelta:
897 908 iterrev = e[3]
898 909 else:
899 910 iterrev -= 1
900 911 e = index[iterrev]
901 912 chain.reverse()
902 913 base = iterrev
903 914
904 915 if iterrev == cachedrev:
905 916 # cache hit
906 917 text = self._cache[2]
907 918
908 919 # drop cache to save memory
909 920 self._cache = None
910 921
911 922 self._chunkraw(base, rev)
912 923 if text is None:
913 924 text = self._chunkbase(base)
914 925
915 926 bins = [self._chunk(r) for r in chain]
916 927 text = mdiff.patches(text, bins)
917 928
918 929 text = self._checkhash(text, node, rev)
919 930
920 931 self._cache = (node, rev, text)
921 932 return text
922 933
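# A standalone sketch of the chain walk above (hypothetical data): follow
# delta bases back to a full snapshot, then replay the deltas
# oldest-first. Here 'store' maps rev -> (base, text), base == rev means
# full text, and "patching" is simplified to string concatenation (the
# real code uses mdiff.patches).
def toy_reconstruct(store, rev):
    chain = []
    iterrev = rev
    while store[iterrev][0] != iterrev:     # walk back to a snapshot
        chain.append(iterrev)
        iterrev = store[iterrev][0]
    text = store[iterrev][1]                # full text at the base
    for r in reversed(chain):
        text = text + store[r][1]           # toy stand-in for a patch
    return text

_store = {0: (0, 'a'), 1: (0, 'b'), 2: (1, 'c')}
assert toy_reconstruct(_store, 2) == 'abc'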
923 934 def _checkhash(self, text, node, rev):
924 935 p1, p2 = self.parents(node)
925 936 if node != hash(text, p1, p2):
926 937 raise RevlogError(_("integrity check failed on %s:%d")
927 938 % (self.indexfile, rev))
928 939 return text
929 940
930 941 def checkinlinesize(self, tr, fp=None):
931 942 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
932 943 return
933 944
934 945 trinfo = tr.find(self.indexfile)
935 946 if trinfo is None:
936 947 raise RevlogError(_("%s not found in the transaction")
937 948 % self.indexfile)
938 949
939 950 trindex = trinfo[2]
940 951 dataoff = self.start(trindex)
941 952
942 953 tr.add(self.datafile, dataoff)
943 954
944 955 if fp:
945 956 fp.flush()
946 957 fp.close()
947 958
948 959 df = self.opener(self.datafile, 'w')
949 960 try:
950 961 for r in self:
951 962 df.write(self._chunkraw(r, r))
952 963 finally:
953 964 df.close()
954 965
955 966 fp = self.opener(self.indexfile, 'w', atomictemp=True)
956 967 self.version &= ~(REVLOGNGINLINEDATA)
957 968 self._inline = False
958 969 for i in self:
959 970 e = self._io.packentry(self.index[i], self.node, self.version, i)
960 971 fp.write(e)
961 972
962 973 # if we don't call close, the temp file will never replace the
963 974 # real index
964 975 fp.close()
965 976
966 977 tr.replace(self.indexfile, trindex * self._io.size)
967 978 self._chunkclear()
968 979
969 980 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
970 981 """add a revision to the log
971 982
972 983 text - the revision data to add
973 984 transaction - the transaction object used for rollback
974 985 link - the linkrev data to add
975 986 p1, p2 - the parent nodeids of the revision
976 987 cachedelta - an optional precomputed delta
977 988 """
978 989 node = hash(text, p1, p2)
979 990 if node in self.nodemap:
980 991 return node
981 992
982 993 dfh = None
983 994 if not self._inline:
984 995 dfh = self.opener(self.datafile, "a")
985 996 ifh = self.opener(self.indexfile, "a+")
986 997 try:
987 998 return self._addrevision(node, text, transaction, link, p1, p2,
988 999 cachedelta, ifh, dfh)
989 1000 finally:
990 1001 if dfh:
991 1002 dfh.close()
992 1003 ifh.close()
993 1004
994 1005 def _addrevision(self, node, text, transaction, link, p1, p2,
995 1006 cachedelta, ifh, dfh):
996 1007 """internal function to add revisions to the log
997 1008
998 1009 see addrevision for argument descriptions.
999 1010 invariants:
1000 1011 - text is optional (can be None); if not set, cachedelta must be set.
1001 1012 if both are set, they must correspond to each other.
1002 1013 """
1003 1014 btext = [text]
1004 1015 def buildtext():
1005 1016 if btext[0] is not None:
1006 1017 return btext[0]
1007 1018 # flush any pending writes here so we can read it in revision
1008 1019 if dfh:
1009 1020 dfh.flush()
1010 1021 ifh.flush()
1011 1022 basetext = self.revision(self.node(cachedelta[0]))
1012 1023 btext[0] = mdiff.patch(basetext, cachedelta[1])
1013 1024 chk = hash(btext[0], p1, p2)
1014 1025 if chk != node:
1015 1026 raise RevlogError(_("consistency error in delta"))
1016 1027 return btext[0]
1017 1028
1018 1029 def builddelta(rev):
1019 1030 # can we use the cached delta?
1020 1031 if cachedelta and cachedelta[0] == rev:
1021 1032 delta = cachedelta[1]
1022 1033 else:
1023 1034 t = buildtext()
1024 1035 ptext = self.revision(self.node(rev))
1025 1036 delta = mdiff.textdiff(ptext, t)
1026 1037 data = compress(delta)
1027 1038 l = len(data[1]) + len(data[0])
1028 1039 if basecache[0] == rev:
1029 1040 chainbase = basecache[1]
1030 1041 else:
1031 1042 chainbase = self.chainbase(rev)
1032 1043 dist = l + offset - self.start(chainbase)
1033 1044 if self._generaldelta:
1034 1045 base = rev
1035 1046 else:
1036 1047 base = chainbase
1037 1048 return dist, l, data, base, chainbase
1038 1049
1039 1050 curr = len(self)
1040 1051 prev = curr - 1
1041 1052 base = chainbase = curr
1042 1053 offset = self.end(prev)
1043 1054 flags = 0
1044 1055 d = None
1045 1056 basecache = self._basecache
1046 1057 p1r, p2r = self.rev(p1), self.rev(p2)
1047 1058
1048 1059 # should we try to build a delta?
1049 1060 if prev != nullrev:
1050 1061 if self._generaldelta:
1051 1062 if p1r >= basecache[1]:
1052 1063 d = builddelta(p1r)
1053 1064 elif p2r >= basecache[1]:
1054 1065 d = builddelta(p2r)
1055 1066 else:
1056 1067 d = builddelta(prev)
1057 1068 else:
1058 1069 d = builddelta(prev)
1059 1070 dist, l, data, base, chainbase = d
1060 1071
1061 1072 # full versions are inserted when the needed deltas
1062 1073 # become comparable to the uncompressed text
1063 1074 if text is None:
1064 1075 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1065 1076 cachedelta[1])
1066 1077 else:
1067 1078 textlen = len(text)
1068 1079 if d is None or dist > textlen * 2:
1069 1080 text = buildtext()
1070 1081 data = compress(text)
1071 1082 l = len(data[1]) + len(data[0])
1072 1083 base = chainbase = curr
1073 1084
1074 1085 e = (offset_type(offset, flags), l, textlen,
1075 1086 base, link, p1r, p2r, node)
1076 1087 self.index.insert(-1, e)
1077 1088 self.nodemap[node] = curr
1078 1089
1079 1090 entry = self._io.packentry(e, self.node, self.version, curr)
1080 1091 if not self._inline:
1081 1092 transaction.add(self.datafile, offset)
1082 1093 transaction.add(self.indexfile, curr * len(entry))
1083 1094 if data[0]:
1084 1095 dfh.write(data[0])
1085 1096 dfh.write(data[1])
1086 1097 dfh.flush()
1087 1098 ifh.write(entry)
1088 1099 else:
1089 1100 offset += curr * self._io.size
1090 1101 transaction.add(self.indexfile, offset, curr)
1091 1102 ifh.write(entry)
1092 1103 ifh.write(data[0])
1093 1104 ifh.write(data[1])
1094 1105 self.checkinlinesize(transaction, ifh)
1095 1106
1096 1107 if type(text) == str: # only accept immutable objects
1097 1108 self._cache = (node, curr, text)
1098 1109 self._basecache = (curr, chainbase)
1099 1110 return node
1100 1111
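# The snapshot heuristic above keeps a delta only while the distance from
# the chain base stays within twice the uncompressed text length, which
# bounds how much data any delta chain has to read. Restated with the
# names used in the code (illustrative):
#
#   keep_delta = d is not None and dist <= textlen * 2
#   # otherwise: store compress(buildtext()) as a new full version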
1101 1112 def group(self, nodelist, bundler, reorder=None):
1102 1113 """Calculate a delta group, yielding a sequence of changegroup chunks
1103 1114 (strings).
1104 1115
1105 1116 Given a list of changeset revs, return a set of deltas and
1106 1117 metadata corresponding to nodes. The first delta is
1107 1118 first parent(nodelist[0]) -> nodelist[0], the receiver is
1108 1119 guaranteed to have this parent as it has all history before
1109 1120 these changesets. In the case firstparent is nullrev the
1110 1121 changegroup starts with a full revision.
1111 1122 """
1112 1123
1113 1124 # if we don't have any revisions touched by these changesets, bail
1114 1125 if len(nodelist) == 0:
1115 1126 yield bundler.close()
1116 1127 return
1117 1128
1118 1129 # for generaldelta revlogs, we linearize the revs; this will both be
1119 1130 # much quicker and generate a much smaller bundle
1120 1131 if (self._generaldelta and reorder is not False) or reorder:
1121 1132 dag = dagutil.revlogdag(self)
1122 1133 revs = set(self.rev(n) for n in nodelist)
1123 1134 revs = dag.linearize(revs)
1124 1135 else:
1125 1136 revs = sorted([self.rev(n) for n in nodelist])
1126 1137
1127 1138 # add the parent of the first rev
1128 1139 p = self.parentrevs(revs[0])[0]
1129 1140 revs.insert(0, p)
1130 1141
1131 1142 # build deltas
1132 1143 for r in xrange(len(revs) - 1):
1133 1144 prev, curr = revs[r], revs[r + 1]
1134 1145 for c in bundler.revchunk(self, curr, prev):
1135 1146 yield c
1136 1147
1137 1148 yield bundler.close()
1138 1149
1139 1150 def addgroup(self, bundle, linkmapper, transaction):
1140 1151 """
1141 1152 add a delta group
1142 1153
1143 1154 given a set of deltas, add them to the revision log. The
1144 1155 first delta is against its parent, which should be in our
1145 1156 log; the rest are against the previous delta.
1146 1157 """
1147 1158
1148 1159 # track the base of the current delta log
1149 1160 content = []
1150 1161 node = None
1151 1162
1152 1163 r = len(self)
1153 1164 end = 0
1154 1165 if r:
1155 1166 end = self.end(r - 1)
1156 1167 ifh = self.opener(self.indexfile, "a+")
1157 1168 isize = r * self._io.size
1158 1169 if self._inline:
1159 1170 transaction.add(self.indexfile, end + isize, r)
1160 1171 dfh = None
1161 1172 else:
1162 1173 transaction.add(self.indexfile, isize, r)
1163 1174 transaction.add(self.datafile, end)
1164 1175 dfh = self.opener(self.datafile, "a")
1165 1176
1166 1177 try:
1167 1178 # loop through our set of deltas
1168 1179 chain = None
1169 1180 while True:
1170 1181 chunkdata = bundle.deltachunk(chain)
1171 1182 if not chunkdata:
1172 1183 break
1173 1184 node = chunkdata['node']
1174 1185 p1 = chunkdata['p1']
1175 1186 p2 = chunkdata['p2']
1176 1187 cs = chunkdata['cs']
1177 1188 deltabase = chunkdata['deltabase']
1178 1189 delta = chunkdata['delta']
1179 1190
1180 1191 content.append(node)
1181 1192
1182 1193 link = linkmapper(cs)
1183 1194 if node in self.nodemap:
1184 1195 # this can happen if two branches make the same change
1185 1196 chain = node
1186 1197 continue
1187 1198
1188 1199 for p in (p1, p2):
1189 1200 if p not in self.nodemap:
1190 1201 raise LookupError(p, self.indexfile,
1191 1202 _('unknown parent'))
1192 1203
1193 1204 if deltabase not in self.nodemap:
1194 1205 raise LookupError(deltabase, self.indexfile,
1195 1206 _('unknown delta base'))
1196 1207
1197 1208 baserev = self.rev(deltabase)
1198 1209 chain = self._addrevision(node, None, transaction, link,
1199 1210 p1, p2, (baserev, delta), ifh, dfh)
1200 1211 if not dfh and not self._inline:
1201 1212 # addrevision switched from inline to conventional
1202 1213 # reopen the index
1203 1214 ifh.close()
1204 1215 dfh = self.opener(self.datafile, "a")
1205 1216 ifh = self.opener(self.indexfile, "a")
1206 1217 finally:
1207 1218 if dfh:
1208 1219 dfh.close()
1209 1220 ifh.close()
1210 1221
1211 1222 return content
1212 1223
1213 1224 def strip(self, minlink, transaction):
1214 1225 """truncate the revlog on the first revision with a linkrev >= minlink
1215 1226
1216 1227 This function is called when we're stripping revision minlink and
1217 1228 its descendants from the repository.
1218 1229
1219 1230 We have to remove all revisions with linkrev >= minlink, because
1220 1231 the equivalent changelog revisions will be renumbered after the
1221 1232 strip.
1222 1233
1223 1234 So we truncate the revlog on the first of these revisions, and
1224 1235 trust that the caller has saved the revisions that shouldn't be
1225 1236 removed and that it'll re-add them after this truncation.
1226 1237 """
1227 1238 if len(self) == 0:
1228 1239 return
1229 1240
1230 1241 for rev in self:
1231 1242 if self.index[rev][4] >= minlink:
1232 1243 break
1233 1244 else:
1234 1245 return
1235 1246
1236 1247 # first truncate the files on disk
1237 1248 end = self.start(rev)
1238 1249 if not self._inline:
1239 1250 transaction.add(self.datafile, end)
1240 1251 end = rev * self._io.size
1241 1252 else:
1242 1253 end += rev * self._io.size
1243 1254
1244 1255 transaction.add(self.indexfile, end)
1245 1256
1246 1257 # then reset internal state in memory to forget those revisions
1247 1258 self._cache = None
1248 1259 self._chunkclear()
1249 1260 for x in xrange(rev, len(self)):
1250 1261 del self.nodemap[self.node(x)]
1251 1262
1252 1263 del self.index[rev:-1]
1253 1264
1254 1265 def checksize(self):
1255 1266 expected = 0
1256 1267 if len(self):
1257 1268 expected = max(0, self.end(len(self) - 1))
1258 1269
1259 1270 try:
1260 1271 f = self.opener(self.datafile)
1261 1272 f.seek(0, 2)
1262 1273 actual = f.tell()
1263 1274 f.close()
1264 1275 dd = actual - expected
1265 1276 except IOError, inst:
1266 1277 if inst.errno != errno.ENOENT:
1267 1278 raise
1268 1279 dd = 0
1269 1280
1270 1281 try:
1271 1282 f = self.opener(self.indexfile)
1272 1283 f.seek(0, 2)
1273 1284 actual = f.tell()
1274 1285 f.close()
1275 1286 s = self._io.size
1276 1287 i = max(0, actual // s)
1277 1288 di = actual - (i * s)
1278 1289 if self._inline:
1279 1290 databytes = 0
1280 1291 for r in self:
1281 1292 databytes += max(0, self.length(r))
1282 1293 dd = 0
1283 1294 di = actual - len(self) * s - databytes
1284 1295 except IOError, inst:
1285 1296 if inst.errno != errno.ENOENT:
1286 1297 raise
1287 1298 di = 0
1288 1299
1289 1300 return (dd, di)
1290 1301
1291 1302 def files(self):
1292 1303 res = [self.indexfile]
1293 1304 if not self._inline:
1294 1305 res.append(self.datafile)
1295 1306 return res
@@ -1,115 +1,122 b''
1 1 from mercurial import parsers
2 2 from mercurial.node import nullid, nullrev
3 3 import struct
4 4
5 5 # This unit test compares the return value of the original Python
6 6 # implementation of parseindex and the new C implementation for
7 7 # an index file with and without inlined data
8 8
9 9 # original python implementation
10 10 def gettype(q):
11 11 return int(q & 0xFFFF)
12 12
13 13 def offset_type(offset, type):
14 14 return long(long(offset) << 16 | type)
15 15
16 16 indexformatng = ">Qiiiiii20s12x"
17 17
18 18 def py_parseindex(data, inline):
19 19 s = 64
20 20 cache = None
21 21 index = []
22 22 nodemap = {nullid: nullrev}
23 23 n = off = 0
24 24
25 25 l = len(data) - s
26 26 append = index.append
27 27 if inline:
28 28 cache = (0, data)
29 29 while off <= l:
30 30 e = struct.unpack(indexformatng, data[off:off + s])
31 31 nodemap[e[7]] = n
32 32 append(e)
33 33 n += 1
34 34 if e[1] < 0:
35 35 break
36 36 off += e[1] + s
37 37 else:
38 38 while off <= l:
39 39 e = struct.unpack(indexformatng, data[off:off + s])
40 40 nodemap[e[7]] = n
41 41 append(e)
42 42 n += 1
43 43 off += s
44 44
45 45 e = list(index[0])
46 46 type = gettype(e[0])
47 47 e[0] = offset_type(0, type)
48 48 index[0] = tuple(e)
49 49
50 50 # add the magic null revision at -1
51 51 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
52 52
53 53 return index, cache
54 54
55 55 data_inlined = '\x00\x01\x00\x01\x00\x00\x00\x00\x00\x00\x01\x8c' \
56 56 '\x00\x00\x04\x07\x00\x00\x00\x00\x00\x00\x15\x15\xff\xff\xff' \
57 57 '\xff\xff\xff\xff\xff\xebG\x97\xb7\x1fB\x04\xcf\x13V\x81\tw\x1b' \
58 58 'w\xdduR\xda\xc6\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' \
59 59 'x\x9c\x9d\x93?O\xc30\x10\xc5\xf7|\x8a\xdb\x9a\xa8m\x06\xd8*\x95' \
60 60 '\x81B\xa1\xa2\xa2R\xcb\x86Pd\x9a\x0b5$vd_\x04\xfd\xf6\x9c\xff@' \
61 61 '\x11!\x0b\xd9\xec\xf7\xbbw\xe7gG6\xad6\x04\xdaN\xc0\x92\xa0$)' \
62 62 '\xb1\x82\xa2\xd1%\x16\xa4\x8b7\xa9\xca\xd4-\xb2Y\x02\xfc\xc9' \
63 63 '\xcaS\xf9\xaeX\xed\xb6\xd77Q\x02\x83\xd4\x19\xf5--Y\xea\xe1W' \
64 64 '\xab\xed\x10\xceR\x0f_\xdf\xdf\r\xe1,\xf5\xf0\xcb\xf5 \xceR\x0f' \
65 65 '_\xdc\x0e\x0e\xc3R\x0f_\xae\x96\x9b!\x9e\xa5\x1e\xbf\xdb,\x06' \
66 66 '\xc7q\x9a/\x88\x82\xc3B\xea\xb5\xb4TJ\x93\xb6\x82\x0e\xe16\xe6' \
67 67 'KQ\xdb\xaf\xecG\xa3\xd1 \x01\xd3\x0b_^\xe8\xaa\xa0\xae\xad\xd1' \
68 68 '&\xbef\x1bz\x08\xb0|\xc9Xz\x06\xf6Z\x91\x90J\xaa\x17\x90\xaa' \
69 69 '\xd2\xa6\x11$5C\xcf\xba#\xa0\x03\x02*2\x92-\xfc\xb1\x94\xdf\xe2' \
70 70 '\xae\xb8\'m\x8ey0^\x85\xd3\x82\xb4\xf0`:\x9c\x00\x8a\xfd\x01' \
71 71 '\xb0\xc6\x86\x8b\xdd\xae\x80\xf3\xa9\x9fd\x16\n\x00R%\x1a\x06' \
72 72 '\xe9\xd8b\x98\x1d\xf4\xf3+\x9bf\x01\xd8p\x1b\xf3.\xed\x9f^g\xc3' \
73 73 '^\xd9W81T\xdb\xd5\x04sx|\xf2\xeb\xd6`%?x\xed"\x831\xbf\xf3\xdc' \
74 74 'b\xeb%gaY\xe1\xad\x9f\xb9f\'1w\xa9\xa5a\x83s\x82J\xb98\xbc4\x8b' \
75 75 '\x83\x00\x9f$z\xb8#\xa5\xb1\xdf\x98\xd9\xec\x1b\x89O\xe3Ts\x9a4' \
76 76 '\x17m\x8b\xfc\x8f\xa5\x95\x9a\xfc\xfa\xed,\xe5|\xa1\xfe\x15\xb9' \
77 77 '\xbc\xb2\x93\x1f\xf2\x95\xff\xdf,\x1a\xc5\xe7\x17*\x93Oz:>\x0e'
78 78
79 79 data_non_inlined = '\x00\x00\x00\x01\x00\x00\x00\x00\x00\x01D\x19' \
80 80 '\x00\x07e\x12\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff\xff\xff' \
81 81 '\xff\xff\xff\xff\xd1\xf4\xbb\xb0\xbe\xfc\x13\xbd\x8c\xd3\x9d' \
82 82 '\x0f\xcd\xd9;\x8c\x07\x8cJ/\x00\x00\x00\x00\x00\x00\x00\x00\x00' \
83 83 '\x00\x00\x00\x00\x00\x00\x01D\x19\x00\x00\x00\x00\x00\xdf\x00' \
84 84 '\x00\x01q\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x00\xff' \
85 85 '\xff\xff\xff\xc1\x12\xb9\x04\x96\xa4Z1t\x91\xdfsJ\x90\xf0\x9bh' \
86 86 '\x07l&\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' \
87 87 '\x00\x01D\xf8\x00\x00\x00\x00\x01\x1b\x00\x00\x01\xb8\x00\x00' \
88 88 '\x00\x01\x00\x00\x00\x02\x00\x00\x00\x01\xff\xff\xff\xff\x02\n' \
89 89 '\x0e\xc6&\xa1\x92\xae6\x0b\x02i\xfe-\xe5\xbao\x05\xd1\xe7\x00' \
90 90 '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01F' \
91 91 '\x13\x00\x00\x00\x00\x01\xec\x00\x00\x03\x06\x00\x00\x00\x01' \
92 92 '\x00\x00\x00\x03\x00\x00\x00\x02\xff\xff\xff\xff\x12\xcb\xeby1' \
93 93 '\xb6\r\x98B\xcb\x07\xbd`\x8f\x92\xd9\xc4\x84\xbdK\x00\x00\x00' \
94 94 '\x00\x00\x00\x00\x00\x00\x00\x00\x00'
95 95
96 96 def parse_index2(data, inline):
97 97 index, chunkcache = parsers.parse_index2(data, inline)
98 98 return list(index), chunkcache
99 99
100 100 def runtest():
101 101 py_res_1 = py_parseindex(data_inlined, True)
102 102 c_res_1 = parse_index2(data_inlined, True)
103 103
104 104 py_res_2 = py_parseindex(data_non_inlined, False)
105 105 c_res_2 = parse_index2(data_non_inlined, False)
106 106
107 107 if py_res_1 != c_res_1:
108 108 print "Parse index result (with inlined data) differs!"
109 109
110 110 if py_res_2 != c_res_2:
111 111 print "Parse index result (no inlined data) differs!"
112 112
113 ix = parsers.parse_index2(data_inlined, True)[0]
114 for i, r in enumerate(ix):
115 if r[7] == nullid:
116 i = -1
117 if ix[r[7]] != i:
118 print 'Reverse lookup inconsistent for %r' % r[7].encode('hex')
119
113 120 print "done"
114 121
115 122 runtest()
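# The reverse-lookup check above exercises the C index's node -> rev
# mapping. A minimal pure-Python sketch of a base-16 trie for that
# mapping (illustrative only; the real structure lives in parsers.c):
# each level consumes one hex nibble, so a lookup touches at most 40
# levels and usually far fewer.
def trie_insert(root, node, rev):
    for c in node.encode('hex'):
        root = root.setdefault(c, {})
    root['rev'] = rev

def trie_lookup(root, node):
    for c in node.encode('hex'):
        root = root.get(c)
        if root is None:
            return None
    return root.get('rev')

_trie = {}
_index = parse_index2(data_inlined, True)[0][:-1]  # skip the null entry
for _rev, _e in enumerate(_index):
    trie_insert(_trie, _e[7], _rev)
for _rev, _e in enumerate(_index):
    assert trie_lookup(_trie, _e[7]) == _rev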