##// END OF EJS Templates
parsers: use base-16 trie for faster node->rev mapping...
Bryan O'Sullivan -
r16414:e8d37b78 default
parent child Browse files
Show More
@@ -1,206 +1,227 b''
1 # perf.py - performance test routines
1 # perf.py - performance test routines
2 '''helper extension to measure performance'''
2 '''helper extension to measure performance'''
3
3
4 from mercurial import cmdutil, scmutil, match, commands
4 from mercurial import cmdutil, scmutil, util, match, commands
5 import time, os, sys
5 import time, os, sys
6
6
def timer(func, title=None):
    """Call func repeatedly, timing each run, and report the best run.

    Keeps iterating until at least 3 seconds have elapsed (minimum 100
    runs), or 10 seconds (minimum 3 runs).  Writes the wall, combined,
    user and system times of the fastest iteration to stderr.
    """
    samples = []
    begin = time.time()
    count = 0
    while True:
        before = os.times()
        wallstart = time.time()
        r = func()
        wallstop = time.time()
        after = os.times()
        count += 1
        samples.append((wallstop - wallstart,
                        after[0] - before[0],
                        after[1] - before[1]))
        elapsed = wallstop - begin
        if elapsed > 3 and count >= 100:
            break
        if elapsed > 10 and count >= 3:
            break
    if title:
        sys.stderr.write("! %s\n" % title)
    if r:
        sys.stderr.write("! result: %s\n" % r)
    best = min(samples)
    sys.stderr.write("! wall %f comb %f user %f sys %f (best of %d)\n"
                     % (best[0], best[1] + best[2], best[1], best[2], count))
31
31
def perfwalk(ui, repo, *pats):
    """Time walking the working directory.

    Falls back through successively older dirstate APIs so the
    extension keeps working against historical Mercurial versions.
    Fix: the fallbacks used bare ``except:``, which also swallows
    KeyboardInterrupt/SystemExit; narrowed to ``except Exception``.
    """
    try:
        m = scmutil.match(repo[None], pats, {})
        timer(lambda: len(list(repo.dirstate.walk(m, [], True, False))))
    except Exception:
        # older API: dirstate.statwalk
        try:
            m = scmutil.match(repo[None], pats, {})
            timer(lambda: len([b for a, b, c in repo.dirstate.statwalk([], m)]))
        except Exception:
            # oldest API: cmdutil.walk
            timer(lambda: len(list(cmdutil.walk(repo, pats, {}))))
42
42
def perfstatus(ui, repo, *pats):
    """Time computing the status of the working directory."""
    # could also time dirstate.status directly with an always-matcher
    def d():
        return sum(len(l) for l in repo.status())
    timer(d)
47
47
def perfheads(ui, repo):
    """Time computing the changelog head revisions."""
    def d():
        return len(repo.changelog.headrevs())
    timer(d)
50
50
def perftags(ui, repo):
    """Time reading tags cold, discarding cached state first."""
    import mercurial.changelog
    import mercurial.manifest
    def t():
        # throw away the cached changelog, manifest and tag state so
        # each iteration measures a cold read
        repo.changelog = mercurial.changelog.changelog(repo.sopener)
        repo.manifest = mercurial.manifest.manifest(repo.sopener)
        repo._tags = None
        return len(repo.tags())
    timer(t)
59
59
def perfdirstate(ui, repo):
    """Time loading the dirstate from disk."""
    "a" in repo.dirstate  # prime it once up front
    def d():
        # invalidate so the containment test reloads from disk
        repo.dirstate.invalidate()
        "a" in repo.dirstate
    timer(d)
66
66
def perfdirstatedirs(ui, repo):
    """Time rebuilding the dirstate's directory map."""
    "a" in repo.dirstate  # make sure the dirstate itself is loaded
    def d():
        "a" in repo.dirstate._dirs
        # drop the cached map so the next lookup recomputes it
        del repo.dirstate._dirs
    timer(d)
73
73
def perfmanifest(ui, repo):
    """Time reading the tip manifest with the manifest caches cleared.

    Fix: the parsed manifest was bound to an unused local; just read it.
    """
    def d():
        t = repo.manifest.tip()
        repo.manifest.read(t)
        # clear both old- and new-style caches so the next read is cold
        repo.manifest.mapcache = None
        repo.manifest._cache = None
    timer(d)
81
81
def perfchangeset(ui, repo, rev):
    """Time reading a single changeset.

    Fix: the read result was bound to an unused local; just read it.
    """
    n = repo[rev].node()
    def d():
        repo.changelog.read(n)
        #repo.changelog._cache = None
    timer(d)
88
88
def perfindex(ui, repo):
    """Time parsing the changelog index and resolving the tip node."""
    import mercurial.revlog
    # disable the lazy index parser present in old hg versions
    mercurial.revlog._prereadsize = 2**24
    tipnode = repo["tip"].node()
    def d():
        cl = mercurial.revlog.revlog(repo.sopener, "00changelog.i")
        cl.rev(tipnode)
    timer(d)
97
97
def perfstartup(ui, repo):
    """Time hg interpreter startup via 'hg version' in a subshell."""
    cmd = sys.argv[0]
    def d():
        # empty HGRCPATH keeps user config from skewing the numbers
        os.system("HGRCPATH= %s version -q > /dev/null" % cmd)
    timer(d)
103
103
def perfparents(ui, repo):
    """Time looking up parents of the first (up to) 1000 revisions.

    Fix: hard-coded xrange(1000) raised IndexError on repositories
    with fewer than 1000 revisions; clamp to the repo size.
    """
    count = min(1000, len(repo.changelog))
    nl = [repo.changelog.node(i) for i in xrange(count)]
    def d():
        for n in nl:
            repo.changelog.parents(n)
    timer(d)
110
110
def perflookup(ui, repo, rev):
    """Time resolving a revision identifier to a node."""
    def d():
        return len(repo.lookup(rev))
    timer(d)
113
113
def perfnodelookup(ui, repo, rev):
    """Time mapping a node back to its revision number.

    Lookup caches are cleared between runs so each iteration measures
    a real lookup rather than a cache hit.

    Fix: the file defined perfnodelookup twice; the first, simpler
    definition was dead code shadowed by this one, so it is removed.
    """
    import mercurial.revlog
    mercurial.revlog._prereadsize = 2**24 # disable lazy parser in old hg
    n = repo[rev].node()
    cl = mercurial.revlog.revlog(repo.sopener, "00changelog.i")
    # behave somewhat consistently across internal API changes
    if util.safehasattr(cl, 'clearcaches'):
        clearcaches = cl.clearcaches
    elif util.safehasattr(cl, '_nodecache'):
        from mercurial.node import nullid, nullrev
        def clearcaches():
            cl._nodecache = {nullid: nullrev}
            cl._nodepos = None
    else:
        def clearcaches():
            pass
    def d():
        cl.rev(n)
        clearcaches()
    timer(d)
143
def perflog(ui, repo, **opts):
    """Time running 'hg log' over the whole repo, output discarded."""
    ui.pushbuffer()
    timer(lambda: commands.log(ui, repo, rev=[], date='', user='',
                               copies=opts.get('rename')))
    ui.popbuffer()
128
149
def perftemplating(ui, repo):
    """Time a templated 'hg log' over the whole repo, output discarded."""
    ui.pushbuffer()
    timer(lambda: commands.log(ui, repo, rev=[], date='', user='',
                               template='{date|shortdate} [{rev}:{node|short}]'
                               ' {author|person}: {desc|firstline}\n'))
    ui.popbuffer()
135
156
def perfcca(ui, repo):
    """Time constructing a case-collision auditor."""
    def d():
        return scmutil.casecollisionauditor(ui, False, repo[None])
    timer(d)
138
159
def perffncacheload(ui, repo):
    """Time loading the fncache from the store."""
    from mercurial import scmutil, store
    s = store.store(set(['store', 'fncache']), repo.path, scmutil.opener)
    def d():
        s.fncache._load()
    timer(d)
145
166
def perffncachewrite(ui, repo):
    """Time writing the fncache back to disk."""
    from mercurial import scmutil, store
    s = store.store(set(['store', 'fncache']), repo.path, scmutil.opener)
    s.fncache._load()
    def d():
        # force the dirty flag so write() actually rewrites the file
        s.fncache._dirty = True
        s.fncache.write()
    timer(d)
154
175
def perfdiffwd(ui, repo):
    """Profile diff of working directory changes"""
    options = {
        'w': 'ignore_all_space',
        'b': 'ignore_space_change',
        'B': 'ignore_blank_lines',
    }

    # time a plain diff plus each whitespace-option combination
    for diffopt in ('', 'w', 'b', 'B', 'wB'):
        opts = dict((options[c], '1') for c in diffopt)
        def d():
            ui.pushbuffer()
            commands.diff(ui, repo, **opts)
            ui.popbuffer()
        if diffopt:
            title = 'diffopts: %s' % ('-' + diffopt)
        else:
            title = 'diffopts: %s' % 'none'
        timer(d, title)
171
192
def perfrevlog(ui, repo, file_, **opts):
    """Time reading every opts['dist']-th revision of a revlog."""
    from mercurial import revlog
    dist = opts['dist']
    def d():
        rl = revlog.revlog(lambda fn: open(fn, 'rb'), file_)
        for rev in xrange(0, len(rl), dist):
            rl.revision(rl.node(rev))

    timer(d)
181
202
# command name -> (function, options[, synopsis]) as expected by hg's
# extension loader.
# Fix: 'perfdirstatedirs' was mistakenly bound to perfdirstate, so the
# _dirs benchmark could never be run.
cmdtable = {
    'perfcca': (perfcca, []),
    'perffncacheload': (perffncacheload, []),
    'perffncachewrite': (perffncachewrite, []),
    'perflookup': (perflookup, []),
    'perfnodelookup': (perfnodelookup, []),
    'perfparents': (perfparents, []),
    'perfstartup': (perfstartup, []),
    'perfstatus': (perfstatus, []),
    'perfwalk': (perfwalk, []),
    'perfmanifest': (perfmanifest, []),
    'perfchangeset': (perfchangeset, []),
    'perfindex': (perfindex, []),
    'perfheads': (perfheads, []),
    'perftags': (perftags, []),
    'perfdirstate': (perfdirstate, []),
    'perfdirstatedirs': (perfdirstatedirs, []),
    'perflog': (perflog,
                [('', 'rename', False, 'ask log to follow renames')]),
    'perftemplating': (perftemplating, []),
    'perfdiffwd': (perfdiffwd, []),
    'perfrevlog': (perfrevlog,
                   [('d', 'dist', 100, 'distance between the revisions')],
                   "[INDEXFILE]"),
}
This diff has been collapsed as it changes many lines, (585 lines changed) Show them Hide them
@@ -1,713 +1,1200 b''
1 /*
1 /*
2 parsers.c - efficient content parsing
2 parsers.c - efficient content parsing
3
3
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8 */
8 */
9
9
10 #include <Python.h>
10 #include <Python.h>
11 #include <ctype.h>
11 #include <ctype.h>
12 #include <string.h>
12 #include <string.h>
13
13
14 #include "util.h"
14 #include "util.h"
15
15
/*
 * Decode one ASCII hex digit.  On invalid input, sets ValueError and
 * returns 0 — callers must check for a pending Python error.
 */
static int hexdigit(char c)
{
	if ('0' <= c && c <= '9')
		return c - '0';
	if ('a' <= c && c <= 'f')
		return 10 + (c - 'a');
	if ('A' <= c && c <= 'F')
		return 10 + (c - 'A');

	PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
	return 0;
}
28
28
/*
 * Turn a hex-encoded string into binary.
 * Returns a new bytes object of len/2 bytes, or NULL on error.
 */
static PyObject *unhexlify(const char *str, int len)
{
	PyObject *bytes;
	const char *src;
	char *dst;

	bytes = PyBytes_FromStringAndSize(NULL, len / 2);
	if (!bytes)
		return NULL;

	dst = PyBytes_AsString(bytes);

	for (src = str; src < str + len; src += 2) {
		int hi = hexdigit(src[0]);
		int lo = hexdigit(src[1]);
		*dst++ = (hi << 4) | lo;
	}

	return bytes;
}
53
53
/*
 * This code assumes that a manifest is stitched together with newline
 * ('\n') characters.
 */
static PyObject *parse_manifest(PyObject *self, PyObject *args)
{
	PyObject *mfdict, *fdict;
	char *str, *p, *start, *sep;
	int len;

	if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
			      &PyDict_Type, &mfdict,
			      &PyDict_Type, &fdict,
			      &str, &len))
		goto quit;

	/* each entry is "<file>\0<40-hex-node>[<flags>]\n" */
	for (start = p = str, sep = NULL; p < str + len; p++) {
		PyObject *file = NULL, *node = NULL;
		PyObject *flags = NULL;
		int nlen;

		if (!*p) {
			sep = p;	/* remember the NUL separator */
			continue;
		}
		if (*p != '\n')
			continue;

		/* newline: complete entry from start..p */
		if (!sep) {
			PyErr_SetString(PyExc_ValueError,
					"manifest entry has no separator");
			goto quit;
		}

		file = PyBytes_FromStringAndSize(start, sep - start);
		if (!file)
			goto bail;

		nlen = p - sep - 1;

		node = unhexlify(sep + 1, nlen > 40 ? 40 : nlen);
		if (!node)
			goto bail;

		if (nlen > 40) {
			/* bytes past the 40-char hash are flag characters */
			flags = PyBytes_FromStringAndSize(sep + 41,
							  nlen - 40);
			if (!flags)
				goto bail;

			if (PyDict_SetItem(fdict, file, flags) == -1)
				goto bail;
		}

		if (PyDict_SetItem(mfdict, file, node) == -1)
			goto bail;

		start = p + 1;
		sep = NULL;

		Py_XDECREF(flags);
		Py_XDECREF(node);
		Py_XDECREF(file);
		continue;
	bail:
		Py_XDECREF(flags);
		Py_XDECREF(node);
		Py_XDECREF(file);
		goto quit;
	}

	if (len > 0 && *(p - 1) != '\n') {
		PyErr_SetString(PyExc_ValueError,
				"manifest contains trailing garbage");
		goto quit;
	}

	Py_INCREF(Py_None);
	return Py_None;
quit:
	return NULL;
}
137
137
/*
 * Parse a dirstate file into dmap (file -> state entry) and cmap
 * (file -> copy source).  Returns the (p1, p2) parents tuple.
 */
static PyObject *parse_dirstate(PyObject *self, PyObject *args)
{
	PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
	PyObject *fname = NULL, *cname = NULL, *entry = NULL;
	char *str, *cur, *end, *cpos;
	int state, mode, size, mtime;
	unsigned int flen;
	int len;
	uint32_t decode[4]; /* for alignment */

	if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
			      &PyDict_Type, &dmap,
			      &PyDict_Type, &cmap,
			      &str, &len))
		goto quit;

	/* read parents: two 20-byte nodes at the front */
	if (len < 40)
		goto quit;

	parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
	if (!parents)
		goto quit;

	/* read filename records */
	cur = str + 40;
	end = str + len;

	while (cur < end - 17) {
		/* unpack 17-byte header: state + mode/size/mtime/flen */
		state = *cur;
		memcpy(decode, cur + 1, 16);
		mode = ntohl(decode[0]);
		size = ntohl(decode[1]);
		mtime = ntohl(decode[2]);
		flen = ntohl(decode[3]);
		cur += 17;
		if (cur + flen > end || cur + flen < cur) {
			PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
			goto quit;
		}

		entry = Py_BuildValue("ciii", state, mode, size, mtime);
		if (!entry)
			goto quit;
		PyObject_GC_UnTrack(entry); /* don't waste time with this */

		/* an embedded NUL separates filename from copy source */
		cpos = memchr(cur, 0, flen);
		if (cpos) {
			fname = PyBytes_FromStringAndSize(cur, cpos - cur);
			cname = PyBytes_FromStringAndSize(cpos + 1,
							  flen - (cpos - cur) - 1);
			if (!fname || !cname ||
			    PyDict_SetItem(cmap, fname, cname) == -1 ||
			    PyDict_SetItem(dmap, fname, entry) == -1)
				goto quit;
			Py_DECREF(cname);
		} else {
			fname = PyBytes_FromStringAndSize(cur, flen);
			if (!fname ||
			    PyDict_SetItem(dmap, fname, entry) == -1)
				goto quit;
		}
		cur += flen;
		Py_DECREF(fname);
		Py_DECREF(entry);
		fname = cname = entry = NULL;
	}

	ret = parents;
	Py_INCREF(ret);
quit:
	Py_XDECREF(fname);
	Py_XDECREF(cname);
	Py_XDECREF(entry);
	Py_XDECREF(parents);
	return ret;
}
216
216
/*
 * A base-16 trie for fast node->rev mapping.
 *
 * Positive value is index of the next node in the trie
 * Negative value is a leaf: -(rev + 1)
 * Zero is empty
 */
typedef struct {
	int children[16];
} nodetree;

/*
 * This class has two behaviours.
 *
 * When used in a list-like way (with integer keys), we decode an
 * entry in a RevlogNG index file on demand. Our last entry is a
 * sentinel, always a nullid.  We have limited support for
 * integer-keyed insert and delete, only at elements right before the
 * sentinel.
 *
 * With string keys, we lazily perform a reverse mapping from node to
 * rev, using a base-16 trie.
 */
typedef struct {
	PyObject_HEAD
	/* Type-specific fields go here. */
	PyObject *data;        /* raw bytes of index */
	PyObject **cache;      /* cached tuples */
	const char **offsets;  /* populated on demand */
	Py_ssize_t raw_length; /* original number of elements */
	Py_ssize_t length;     /* current number of elements */
	PyObject *added;       /* populated on demand */
	nodetree *nt;          /* base-16 trie */
	int ntlength;          /* # nodes in use */
	int ntcapacity;        /* # nodes allocated */
	int ntdepth;           /* maximum depth of tree */
	int ntsplits;          /* # splits performed */
	int ntrev;             /* last rev scanned */
	int ntlookups;         /* # lookups */
	int ntmisses;          /* # lookups that miss the cache */
	int inlined;
} indexObject;
234
259
/* Number of index entries: on-disk records plus any appended ones. */
static Py_ssize_t index_length(const indexObject *self)
{
	if (self->added != NULL)
		return self->length + PyList_GET_SIZE(self->added);
	return self->length;
}
241
266
/* shared sentinel entry returned for the final (null) revision */
static PyObject *nullentry;
/* the all-zeros null node id */
static const char nullid[20];

static long inline_scan(indexObject *self, const char **offsets);

/* Py_BuildValue format for an index entry; the offset+flags field
 * needs an unsigned long long on 32-bit platforms. */
#if LONG_MAX == 0x7fffffffL
static char *tuple_format = "Kiiiiiis#";
#else
static char *tuple_format = "kiiiiiis#";
#endif
251
277
/*
 * Return a pointer to the beginning of a RevlogNG record.
 */
static const char *index_deref(indexObject *self, Py_ssize_t pos)
{
	if (self->inlined && pos > 0) {
		/* inline data: record offsets are irregular, so build
		 * the offset table lazily on first access */
		if (self->offsets == NULL) {
			self->offsets = malloc(self->raw_length *
					       sizeof(*self->offsets));
			if (self->offsets == NULL)
				return (const char *)PyErr_NoMemory();
			inline_scan(self, self->offsets);
		}
		return self->offsets[pos];
	}

	/* non-inline records are a fixed 64 bytes each */
	return PyString_AS_STRING(self->data) + pos * 64;
}

/*
 * RevlogNG format (all in big endian, data may be inlined):
 * 6 bytes: offset
 * 2 bytes: flags
 * 4 bytes: compressed length
 * 4 bytes: uncompressed length
 * 4 bytes: base revision
 * 4 bytes: link revision
 * 4 bytes: parent 1 revision
 * 4 bytes: parent 2 revision
 * 32 bytes: nodeid (only 20 bytes used)
 */
/*
 * Decode the RevlogNG entry at *pos* into a Python tuple, caching it.
 * Supports negative indexing.  Returns a new reference or NULL.
 *
 * Fix: on Py_BuildValue failure the original stored NULL in the cache
 * and then called Py_INCREF(entry) on NULL, crashing the interpreter;
 * we now bail out before touching the cache.
 */
static PyObject *index_get(indexObject *self, Py_ssize_t pos)
{
	uint32_t decode[8]; /* to enforce alignment with inline data */
	uint64_t offset_flags;
	int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
	const char *c_node_id;
	const char *data;
	Py_ssize_t length = index_length(self);
	PyObject *entry;

	if (pos < 0)
		pos += length;

	if (pos < 0 || pos >= length) {
		PyErr_SetString(PyExc_IndexError, "revlog index out of range");
		return NULL;
	}

	/* the last entry is always the nullid sentinel */
	if (pos == length - 1) {
		Py_INCREF(nullentry);
		return nullentry;
	}

	/* entries past the on-disk data live in the added list */
	if (pos >= self->length - 1) {
		PyObject *obj;
		obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
		Py_INCREF(obj);
		return obj;
	}

	if (self->cache) {
		if (self->cache[pos]) {
			Py_INCREF(self->cache[pos]);
			return self->cache[pos];
		}
	} else {
		self->cache = calloc(self->raw_length, sizeof(PyObject *));
		if (self->cache == NULL)
			return PyErr_NoMemory();
	}

	data = index_deref(self, pos);
	if (data == NULL)
		return NULL;

	memcpy(decode, data, 8 * sizeof(uint32_t));

	offset_flags = ntohl(decode[1]);
	if (pos == 0) /* mask out version number for the first entry */
		offset_flags &= 0xFFFF;
	else {
		uint32_t offset_high = ntohl(decode[0]);
		offset_flags |= ((uint64_t)offset_high) << 32;
	}

	comp_len = ntohl(decode[2]);
	uncomp_len = ntohl(decode[3]);
	base_rev = ntohl(decode[4]);
	link_rev = ntohl(decode[5]);
	parent_1 = ntohl(decode[6]);
	parent_2 = ntohl(decode[7]);
	c_node_id = data + 32;

	entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
			      uncomp_len, base_rev, link_rev,
			      parent_1, parent_2, c_node_id, 20);

	if (!entry)
		return NULL;	/* don't cache or INCREF a NULL */

	PyObject_GC_UnTrack(entry);

	self->cache[pos] = entry;
	Py_INCREF(entry);

	return entry;
}
343
384
/*
 * Return the 20-byte SHA of the node corresponding to the given rev.
 */
static const char *index_node(indexObject *self, Py_ssize_t pos)
{
	Py_ssize_t length = index_length(self);
	const char *data;

	if (pos == length - 1)
		return nullid;	/* sentinel entry */

	if (pos >= length)
		return NULL;

	if (pos >= self->length - 1) {
		/* entry lives in the in-memory added list */
		PyObject *tuple, *str;
		tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
		str = PyTuple_GetItem(tuple, 7);
		return str ? PyString_AS_STRING(str) : NULL;
	}

	data = index_deref(self, pos);
	return data ? data + 32 : NULL;
}
409
static int nt_insert(indexObject *self, const char *node, int rev);

/*
 * Extract a string object's buffer, insisting on a 20-byte hash.
 * Returns 0 on success; -1 with an exception set otherwise.
 */
static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
{
	if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
		return -1;
	if (*nodelen != 20) {
		PyErr_SetString(PyExc_ValueError, "20-byte hash required");
		return -1;
	}
	return 0;
}
421
/*
 * Append an 8-tuple entry to the index.  Only insertion at index -1
 * (just before the nullid sentinel) is supported.
 */
static PyObject *index_insert(indexObject *self, PyObject *args)
{
	PyObject *obj;
	char *node;
	long offset;
	Py_ssize_t len, nodelen;

	if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
		return NULL;

	if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
		PyErr_SetString(PyExc_TypeError, "8-tuple required");
		return NULL;
	}

	/* element 7 must be a 20-byte node hash */
	if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
		return NULL;

	len = index_length(self);

	if (offset < 0)
		offset += len;

	if (offset != len - 1) {
		PyErr_SetString(PyExc_IndexError,
				"insert only supported at index -1");
		return NULL;
	}

	if (offset > INT_MAX) {
		PyErr_SetString(PyExc_ValueError,
				"currently only 2**31 revs supported");
		return NULL;
	}

	if (self->added == NULL) {
		self->added = PyList_New(0);
		if (self->added == NULL)
			return NULL;
	}

	if (PyList_Append(self->added, obj) == -1)
		return NULL;

	/* keep the node->rev trie in sync if it has been built */
	if (self->nt)
		nt_insert(self, node, (int)offset);

	Py_RETURN_NONE;
}
387
471
388 static void _index_clearcaches(indexObject *self)
472 static void _index_clearcaches(indexObject *self)
389 {
473 {
390 if (self->cache) {
474 if (self->cache) {
391 Py_ssize_t i;
475 Py_ssize_t i;
392
476
393 for (i = 0; i < self->raw_length; i++) {
477 for (i = 0; i < self->raw_length; i++) {
394 Py_XDECREF(self->cache[i]);
478 Py_XDECREF(self->cache[i]);
395 self->cache[i] = NULL;
479 self->cache[i] = NULL;
396 }
480 }
397 free(self->cache);
481 free(self->cache);
398 self->cache = NULL;
482 self->cache = NULL;
399 }
483 }
400 if (self->offsets) {
484 if (self->offsets) {
401 free(self->offsets);
485 free(self->offsets);
402 self->offsets = NULL;
486 self->offsets = NULL;
403 }
487 }
488 if (self->nt) {
489 free(self->nt);
490 self->nt = NULL;
491 }
404 }
492 }
405
493
406 static PyObject *index_clearcaches(indexObject *self)
494 static PyObject *index_clearcaches(indexObject *self)
407 {
495 {
408 _index_clearcaches(self);
496 _index_clearcaches(self);
497 self->ntlength = self->ntcapacity = 0;
498 self->ntdepth = self->ntsplits = 0;
499 self->ntrev = -1;
500 self->ntlookups = self->ntmisses = 0;
409 Py_RETURN_NONE;
501 Py_RETURN_NONE;
410 }
502 }
411
503
412 static int index_assign_subscript(indexObject *self, PyObject *item,
504 static PyObject *index_stats(indexObject *self)
413 PyObject *value)
505 {
506 PyObject *obj = PyDict_New();
507
508 if (obj == NULL)
509 return NULL;
510
511 #define istat(__n, __d) \
512 if (PyDict_SetItemString(obj, __d, PyInt_FromLong(self->__n)) == -1) \
513 goto bail;
514
515 if (self->added) {
516 Py_ssize_t len = PyList_GET_SIZE(self->added);
517 if (PyDict_SetItemString(obj, "index entries added",
518 PyInt_FromLong(len)) == -1)
519 goto bail;
520 }
521
522 if (self->raw_length != self->length - 1)
523 istat(raw_length, "revs on disk");
524 istat(length, "revs in memory");
525 istat(ntcapacity, "node trie capacity");
526 istat(ntdepth, "node trie depth");
527 istat(ntlength, "node trie count");
528 istat(ntlookups, "node trie lookups");
529 istat(ntmisses, "node trie misses");
530 istat(ntrev, "node trie last rev scanned");
531 istat(ntsplits, "node trie splits");
532
533 #undef istat
534
535 return obj;
536
537 bail:
538 Py_XDECREF(obj);
539 return NULL;
540 }
541
542 static inline int nt_level(const char *node, int level)
543 {
544 int v = node[level>>1];
545 if (!(level & 1))
546 v >>= 4;
547 return v & 0xf;
548 }
549
550 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen)
551 {
552 int level, off;
553
554 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
555 return -1;
556
557 if (self->nt == NULL)
558 return -2;
559
560 for (level = off = 0; level < nodelen; level++) {
561 int k = nt_level(node, level);
562 nodetree *n = &self->nt[off];
563 int v = n->children[k];
564
565 if (v < 0) {
566 const char *n;
567 v = -v - 1;
568 n = index_node(self, v);
569 if (n == NULL)
570 return -2;
571 return memcmp(node, n, nodelen > 20 ? 20 : nodelen)
572 ? -2 : v;
573 }
574 if (v == 0)
575 return -2;
576 off = v;
577 }
578 return -2;
579 }
580
581 static int nt_new(indexObject *self)
582 {
583 if (self->ntlength == self->ntcapacity) {
584 self->ntcapacity *= 2;
585 self->nt = realloc(self->nt,
586 self->ntcapacity * sizeof(nodetree));
587 if (self->nt == NULL) {
588 PyErr_SetString(PyExc_MemoryError, "out of memory");
589 return -1;
590 }
591 memset(&self->nt[self->ntlength], 0,
592 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
593 }
594 return self->ntlength++;
595 }
596
597 static int nt_insert(indexObject *self, const char *node, int rev)
598 {
599 int level = 0;
600 int off = 0;
601
602 while (level < 20) {
603 int k = nt_level(node, level);
604 nodetree *n;
605 int v;
606
607 n = &self->nt[off];
608 v = n->children[k];
609
610 if (v == 0) {
611 n->children[k] = -rev - 1;
612 return 0;
613 }
614 if (v < 0) {
615 const char *oldnode = index_node(self, -v - 1);
616 int noff;
617
618 if (!oldnode || !memcmp(oldnode, node, 20)) {
619 n->children[k] = -rev - 1;
620 return 0;
621 }
622 noff = nt_new(self);
623 if (noff == -1)
624 return -1;
625 /* self->nt may have been changed by realloc */
626 self->nt[off].children[k] = noff;
627 off = noff;
628 n = &self->nt[off];
629 n->children[nt_level(oldnode, ++level)] = v;
630 if (level > self->ntdepth)
631 self->ntdepth = level;
632 self->ntsplits += 1;
633 } else {
634 level += 1;
635 off = v;
636 }
637 }
638
639 return -1;
640 }
641
642 /*
643 * Return values:
644 *
645 * -3: error (exception set)
646 * -2: not found (no exception set)
647 * rest: valid rev
648 */
649 static int index_find_node(indexObject *self,
650 const char *node, Py_ssize_t nodelen)
651 {
652 int rev;
653
654 self->ntlookups++;
655 rev = nt_find(self, node, nodelen);
656 if (rev >= -1)
657 return rev;
658
659 if (self->nt == NULL) {
660 self->ntcapacity = self->raw_length < 4
661 ? 4 : self->raw_length / 2;
662 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
663 if (self->nt == NULL) {
664 PyErr_SetString(PyExc_MemoryError, "out of memory");
665 return -3;
666 }
667 self->ntlength = 1;
668 self->ntrev = (int)index_length(self) - 1;
669 self->ntlookups = 1;
670 self->ntmisses = 0;
671 }
672
673 /*
674 * For the first handful of lookups, we scan the entire index,
675 * and cache only the matching nodes. This optimizes for cases
676 * like "hg tip", where only a few nodes are accessed.
677 *
678 * After that, we cache every node we visit, using a single
679 * scan amortized over multiple lookups. This gives the best
680 * bulk performance, e.g. for "hg log".
681 */
682 if (self->ntmisses++ < 4) {
683 for (rev = self->ntrev - 1; rev >= 0; rev--) {
684 const char *n = index_node(self, rev);
685 if (n == NULL)
686 return -2;
687 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
688 if (nt_insert(self, n, rev) == -1)
689 return -3;
690 break;
691 }
692 }
693 } else {
694 for (rev = self->ntrev - 1; rev >= 0; rev--) {
695 const char *n = index_node(self, rev);
696 if (n == NULL)
697 return -2;
698 if (nt_insert(self, n, rev) == -1)
699 return -3;
700 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
701 break;
702 }
703 }
704 self->ntrev = rev;
705 }
706
707 if (rev >= 0)
708 return rev;
709 return -2;
710 }
711
712 static PyObject *raise_revlog_error(void)
713 {
714 static PyObject *errclass;
715 PyObject *mod = NULL, *errobj;
716
717 if (errclass == NULL) {
718 PyObject *dict;
719
720 mod = PyImport_ImportModule("mercurial.error");
721 if (mod == NULL)
722 goto classfail;
723
724 dict = PyModule_GetDict(mod);
725 if (dict == NULL)
726 goto classfail;
727
728 errclass = PyDict_GetItemString(dict, "RevlogError");
729 if (errclass == NULL) {
730 PyErr_SetString(PyExc_SystemError,
731 "could not find RevlogError");
732 goto classfail;
733 }
734 Py_INCREF(errclass);
735 }
736
737 errobj = PyObject_CallFunction(errclass, NULL);
738 if (errobj == NULL)
739 return NULL;
740 PyErr_SetObject(errclass, errobj);
741 return errobj;
742
743 classfail:
744 Py_XDECREF(mod);
745 return NULL;
746 }
747
748 static PyObject *index_getitem(indexObject *self, PyObject *value)
749 {
750 char *node;
751 Py_ssize_t nodelen;
752 int rev;
753
754 if (PyInt_Check(value))
755 return index_get(self, PyInt_AS_LONG(value));
756
757 if (PyString_AsStringAndSize(value, &node, &nodelen) == -1)
758 return NULL;
759 rev = index_find_node(self, node, nodelen);
760 if (rev >= -1)
761 return PyInt_FromLong(rev);
762 if (rev == -2)
763 raise_revlog_error();
764 return NULL;
765 }
766
767 static PyObject *index_m_get(indexObject *self, PyObject *args)
768 {
769 char *node;
770 int nodelen, rev;
771
772 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
773 return NULL;
774
775 rev = index_find_node(self, node, nodelen);
776 if (rev == -3)
777 return NULL;
778 if (rev == -2)
779 Py_RETURN_NONE;
780 return PyInt_FromLong(rev);
781 }
782
783 static int index_contains(indexObject *self, PyObject *value)
784 {
785 char *node;
786 Py_ssize_t nodelen;
787
788 if (PyInt_Check(value)) {
789 long rev = PyInt_AS_LONG(value);
790 return rev >= -1 && rev < index_length(self);
791 }
792
793 if (!PyString_Check(value))
794 return 0;
795
796 node = PyString_AS_STRING(value);
797 nodelen = PyString_GET_SIZE(value);
798
799 switch (index_find_node(self, node, nodelen)) {
800 case -3:
801 return -1;
802 case -2:
803 return 0;
804 default:
805 return 1;
806 }
807 }
808
809 /*
810 * Invalidate any trie entries introduced by added revs.
811 */
812 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
813 {
814 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
815
816 for (i = start; i < len; i++) {
817 PyObject *tuple = PyList_GET_ITEM(self->added, i);
818 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
819
820 nt_insert(self, PyString_AS_STRING(node), -1);
821 }
822
823 if (start == 0) {
824 Py_DECREF(self->added);
825 self->added = NULL;
826 }
827 }
828
829 /*
830 * Delete a numeric range of revs, which must be at the end of the
831 * range, but exclude the sentinel nullid entry.
832 */
833 static int index_slice_del(indexObject *self, PyObject *item)
414 {
834 {
415 Py_ssize_t start, stop, step, slicelength;
835 Py_ssize_t start, stop, step, slicelength;
416 Py_ssize_t length = index_length(self);
836 Py_ssize_t length = index_length(self);
417
837
418 if (!PySlice_Check(item) || value != NULL) {
419 PyErr_SetString(PyExc_TypeError,
420 "revlog index only supports slice deletion");
421 return -1;
422 }
423
424 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
838 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
425 &start, &stop, &step, &slicelength) < 0)
839 &start, &stop, &step, &slicelength) < 0)
426 return -1;
840 return -1;
427
841
428 if (slicelength <= 0)
842 if (slicelength <= 0)
429 return 0;
843 return 0;
430
844
431 if ((step < 0 && start < stop) || (step > 0 && start > stop))
845 if ((step < 0 && start < stop) || (step > 0 && start > stop))
432 stop = start;
846 stop = start;
433
847
434 if (step < 0) {
848 if (step < 0) {
435 stop = start + 1;
849 stop = start + 1;
436 start = stop + step*(slicelength - 1) - 1;
850 start = stop + step*(slicelength - 1) - 1;
437 step = -step;
851 step = -step;
438 }
852 }
439
853
440 if (step != 1) {
854 if (step != 1) {
441 PyErr_SetString(PyExc_ValueError,
855 PyErr_SetString(PyExc_ValueError,
442 "revlog index delete requires step size of 1");
856 "revlog index delete requires step size of 1");
443 return -1;
857 return -1;
444 }
858 }
445
859
446 if (stop != length - 1) {
860 if (stop != length - 1) {
447 PyErr_SetString(PyExc_IndexError,
861 PyErr_SetString(PyExc_IndexError,
448 "revlog index deletion indices are invalid");
862 "revlog index deletion indices are invalid");
449 return -1;
863 return -1;
450 }
864 }
451
865
452 if (start < self->length) {
866 if (start < self->length - 1) {
867 if (self->nt) {
868 Py_ssize_t i;
869
870 for (i = start + 1; i < self->length - 1; i++) {
871 const char *node = index_node(self, i);
872
873 if (node)
874 nt_insert(self, node, -1);
875 }
876 if (self->added)
877 nt_invalidate_added(self, 0);
878 if (self->ntrev > start)
879 self->ntrev = (int)start;
880 }
453 self->length = start + 1;
881 self->length = start + 1;
454 if (self->added) {
455 Py_DECREF(self->added);
456 self->added = NULL;
457 }
458 return 0;
882 return 0;
459 }
883 }
460
884
461 return PyList_SetSlice(self->added, start - self->length + 1,
885 if (self->nt) {
462 PyList_GET_SIZE(self->added),
886 nt_invalidate_added(self, start - self->length + 1);
463 NULL);
887 if (self->ntrev > start)
888 self->ntrev = (int)start;
889 }
890 return self->added
891 ? PyList_SetSlice(self->added, start - self->length + 1,
892 PyList_GET_SIZE(self->added), NULL)
893 : 0;
464 }
894 }
465
895
896 /*
897 * Supported ops:
898 *
899 * slice deletion
900 * string assignment (extend node->rev mapping)
901 * string deletion (shrink node->rev mapping)
902 */
903 static int index_assign_subscript(indexObject *self, PyObject *item,
904 PyObject *value)
905 {
906 char *node;
907 Py_ssize_t nodelen;
908 long rev;
909
910 if (PySlice_Check(item) && value == NULL)
911 return index_slice_del(self, item);
912
913 if (node_check(item, &node, &nodelen) == -1)
914 return -1;
915
916 if (value == NULL)
917 return self->nt ? nt_insert(self, node, -1) : 0;
918 rev = PyInt_AsLong(value);
919 if (rev > INT_MAX || rev < 0) {
920 if (!PyErr_Occurred())
921 PyErr_SetString(PyExc_ValueError, "rev out of range");
922 return -1;
923 }
924 return nt_insert(self, node, (int)rev);
925 }
926
927 /*
928 * Find all RevlogNG entries in an index that has inline data. Update
929 * the optional "offsets" table with those entries.
930 */
466 static long inline_scan(indexObject *self, const char **offsets)
931 static long inline_scan(indexObject *self, const char **offsets)
467 {
932 {
468 const char *data = PyString_AS_STRING(self->data);
933 const char *data = PyString_AS_STRING(self->data);
469 const char *end = data + PyString_GET_SIZE(self->data);
934 const char *end = data + PyString_GET_SIZE(self->data);
470 const long hdrsize = 64;
935 const long hdrsize = 64;
471 long incr = hdrsize;
936 long incr = hdrsize;
472 Py_ssize_t len = 0;
937 Py_ssize_t len = 0;
473
938
474 while (data + hdrsize <= end) {
939 while (data + hdrsize <= end) {
475 uint32_t comp_len;
940 uint32_t comp_len;
476 const char *old_data;
941 const char *old_data;
477 /* 3rd element of header is length of compressed inline data */
942 /* 3rd element of header is length of compressed inline data */
478 memcpy(&comp_len, data + 8, sizeof(uint32_t));
943 memcpy(&comp_len, data + 8, sizeof(uint32_t));
479 incr = hdrsize + ntohl(comp_len);
944 incr = hdrsize + ntohl(comp_len);
480 if (incr < hdrsize)
945 if (incr < hdrsize)
481 break;
946 break;
482 if (offsets)
947 if (offsets)
483 offsets[len] = data;
948 offsets[len] = data;
484 len++;
949 len++;
485 old_data = data;
950 old_data = data;
486 data += incr;
951 data += incr;
487 if (data <= old_data)
952 if (data <= old_data)
488 break;
953 break;
489 }
954 }
490
955
491 if (data != end && data + hdrsize != end) {
956 if (data != end && data + hdrsize != end) {
492 if (!PyErr_Occurred())
957 if (!PyErr_Occurred())
493 PyErr_SetString(PyExc_ValueError, "corrupt index file");
958 PyErr_SetString(PyExc_ValueError, "corrupt index file");
494 return -1;
959 return -1;
495 }
960 }
496
961
497 return len;
962 return len;
498 }
963 }
499
964
500 static int index_real_init(indexObject *self, const char *data, int size,
965 static int index_real_init(indexObject *self, const char *data, int size,
501 PyObject *inlined_obj, PyObject *data_obj)
966 PyObject *inlined_obj, PyObject *data_obj)
502 {
967 {
503 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
968 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
504 self->data = data_obj;
969 self->data = data_obj;
505 self->cache = NULL;
970 self->cache = NULL;
506
971
507 self->added = NULL;
972 self->added = NULL;
508 self->offsets = NULL;
973 self->offsets = NULL;
974 self->nt = NULL;
975 self->ntlength = self->ntcapacity = 0;
976 self->ntdepth = self->ntsplits = 0;
977 self->ntlookups = self->ntmisses = 0;
978 self->ntrev = -1;
509 Py_INCREF(self->data);
979 Py_INCREF(self->data);
510
980
511 if (self->inlined) {
981 if (self->inlined) {
512 long len = inline_scan(self, NULL);
982 long len = inline_scan(self, NULL);
513 if (len == -1)
983 if (len == -1)
514 goto bail;
984 goto bail;
515 self->raw_length = len;
985 self->raw_length = len;
516 self->length = len + 1;
986 self->length = len + 1;
517 } else {
987 } else {
518 if (size % 64) {
988 if (size % 64) {
519 PyErr_SetString(PyExc_ValueError, "corrupt index file");
989 PyErr_SetString(PyExc_ValueError, "corrupt index file");
520 goto bail;
990 goto bail;
521 }
991 }
522 self->raw_length = size / 64;
992 self->raw_length = size / 64;
523 self->length = self->raw_length + 1;
993 self->length = self->raw_length + 1;
524 }
994 }
525
995
526 return 0;
996 return 0;
527 bail:
997 bail:
528 return -1;
998 return -1;
529 }
999 }
530
1000
531 static int index_init(indexObject *self, PyObject *args, PyObject *kwds)
1001 static int index_init(indexObject *self, PyObject *args, PyObject *kwds)
532 {
1002 {
533 const char *data;
1003 const char *data;
534 int size;
1004 int size;
535 PyObject *inlined_obj;
1005 PyObject *inlined_obj;
536
1006
537 if (!PyArg_ParseTuple(args, "s#O", &data, &size, &inlined_obj))
1007 if (!PyArg_ParseTuple(args, "s#O", &data, &size, &inlined_obj))
538 return -1;
1008 return -1;
539
1009
540 return index_real_init(self, data, size, inlined_obj,
1010 return index_real_init(self, data, size, inlined_obj,
541 PyTuple_GET_ITEM(args, 0));
1011 PyTuple_GET_ITEM(args, 0));
542 }
1012 }
543
1013
1014 static PyObject *index_nodemap(indexObject *self)
1015 {
1016 return (PyObject *)self;
1017 }
1018
544 static void index_dealloc(indexObject *self)
1019 static void index_dealloc(indexObject *self)
545 {
1020 {
546 _index_clearcaches(self);
1021 _index_clearcaches(self);
547 Py_DECREF(self->data);
1022 Py_DECREF(self->data);
548 Py_XDECREF(self->added);
1023 Py_XDECREF(self->added);
549 PyObject_Del(self);
1024 PyObject_Del(self);
550 }
1025 }
551
1026
552 static PySequenceMethods index_sequence_methods = {
1027 static PySequenceMethods index_sequence_methods = {
553 (lenfunc)index_length, /* sq_length */
1028 (lenfunc)index_length, /* sq_length */
554 0, /* sq_concat */
1029 0, /* sq_concat */
555 0, /* sq_repeat */
1030 0, /* sq_repeat */
556 (ssizeargfunc)index_get, /* sq_item */
1031 (ssizeargfunc)index_get, /* sq_item */
1032 0, /* sq_slice */
1033 0, /* sq_ass_item */
1034 0, /* sq_ass_slice */
1035 (objobjproc)index_contains, /* sq_contains */
557 };
1036 };
558
1037
559 static PyMappingMethods index_mapping_methods = {
1038 static PyMappingMethods index_mapping_methods = {
560 (lenfunc)index_length, /* mp_length */
1039 (lenfunc)index_length, /* mp_length */
561 NULL, /* mp_subscript */
1040 (binaryfunc)index_getitem, /* mp_subscript */
562 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1041 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
563 };
1042 };
564
1043
565 static PyMethodDef index_methods[] = {
1044 static PyMethodDef index_methods[] = {
566 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1045 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
567 "clear the index caches"},
1046 "clear the index caches"},
1047 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1048 "get an index entry"},
568 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1049 {"insert", (PyCFunction)index_insert, METH_VARARGS,
569 "insert an index entry"},
1050 "insert an index entry"},
1051 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1052 "stats for the index"},
1053 {NULL} /* Sentinel */
1054 };
1055
1056 static PyGetSetDef index_getset[] = {
1057 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
570 {NULL} /* Sentinel */
1058 {NULL} /* Sentinel */
571 };
1059 };
572
1060
573 static PyTypeObject indexType = {
1061 static PyTypeObject indexType = {
574 PyObject_HEAD_INIT(NULL)
1062 PyObject_HEAD_INIT(NULL)
575 0, /* ob_size */
1063 0, /* ob_size */
576 "parsers.index", /* tp_name */
1064 "parsers.index", /* tp_name */
577 sizeof(indexObject), /* tp_basicsize */
1065 sizeof(indexObject), /* tp_basicsize */
578 0, /* tp_itemsize */
1066 0, /* tp_itemsize */
579 (destructor)index_dealloc, /* tp_dealloc */
1067 (destructor)index_dealloc, /* tp_dealloc */
580 0, /* tp_print */
1068 0, /* tp_print */
581 0, /* tp_getattr */
1069 0, /* tp_getattr */
582 0, /* tp_setattr */
1070 0, /* tp_setattr */
583 0, /* tp_compare */
1071 0, /* tp_compare */
584 0, /* tp_repr */
1072 0, /* tp_repr */
585 0, /* tp_as_number */
1073 0, /* tp_as_number */
586 &index_sequence_methods, /* tp_as_sequence */
1074 &index_sequence_methods, /* tp_as_sequence */
587 &index_mapping_methods, /* tp_as_mapping */
1075 &index_mapping_methods, /* tp_as_mapping */
588 0, /* tp_hash */
1076 0, /* tp_hash */
589 0, /* tp_call */
1077 0, /* tp_call */
590 0, /* tp_str */
1078 0, /* tp_str */
591 0, /* tp_getattro */
1079 0, /* tp_getattro */
592 0, /* tp_setattro */
1080 0, /* tp_setattro */
593 0, /* tp_as_buffer */
1081 0, /* tp_as_buffer */
594 Py_TPFLAGS_DEFAULT, /* tp_flags */
1082 Py_TPFLAGS_DEFAULT, /* tp_flags */
595 "revlog index", /* tp_doc */
1083 "revlog index", /* tp_doc */
596 0, /* tp_traverse */
1084 0, /* tp_traverse */
597 0, /* tp_clear */
1085 0, /* tp_clear */
598 0, /* tp_richcompare */
1086 0, /* tp_richcompare */
599 0, /* tp_weaklistoffset */
1087 0, /* tp_weaklistoffset */
600 0, /* tp_iter */
1088 0, /* tp_iter */
601 0, /* tp_iternext */
1089 0, /* tp_iternext */
602 index_methods, /* tp_methods */
1090 index_methods, /* tp_methods */
603 0, /* tp_members */
1091 0, /* tp_members */
604 0, /* tp_getset */
1092 index_getset, /* tp_getset */
605 0, /* tp_base */
1093 0, /* tp_base */
606 0, /* tp_dict */
1094 0, /* tp_dict */
607 0, /* tp_descr_get */
1095 0, /* tp_descr_get */
608 0, /* tp_descr_set */
1096 0, /* tp_descr_set */
609 0, /* tp_dictoffset */
1097 0, /* tp_dictoffset */
610 (initproc)index_init, /* tp_init */
1098 (initproc)index_init, /* tp_init */
611 0, /* tp_alloc */
1099 0, /* tp_alloc */
612 PyType_GenericNew, /* tp_new */
1100 PyType_GenericNew, /* tp_new */
613 };
1101 };
614
1102
615 /*
1103 /*
616 * returns a tuple of the form (index, None, cache) with elements as
1104 * returns a tuple of the form (index, index, cache) with elements as
617 * follows:
1105 * follows:
618 *
1106 *
619 * index: an index object that lazily parses the RevlogNG records
1107 * index: an index object that lazily parses RevlogNG records
620 * cache: if data is inlined, a tuple (index_file_content, 0), else None
1108 * cache: if data is inlined, a tuple (index_file_content, 0), else None
621 *
1109 *
622 * added complications are for backwards compatibility
1110 * added complications are for backwards compatibility
623 */
1111 */
624 static PyObject *parse_index2(PyObject *self, PyObject *args)
1112 static PyObject *parse_index2(PyObject *self, PyObject *args)
625 {
1113 {
626 const char *data;
1114 const char *data;
627 int size, ret;
1115 int size, ret;
628 PyObject *inlined_obj, *tuple = NULL, *cache = NULL;
1116 PyObject *inlined_obj, *tuple = NULL, *cache = NULL;
629 indexObject *idx;
1117 indexObject *idx;
630
1118
631 if (!PyArg_ParseTuple(args, "s#O", &data, &size, &inlined_obj))
1119 if (!PyArg_ParseTuple(args, "s#O", &data, &size, &inlined_obj))
632 return NULL;
1120 return NULL;
633
1121
634 idx = PyObject_New(indexObject, &indexType);
1122 idx = PyObject_New(indexObject, &indexType);
635
1123
636 if (idx == NULL)
1124 if (idx == NULL)
637 goto bail;
1125 goto bail;
638
1126
639 ret = index_real_init(idx, data, size, inlined_obj,
1127 ret = index_real_init(idx, data, size, inlined_obj,
640 PyTuple_GET_ITEM(args, 0));
1128 PyTuple_GET_ITEM(args, 0));
641 if (ret)
1129 if (ret)
642 goto bail;
1130 goto bail;
643
1131
644 if (idx->inlined) {
1132 if (idx->inlined) {
645 Py_INCREF(idx->data);
1133 Py_INCREF(idx->data);
646 cache = Py_BuildValue("iO", 0, idx->data);
1134 cache = Py_BuildValue("iO", 0, idx->data);
647 if (cache == NULL)
1135 if (cache == NULL)
648 goto bail;
1136 goto bail;
649 } else {
1137 } else {
650 cache = Py_None;
1138 cache = Py_None;
651 Py_INCREF(cache);
1139 Py_INCREF(cache);
652 }
1140 }
653
1141
1142 Py_INCREF(idx);
1143
654 tuple = Py_BuildValue("NN", idx, cache);
1144 tuple = Py_BuildValue("NN", idx, cache);
655 if (!tuple)
1145 if (!tuple)
656 goto bail;
1146 goto bail;
657 return tuple;
1147 return tuple;
658
1148
659 bail:
1149 bail:
660 Py_XDECREF(idx);
1150 Py_XDECREF(idx);
661 Py_XDECREF(cache);
1151 Py_XDECREF(cache);
662 Py_XDECREF(tuple);
1152 Py_XDECREF(tuple);
663 return NULL;
1153 return NULL;
664 }
1154 }
665
1155
666 static char parsers_doc[] = "Efficient content parsing.";
1156 static char parsers_doc[] = "Efficient content parsing.";
667
1157
668 static PyMethodDef methods[] = {
1158 static PyMethodDef methods[] = {
669 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
1159 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
670 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1160 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
671 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
1161 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
672 {NULL, NULL}
1162 {NULL, NULL}
673 };
1163 };
674
1164
675 static void module_init(PyObject *mod)
1165 static void module_init(PyObject *mod)
676 {
1166 {
677 static const char nullid[20];
678
679 if (PyType_Ready(&indexType) < 0)
1167 if (PyType_Ready(&indexType) < 0)
680 return;
1168 return;
681 Py_INCREF(&indexType);
1169 Py_INCREF(&indexType);
682
1170
683 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
1171 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
684
1172
685 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
1173 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
686 -1, -1, -1, -1, nullid, 20);
1174 -1, -1, -1, -1, nullid, 20);
687 if (nullentry)
1175 if (nullentry)
688 PyObject_GC_UnTrack(nullentry);
1176 PyObject_GC_UnTrack(nullentry);
689 }
1177 }
690
1178
691 #ifdef IS_PY3K
1179 #ifdef IS_PY3K
692 static struct PyModuleDef parsers_module = {
1180 static struct PyModuleDef parsers_module = {
693 PyModuleDef_HEAD_INIT,
1181 PyModuleDef_HEAD_INIT,
694 "parsers",
1182 "parsers",
695 parsers_doc,
1183 parsers_doc,
696 -1,
1184 -1,
697 methods
1185 methods
698 };
1186 };
699
1187
700 PyMODINIT_FUNC PyInit_parsers(void)
1188 PyMODINIT_FUNC PyInit_parsers(void)
701 {
1189 {
702 PyObject *mod = PyModule_Create(&parsers_module);
1190 PyObject *mod = PyModule_Create(&parsers_module);
703 module_init(mod);
1191 module_init(mod);
704 return mod;
1192 return mod;
705 }
1193 }
706 #else
1194 #else
707 PyMODINIT_FUNC initparsers(void)
1195 PyMODINIT_FUNC initparsers(void)
708 {
1196 {
709 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
1197 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
710 module_init(mod);
1198 module_init(mod);
711 }
1199 }
712 #endif
1200 #endif
713
@@ -1,1295 +1,1306 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from node import bin, hex, nullid, nullrev
15 from node import bin, hex, nullid, nullrev
16 from i18n import _
16 from i18n import _
17 import ancestor, mdiff, parsers, error, util, dagutil
17 import ancestor, mdiff, parsers, error, util, dagutil
18 import struct, zlib, errno
18 import struct, zlib, errno
19
19
20 _pack = struct.pack
20 _pack = struct.pack
21 _unpack = struct.unpack
21 _unpack = struct.unpack
22 _compress = zlib.compress
22 _compress = zlib.compress
23 _decompress = zlib.decompress
23 _decompress = zlib.decompress
24 _sha = util.sha1
24 _sha = util.sha1
25
25
26 # revlog header flags
26 # revlog header flags
27 REVLOGV0 = 0
27 REVLOGV0 = 0
28 REVLOGNG = 1
28 REVLOGNG = 1
29 REVLOGNGINLINEDATA = (1 << 16)
29 REVLOGNGINLINEDATA = (1 << 16)
30 REVLOGGENERALDELTA = (1 << 17)
30 REVLOGGENERALDELTA = (1 << 17)
31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 REVLOG_DEFAULT_FORMAT = REVLOGNG
32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
35
35
36 # revlog index flags
36 # revlog index flags
37 REVIDX_KNOWN_FLAGS = 0
37 REVIDX_KNOWN_FLAGS = 0
38
38
39 # max size of revlog with inline data
39 # max size of revlog with inline data
40 _maxinline = 131072
40 _maxinline = 131072
41 _chunksize = 1048576
41 _chunksize = 1048576
42
42
43 RevlogError = error.RevlogError
43 RevlogError = error.RevlogError
44 LookupError = error.LookupError
44 LookupError = error.LookupError
45
45
46 def getoffset(q):
46 def getoffset(q):
47 return int(q >> 16)
47 return int(q >> 16)
48
48
49 def gettype(q):
49 def gettype(q):
50 return int(q & 0xFFFF)
50 return int(q & 0xFFFF)
51
51
52 def offset_type(offset, type):
52 def offset_type(offset, type):
53 return long(long(offset) << 16 | type)
53 return long(long(offset) << 16 | type)
54
54
55 nullhash = _sha(nullid)
55 nullhash = _sha(nullid)
56
56
57 def hash(text, p1, p2):
57 def hash(text, p1, p2):
58 """generate a hash from the given text and its parent hashes
58 """generate a hash from the given text and its parent hashes
59
59
60 This hash combines both the current file contents and its history
60 This hash combines both the current file contents and its history
61 in a manner that makes it easy to distinguish nodes with the same
61 in a manner that makes it easy to distinguish nodes with the same
62 content in the revision graph.
62 content in the revision graph.
63 """
63 """
64 # As of now, if one of the parent node is null, p2 is null
64 # As of now, if one of the parent node is null, p2 is null
65 if p2 == nullid:
65 if p2 == nullid:
66 # deep copy of a hash is faster than creating one
66 # deep copy of a hash is faster than creating one
67 s = nullhash.copy()
67 s = nullhash.copy()
68 s.update(p1)
68 s.update(p1)
69 else:
69 else:
70 # none of the parent nodes are nullid
70 # none of the parent nodes are nullid
71 l = [p1, p2]
71 l = [p1, p2]
72 l.sort()
72 l.sort()
73 s = _sha(l[0])
73 s = _sha(l[0])
74 s.update(l[1])
74 s.update(l[1])
75 s.update(text)
75 s.update(text)
76 return s.digest()
76 return s.digest()
77
77
78 def compress(text):
78 def compress(text):
79 """ generate a possibly-compressed representation of text """
79 """ generate a possibly-compressed representation of text """
80 if not text:
80 if not text:
81 return ("", text)
81 return ("", text)
82 l = len(text)
82 l = len(text)
83 bin = None
83 bin = None
84 if l < 44:
84 if l < 44:
85 pass
85 pass
86 elif l > 1000000:
86 elif l > 1000000:
87 # zlib makes an internal copy, thus doubling memory usage for
87 # zlib makes an internal copy, thus doubling memory usage for
88 # large files, so lets do this in pieces
88 # large files, so lets do this in pieces
89 z = zlib.compressobj()
89 z = zlib.compressobj()
90 p = []
90 p = []
91 pos = 0
91 pos = 0
92 while pos < l:
92 while pos < l:
93 pos2 = pos + 2**20
93 pos2 = pos + 2**20
94 p.append(z.compress(text[pos:pos2]))
94 p.append(z.compress(text[pos:pos2]))
95 pos = pos2
95 pos = pos2
96 p.append(z.flush())
96 p.append(z.flush())
97 if sum(map(len, p)) < l:
97 if sum(map(len, p)) < l:
98 bin = "".join(p)
98 bin = "".join(p)
99 else:
99 else:
100 bin = _compress(text)
100 bin = _compress(text)
101 if bin is None or len(bin) > l:
101 if bin is None or len(bin) > l:
102 if text[0] == '\0':
102 if text[0] == '\0':
103 return ("", text)
103 return ("", text)
104 return ('u', text)
104 return ('u', text)
105 return ("", bin)
105 return ("", bin)
106
106
107 def decompress(bin):
107 def decompress(bin):
108 """ decompress the given input """
108 """ decompress the given input """
109 if not bin:
109 if not bin:
110 return bin
110 return bin
111 t = bin[0]
111 t = bin[0]
112 if t == '\0':
112 if t == '\0':
113 return bin
113 return bin
114 if t == 'x':
114 if t == 'x':
115 return _decompress(bin)
115 return _decompress(bin)
116 if t == 'u':
116 if t == 'u':
117 return bin[1:]
117 return bin[1:]
118 raise RevlogError(_("unknown compression type %r") % t)
118 raise RevlogError(_("unknown compression type %r") % t)
119
119
120 indexformatv0 = ">4l20s20s20s"
120 indexformatv0 = ">4l20s20s20s"
121 v0shaoffset = 56
121 v0shaoffset = 56
122
122
123 class revlogoldio(object):
123 class revlogoldio(object):
124 def __init__(self):
124 def __init__(self):
125 self.size = struct.calcsize(indexformatv0)
125 self.size = struct.calcsize(indexformatv0)
126
126
127 def parseindex(self, data, inline):
127 def parseindex(self, data, inline):
128 s = self.size
128 s = self.size
129 index = []
129 index = []
130 nodemap = {nullid: nullrev}
130 nodemap = {nullid: nullrev}
131 n = off = 0
131 n = off = 0
132 l = len(data)
132 l = len(data)
133 while off + s <= l:
133 while off + s <= l:
134 cur = data[off:off + s]
134 cur = data[off:off + s]
135 off += s
135 off += s
136 e = _unpack(indexformatv0, cur)
136 e = _unpack(indexformatv0, cur)
137 # transform to revlogv1 format
137 # transform to revlogv1 format
138 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
138 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
139 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
139 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
140 index.append(e2)
140 index.append(e2)
141 nodemap[e[6]] = n
141 nodemap[e[6]] = n
142 n += 1
142 n += 1
143
143
144 # add the magic null revision at -1
144 # add the magic null revision at -1
145 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
145 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
146
146
147 return index, nodemap, None
147 return index, nodemap, None
148
148
149 def packentry(self, entry, node, version, rev):
149 def packentry(self, entry, node, version, rev):
150 if gettype(entry[0]):
150 if gettype(entry[0]):
151 raise RevlogError(_("index entry flags need RevlogNG"))
151 raise RevlogError(_("index entry flags need RevlogNG"))
152 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
152 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
153 node(entry[5]), node(entry[6]), entry[7])
153 node(entry[5]), node(entry[6]), entry[7])
154 return _pack(indexformatv0, *e2)
154 return _pack(indexformatv0, *e2)
155
155
156 # index ng:
156 # index ng:
157 # 6 bytes: offset
157 # 6 bytes: offset
158 # 2 bytes: flags
158 # 2 bytes: flags
159 # 4 bytes: compressed length
159 # 4 bytes: compressed length
160 # 4 bytes: uncompressed length
160 # 4 bytes: uncompressed length
161 # 4 bytes: base rev
161 # 4 bytes: base rev
162 # 4 bytes: link rev
162 # 4 bytes: link rev
163 # 4 bytes: parent 1 rev
163 # 4 bytes: parent 1 rev
164 # 4 bytes: parent 2 rev
164 # 4 bytes: parent 2 rev
165 # 32 bytes: nodeid
165 # 32 bytes: nodeid
166 indexformatng = ">Qiiiiii20s12x"
166 indexformatng = ">Qiiiiii20s12x"
167 ngshaoffset = 32
167 ngshaoffset = 32
168 versionformat = ">I"
168 versionformat = ">I"
169
169
170 class revlogio(object):
170 class revlogio(object):
171 def __init__(self):
171 def __init__(self):
172 self.size = struct.calcsize(indexformatng)
172 self.size = struct.calcsize(indexformatng)
173
173
174 def parseindex(self, data, inline):
174 def parseindex(self, data, inline):
175 # call the C implementation to parse the index data
175 # call the C implementation to parse the index data
176 index, cache = parsers.parse_index2(data, inline)
176 index, cache = parsers.parse_index2(data, inline)
177 return index, None, cache
177 return index, getattr(index, 'nodemap', None), cache
178
178
179 def packentry(self, entry, node, version, rev):
179 def packentry(self, entry, node, version, rev):
180 p = _pack(indexformatng, *entry)
180 p = _pack(indexformatng, *entry)
181 if rev == 0:
181 if rev == 0:
182 p = _pack(versionformat, version) + p[4:]
182 p = _pack(versionformat, version) + p[4:]
183 return p
183 return p
184
184
185 class revlog(object):
185 class revlog(object):
186 """
186 """
187 the underlying revision storage object
187 the underlying revision storage object
188
188
189 A revlog consists of two parts, an index and the revision data.
189 A revlog consists of two parts, an index and the revision data.
190
190
191 The index is a file with a fixed record size containing
191 The index is a file with a fixed record size containing
192 information on each revision, including its nodeid (hash), the
192 information on each revision, including its nodeid (hash), the
193 nodeids of its parents, the position and offset of its data within
193 nodeids of its parents, the position and offset of its data within
194 the data file, and the revision it's based on. Finally, each entry
194 the data file, and the revision it's based on. Finally, each entry
195 contains a linkrev entry that can serve as a pointer to external
195 contains a linkrev entry that can serve as a pointer to external
196 data.
196 data.
197
197
198 The revision data itself is a linear collection of data chunks.
198 The revision data itself is a linear collection of data chunks.
199 Each chunk represents a revision and is usually represented as a
199 Each chunk represents a revision and is usually represented as a
200 delta against the previous chunk. To bound lookup time, runs of
200 delta against the previous chunk. To bound lookup time, runs of
201 deltas are limited to about 2 times the length of the original
201 deltas are limited to about 2 times the length of the original
202 version data. This makes retrieval of a version proportional to
202 version data. This makes retrieval of a version proportional to
203 its size, or O(1) relative to the number of revisions.
203 its size, or O(1) relative to the number of revisions.
204
204
205 Both pieces of the revlog are written to in an append-only
205 Both pieces of the revlog are written to in an append-only
206 fashion, which means we never need to rewrite a file to insert or
206 fashion, which means we never need to rewrite a file to insert or
207 remove data, and can use some simple techniques to avoid the need
207 remove data, and can use some simple techniques to avoid the need
208 for locking while reading.
208 for locking while reading.
209 """
209 """
210 def __init__(self, opener, indexfile):
210 def __init__(self, opener, indexfile):
211 """
211 """
212 create a revlog object
212 create a revlog object
213
213
214 opener is a function that abstracts the file opening operation
214 opener is a function that abstracts the file opening operation
215 and can be used to implement COW semantics or the like.
215 and can be used to implement COW semantics or the like.
216 """
216 """
217 self.indexfile = indexfile
217 self.indexfile = indexfile
218 self.datafile = indexfile[:-2] + ".d"
218 self.datafile = indexfile[:-2] + ".d"
219 self.opener = opener
219 self.opener = opener
220 self._cache = None
220 self._cache = None
221 self._basecache = (0, 0)
221 self._basecache = (0, 0)
222 self._chunkcache = (0, '')
222 self._chunkcache = (0, '')
223 self.index = []
223 self.index = []
224 self._pcache = {}
224 self._pcache = {}
225 self._nodecache = {nullid: nullrev}
225 self._nodecache = {nullid: nullrev}
226 self._nodepos = None
226 self._nodepos = None
227
227
228 v = REVLOG_DEFAULT_VERSION
228 v = REVLOG_DEFAULT_VERSION
229 opts = getattr(opener, 'options', None)
229 opts = getattr(opener, 'options', None)
230 if opts is not None:
230 if opts is not None:
231 if 'revlogv1' in opts:
231 if 'revlogv1' in opts:
232 if 'generaldelta' in opts:
232 if 'generaldelta' in opts:
233 v |= REVLOGGENERALDELTA
233 v |= REVLOGGENERALDELTA
234 else:
234 else:
235 v = 0
235 v = 0
236
236
237 i = ''
237 i = ''
238 self._initempty = True
238 self._initempty = True
239 try:
239 try:
240 f = self.opener(self.indexfile)
240 f = self.opener(self.indexfile)
241 i = f.read()
241 i = f.read()
242 f.close()
242 f.close()
243 if len(i) > 0:
243 if len(i) > 0:
244 v = struct.unpack(versionformat, i[:4])[0]
244 v = struct.unpack(versionformat, i[:4])[0]
245 self._initempty = False
245 self._initempty = False
246 except IOError, inst:
246 except IOError, inst:
247 if inst.errno != errno.ENOENT:
247 if inst.errno != errno.ENOENT:
248 raise
248 raise
249
249
250 self.version = v
250 self.version = v
251 self._inline = v & REVLOGNGINLINEDATA
251 self._inline = v & REVLOGNGINLINEDATA
252 self._generaldelta = v & REVLOGGENERALDELTA
252 self._generaldelta = v & REVLOGGENERALDELTA
253 flags = v & ~0xFFFF
253 flags = v & ~0xFFFF
254 fmt = v & 0xFFFF
254 fmt = v & 0xFFFF
255 if fmt == REVLOGV0 and flags:
255 if fmt == REVLOGV0 and flags:
256 raise RevlogError(_("index %s unknown flags %#04x for format v0")
256 raise RevlogError(_("index %s unknown flags %#04x for format v0")
257 % (self.indexfile, flags >> 16))
257 % (self.indexfile, flags >> 16))
258 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
258 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
259 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
259 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
260 % (self.indexfile, flags >> 16))
260 % (self.indexfile, flags >> 16))
261 elif fmt > REVLOGNG:
261 elif fmt > REVLOGNG:
262 raise RevlogError(_("index %s unknown format %d")
262 raise RevlogError(_("index %s unknown format %d")
263 % (self.indexfile, fmt))
263 % (self.indexfile, fmt))
264
264
265 self._io = revlogio()
265 self._io = revlogio()
266 if self.version == REVLOGV0:
266 if self.version == REVLOGV0:
267 self._io = revlogoldio()
267 self._io = revlogoldio()
268 try:
268 try:
269 d = self._io.parseindex(i, self._inline)
269 d = self._io.parseindex(i, self._inline)
270 except (ValueError, IndexError):
270 except (ValueError, IndexError):
271 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
271 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
272 self.index, nodemap, self._chunkcache = d
272 self.index, nodemap, self._chunkcache = d
273 if nodemap is not None:
273 if nodemap is not None:
274 self.nodemap = self._nodecache = nodemap
274 self.nodemap = self._nodecache = nodemap
275 if not self._chunkcache:
275 if not self._chunkcache:
276 self._chunkclear()
276 self._chunkclear()
277
277
278 def tip(self):
278 def tip(self):
279 return self.node(len(self.index) - 2)
279 return self.node(len(self.index) - 2)
280 def __len__(self):
280 def __len__(self):
281 return len(self.index) - 1
281 return len(self.index) - 1
282 def __iter__(self):
282 def __iter__(self):
283 for i in xrange(len(self)):
283 for i in xrange(len(self)):
284 yield i
284 yield i
285
285
286 @util.propertycache
286 @util.propertycache
287 def nodemap(self):
287 def nodemap(self):
288 self.rev(self.node(0))
288 self.rev(self.node(0))
289 return self._nodecache
289 return self._nodecache
290
290
291 def hasnode(self, node):
291 def hasnode(self, node):
292 try:
292 try:
293 self.rev(node)
293 self.rev(node)
294 return True
294 return True
295 except KeyError:
295 except KeyError:
296 return False
296 return False
297
297
298 def clearcaches(self):
299 try:
300 self._nodecache.clearcaches()
301 except AttributeError:
302 self._nodecache = {nullid: nullrev}
303 self._nodepos = None
304
298 def rev(self, node):
305 def rev(self, node):
299 try:
306 try:
300 return self._nodecache[node]
307 return self._nodecache[node]
308 except RevlogError:
309 # parsers.c radix tree lookup failed
310 raise LookupError(node, self.indexfile, _('no node'))
301 except KeyError:
311 except KeyError:
312 # pure python cache lookup failed
302 n = self._nodecache
313 n = self._nodecache
303 i = self.index
314 i = self.index
304 p = self._nodepos
315 p = self._nodepos
305 if p is None:
316 if p is None:
306 p = len(i) - 2
317 p = len(i) - 2
307 for r in xrange(p, -1, -1):
318 for r in xrange(p, -1, -1):
308 v = i[r][7]
319 v = i[r][7]
309 n[v] = r
320 n[v] = r
310 if v == node:
321 if v == node:
311 self._nodepos = r - 1
322 self._nodepos = r - 1
312 return r
323 return r
313 raise LookupError(node, self.indexfile, _('no node'))
324 raise LookupError(node, self.indexfile, _('no node'))
314
325
315 def node(self, rev):
326 def node(self, rev):
316 return self.index[rev][7]
327 return self.index[rev][7]
317 def linkrev(self, rev):
328 def linkrev(self, rev):
318 return self.index[rev][4]
329 return self.index[rev][4]
319 def parents(self, node):
330 def parents(self, node):
320 i = self.index
331 i = self.index
321 d = i[self.rev(node)]
332 d = i[self.rev(node)]
322 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
333 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
323 def parentrevs(self, rev):
334 def parentrevs(self, rev):
324 return self.index[rev][5:7]
335 return self.index[rev][5:7]
325 def start(self, rev):
336 def start(self, rev):
326 return int(self.index[rev][0] >> 16)
337 return int(self.index[rev][0] >> 16)
327 def end(self, rev):
338 def end(self, rev):
328 return self.start(rev) + self.length(rev)
339 return self.start(rev) + self.length(rev)
329 def length(self, rev):
340 def length(self, rev):
330 return self.index[rev][1]
341 return self.index[rev][1]
331 def chainbase(self, rev):
342 def chainbase(self, rev):
332 index = self.index
343 index = self.index
333 base = index[rev][3]
344 base = index[rev][3]
334 while base != rev:
345 while base != rev:
335 rev = base
346 rev = base
336 base = index[rev][3]
347 base = index[rev][3]
337 return base
348 return base
338 def flags(self, rev):
349 def flags(self, rev):
339 return self.index[rev][0] & 0xFFFF
350 return self.index[rev][0] & 0xFFFF
340 def rawsize(self, rev):
351 def rawsize(self, rev):
341 """return the length of the uncompressed text for a given revision"""
352 """return the length of the uncompressed text for a given revision"""
342 l = self.index[rev][2]
353 l = self.index[rev][2]
343 if l >= 0:
354 if l >= 0:
344 return l
355 return l
345
356
346 t = self.revision(self.node(rev))
357 t = self.revision(self.node(rev))
347 return len(t)
358 return len(t)
348 size = rawsize
359 size = rawsize
349
360
350 def reachable(self, node, stop=None):
361 def reachable(self, node, stop=None):
351 """return the set of all nodes ancestral to a given node, including
362 """return the set of all nodes ancestral to a given node, including
352 the node itself, stopping when stop is matched"""
363 the node itself, stopping when stop is matched"""
353 reachable = set((node,))
364 reachable = set((node,))
354 visit = [node]
365 visit = [node]
355 if stop:
366 if stop:
356 stopn = self.rev(stop)
367 stopn = self.rev(stop)
357 else:
368 else:
358 stopn = 0
369 stopn = 0
359 while visit:
370 while visit:
360 n = visit.pop(0)
371 n = visit.pop(0)
361 if n == stop:
372 if n == stop:
362 continue
373 continue
363 if n == nullid:
374 if n == nullid:
364 continue
375 continue
365 for p in self.parents(n):
376 for p in self.parents(n):
366 if self.rev(p) < stopn:
377 if self.rev(p) < stopn:
367 continue
378 continue
368 if p not in reachable:
379 if p not in reachable:
369 reachable.add(p)
380 reachable.add(p)
370 visit.append(p)
381 visit.append(p)
371 return reachable
382 return reachable
372
383
373 def ancestors(self, *revs):
384 def ancestors(self, *revs):
374 """Generate the ancestors of 'revs' in reverse topological order.
385 """Generate the ancestors of 'revs' in reverse topological order.
375
386
376 Yield a sequence of revision numbers starting with the parents
387 Yield a sequence of revision numbers starting with the parents
377 of each revision in revs, i.e., each revision is *not* considered
388 of each revision in revs, i.e., each revision is *not* considered
378 an ancestor of itself. Results are in breadth-first order:
389 an ancestor of itself. Results are in breadth-first order:
379 parents of each rev in revs, then parents of those, etc. Result
390 parents of each rev in revs, then parents of those, etc. Result
380 does not include the null revision."""
391 does not include the null revision."""
381 visit = list(revs)
392 visit = list(revs)
382 seen = set([nullrev])
393 seen = set([nullrev])
383 while visit:
394 while visit:
384 for parent in self.parentrevs(visit.pop(0)):
395 for parent in self.parentrevs(visit.pop(0)):
385 if parent not in seen:
396 if parent not in seen:
386 visit.append(parent)
397 visit.append(parent)
387 seen.add(parent)
398 seen.add(parent)
388 yield parent
399 yield parent
389
400
390 def descendants(self, *revs):
401 def descendants(self, *revs):
391 """Generate the descendants of 'revs' in revision order.
402 """Generate the descendants of 'revs' in revision order.
392
403
393 Yield a sequence of revision numbers starting with a child of
404 Yield a sequence of revision numbers starting with a child of
394 some rev in revs, i.e., each revision is *not* considered a
405 some rev in revs, i.e., each revision is *not* considered a
395 descendant of itself. Results are ordered by revision number (a
406 descendant of itself. Results are ordered by revision number (a
396 topological sort)."""
407 topological sort)."""
397 first = min(revs)
408 first = min(revs)
398 if first == nullrev:
409 if first == nullrev:
399 for i in self:
410 for i in self:
400 yield i
411 yield i
401 return
412 return
402
413
403 seen = set(revs)
414 seen = set(revs)
404 for i in xrange(first + 1, len(self)):
415 for i in xrange(first + 1, len(self)):
405 for x in self.parentrevs(i):
416 for x in self.parentrevs(i):
406 if x != nullrev and x in seen:
417 if x != nullrev and x in seen:
407 seen.add(i)
418 seen.add(i)
408 yield i
419 yield i
409 break
420 break
410
421
411 def findcommonmissing(self, common=None, heads=None):
422 def findcommonmissing(self, common=None, heads=None):
412 """Return a tuple of the ancestors of common and the ancestors of heads
423 """Return a tuple of the ancestors of common and the ancestors of heads
413 that are not ancestors of common. In revset terminology, we return the
424 that are not ancestors of common. In revset terminology, we return the
414 tuple:
425 tuple:
415
426
416 ::common, (::heads) - (::common)
427 ::common, (::heads) - (::common)
417
428
418 The list is sorted by revision number, meaning it is
429 The list is sorted by revision number, meaning it is
419 topologically sorted.
430 topologically sorted.
420
431
421 'heads' and 'common' are both lists of node IDs. If heads is
432 'heads' and 'common' are both lists of node IDs. If heads is
422 not supplied, uses all of the revlog's heads. If common is not
433 not supplied, uses all of the revlog's heads. If common is not
423 supplied, uses nullid."""
434 supplied, uses nullid."""
424 if common is None:
435 if common is None:
425 common = [nullid]
436 common = [nullid]
426 if heads is None:
437 if heads is None:
427 heads = self.heads()
438 heads = self.heads()
428
439
429 common = [self.rev(n) for n in common]
440 common = [self.rev(n) for n in common]
430 heads = [self.rev(n) for n in heads]
441 heads = [self.rev(n) for n in heads]
431
442
432 # we want the ancestors, but inclusive
443 # we want the ancestors, but inclusive
433 has = set(self.ancestors(*common))
444 has = set(self.ancestors(*common))
434 has.add(nullrev)
445 has.add(nullrev)
435 has.update(common)
446 has.update(common)
436
447
437 # take all ancestors from heads that aren't in has
448 # take all ancestors from heads that aren't in has
438 missing = set()
449 missing = set()
439 visit = [r for r in heads if r not in has]
450 visit = [r for r in heads if r not in has]
440 while visit:
451 while visit:
441 r = visit.pop(0)
452 r = visit.pop(0)
442 if r in missing:
453 if r in missing:
443 continue
454 continue
444 else:
455 else:
445 missing.add(r)
456 missing.add(r)
446 for p in self.parentrevs(r):
457 for p in self.parentrevs(r):
447 if p not in has:
458 if p not in has:
448 visit.append(p)
459 visit.append(p)
449 missing = list(missing)
460 missing = list(missing)
450 missing.sort()
461 missing.sort()
451 return has, [self.node(r) for r in missing]
462 return has, [self.node(r) for r in missing]
452
463
453 def findmissing(self, common=None, heads=None):
464 def findmissing(self, common=None, heads=None):
454 """Return the ancestors of heads that are not ancestors of common.
465 """Return the ancestors of heads that are not ancestors of common.
455
466
456 More specifically, return a list of nodes N such that every N
467 More specifically, return a list of nodes N such that every N
457 satisfies the following constraints:
468 satisfies the following constraints:
458
469
459 1. N is an ancestor of some node in 'heads'
470 1. N is an ancestor of some node in 'heads'
460 2. N is not an ancestor of any node in 'common'
471 2. N is not an ancestor of any node in 'common'
461
472
462 The list is sorted by revision number, meaning it is
473 The list is sorted by revision number, meaning it is
463 topologically sorted.
474 topologically sorted.
464
475
465 'heads' and 'common' are both lists of node IDs. If heads is
476 'heads' and 'common' are both lists of node IDs. If heads is
466 not supplied, uses all of the revlog's heads. If common is not
477 not supplied, uses all of the revlog's heads. If common is not
467 supplied, uses nullid."""
478 supplied, uses nullid."""
468 _common, missing = self.findcommonmissing(common, heads)
479 _common, missing = self.findcommonmissing(common, heads)
469 return missing
480 return missing
470
481
471 def nodesbetween(self, roots=None, heads=None):
482 def nodesbetween(self, roots=None, heads=None):
472 """Return a topological path from 'roots' to 'heads'.
483 """Return a topological path from 'roots' to 'heads'.
473
484
474 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
485 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
475 topologically sorted list of all nodes N that satisfy both of
486 topologically sorted list of all nodes N that satisfy both of
476 these constraints:
487 these constraints:
477
488
478 1. N is a descendant of some node in 'roots'
489 1. N is a descendant of some node in 'roots'
479 2. N is an ancestor of some node in 'heads'
490 2. N is an ancestor of some node in 'heads'
480
491
481 Every node is considered to be both a descendant and an ancestor
492 Every node is considered to be both a descendant and an ancestor
482 of itself, so every reachable node in 'roots' and 'heads' will be
493 of itself, so every reachable node in 'roots' and 'heads' will be
483 included in 'nodes'.
494 included in 'nodes'.
484
495
485 'outroots' is the list of reachable nodes in 'roots', i.e., the
496 'outroots' is the list of reachable nodes in 'roots', i.e., the
486 subset of 'roots' that is returned in 'nodes'. Likewise,
497 subset of 'roots' that is returned in 'nodes'. Likewise,
487 'outheads' is the subset of 'heads' that is also in 'nodes'.
498 'outheads' is the subset of 'heads' that is also in 'nodes'.
488
499
489 'roots' and 'heads' are both lists of node IDs. If 'roots' is
500 'roots' and 'heads' are both lists of node IDs. If 'roots' is
490 unspecified, uses nullid as the only root. If 'heads' is
501 unspecified, uses nullid as the only root. If 'heads' is
491 unspecified, uses list of all of the revlog's heads."""
502 unspecified, uses list of all of the revlog's heads."""
492 nonodes = ([], [], [])
503 nonodes = ([], [], [])
493 if roots is not None:
504 if roots is not None:
494 roots = list(roots)
505 roots = list(roots)
495 if not roots:
506 if not roots:
496 return nonodes
507 return nonodes
497 lowestrev = min([self.rev(n) for n in roots])
508 lowestrev = min([self.rev(n) for n in roots])
498 else:
509 else:
499 roots = [nullid] # Everybody's a descendant of nullid
510 roots = [nullid] # Everybody's a descendant of nullid
500 lowestrev = nullrev
511 lowestrev = nullrev
501 if (lowestrev == nullrev) and (heads is None):
512 if (lowestrev == nullrev) and (heads is None):
502 # We want _all_ the nodes!
513 # We want _all_ the nodes!
503 return ([self.node(r) for r in self], [nullid], list(self.heads()))
514 return ([self.node(r) for r in self], [nullid], list(self.heads()))
504 if heads is None:
515 if heads is None:
505 # All nodes are ancestors, so the latest ancestor is the last
516 # All nodes are ancestors, so the latest ancestor is the last
506 # node.
517 # node.
507 highestrev = len(self) - 1
518 highestrev = len(self) - 1
508 # Set ancestors to None to signal that every node is an ancestor.
519 # Set ancestors to None to signal that every node is an ancestor.
509 ancestors = None
520 ancestors = None
510 # Set heads to an empty dictionary for later discovery of heads
521 # Set heads to an empty dictionary for later discovery of heads
511 heads = {}
522 heads = {}
512 else:
523 else:
513 heads = list(heads)
524 heads = list(heads)
514 if not heads:
525 if not heads:
515 return nonodes
526 return nonodes
516 ancestors = set()
527 ancestors = set()
517 # Turn heads into a dictionary so we can remove 'fake' heads.
528 # Turn heads into a dictionary so we can remove 'fake' heads.
518 # Also, later we will be using it to filter out the heads we can't
529 # Also, later we will be using it to filter out the heads we can't
519 # find from roots.
530 # find from roots.
520 heads = dict.fromkeys(heads, False)
531 heads = dict.fromkeys(heads, False)
521 # Start at the top and keep marking parents until we're done.
532 # Start at the top and keep marking parents until we're done.
522 nodestotag = set(heads)
533 nodestotag = set(heads)
523 # Remember where the top was so we can use it as a limit later.
534 # Remember where the top was so we can use it as a limit later.
524 highestrev = max([self.rev(n) for n in nodestotag])
535 highestrev = max([self.rev(n) for n in nodestotag])
525 while nodestotag:
536 while nodestotag:
526 # grab a node to tag
537 # grab a node to tag
527 n = nodestotag.pop()
538 n = nodestotag.pop()
528 # Never tag nullid
539 # Never tag nullid
529 if n == nullid:
540 if n == nullid:
530 continue
541 continue
531 # A node's revision number represents its place in a
542 # A node's revision number represents its place in a
532 # topologically sorted list of nodes.
543 # topologically sorted list of nodes.
533 r = self.rev(n)
544 r = self.rev(n)
534 if r >= lowestrev:
545 if r >= lowestrev:
535 if n not in ancestors:
546 if n not in ancestors:
536 # If we are possibly a descendant of one of the roots
547 # If we are possibly a descendant of one of the roots
537 # and we haven't already been marked as an ancestor
548 # and we haven't already been marked as an ancestor
538 ancestors.add(n) # Mark as ancestor
549 ancestors.add(n) # Mark as ancestor
539 # Add non-nullid parents to list of nodes to tag.
550 # Add non-nullid parents to list of nodes to tag.
540 nodestotag.update([p for p in self.parents(n) if
551 nodestotag.update([p for p in self.parents(n) if
541 p != nullid])
552 p != nullid])
542 elif n in heads: # We've seen it before, is it a fake head?
553 elif n in heads: # We've seen it before, is it a fake head?
543 # So it is, real heads should not be the ancestors of
554 # So it is, real heads should not be the ancestors of
544 # any other heads.
555 # any other heads.
545 heads.pop(n)
556 heads.pop(n)
546 if not ancestors:
557 if not ancestors:
547 return nonodes
558 return nonodes
548 # Now that we have our set of ancestors, we want to remove any
559 # Now that we have our set of ancestors, we want to remove any
549 # roots that are not ancestors.
560 # roots that are not ancestors.
550
561
551 # If one of the roots was nullid, everything is included anyway.
562 # If one of the roots was nullid, everything is included anyway.
552 if lowestrev > nullrev:
563 if lowestrev > nullrev:
553 # But, since we weren't, let's recompute the lowest rev to not
564 # But, since we weren't, let's recompute the lowest rev to not
554 # include roots that aren't ancestors.
565 # include roots that aren't ancestors.
555
566
556 # Filter out roots that aren't ancestors of heads
567 # Filter out roots that aren't ancestors of heads
557 roots = [n for n in roots if n in ancestors]
568 roots = [n for n in roots if n in ancestors]
558 # Recompute the lowest revision
569 # Recompute the lowest revision
559 if roots:
570 if roots:
560 lowestrev = min([self.rev(n) for n in roots])
571 lowestrev = min([self.rev(n) for n in roots])
561 else:
572 else:
562 # No more roots? Return empty list
573 # No more roots? Return empty list
563 return nonodes
574 return nonodes
564 else:
575 else:
565 # We are descending from nullid, and don't need to care about
576 # We are descending from nullid, and don't need to care about
566 # any other roots.
577 # any other roots.
567 lowestrev = nullrev
578 lowestrev = nullrev
568 roots = [nullid]
579 roots = [nullid]
569 # Transform our roots list into a set.
580 # Transform our roots list into a set.
570 descendants = set(roots)
581 descendants = set(roots)
571 # Also, keep the original roots so we can filter out roots that aren't
582 # Also, keep the original roots so we can filter out roots that aren't
572 # 'real' roots (i.e. are descended from other roots).
583 # 'real' roots (i.e. are descended from other roots).
573 roots = descendants.copy()
584 roots = descendants.copy()
574 # Our topologically sorted list of output nodes.
585 # Our topologically sorted list of output nodes.
575 orderedout = []
586 orderedout = []
576 # Don't start at nullid since we don't want nullid in our output list,
587 # Don't start at nullid since we don't want nullid in our output list,
577 # and if nullid shows up in descedents, empty parents will look like
588 # and if nullid shows up in descedents, empty parents will look like
578 # they're descendants.
589 # they're descendants.
579 for r in xrange(max(lowestrev, 0), highestrev + 1):
590 for r in xrange(max(lowestrev, 0), highestrev + 1):
580 n = self.node(r)
591 n = self.node(r)
581 isdescendant = False
592 isdescendant = False
582 if lowestrev == nullrev: # Everybody is a descendant of nullid
593 if lowestrev == nullrev: # Everybody is a descendant of nullid
583 isdescendant = True
594 isdescendant = True
584 elif n in descendants:
595 elif n in descendants:
585 # n is already a descendant
596 # n is already a descendant
586 isdescendant = True
597 isdescendant = True
587 # This check only needs to be done here because all the roots
598 # This check only needs to be done here because all the roots
588 # will start being marked is descendants before the loop.
599 # will start being marked is descendants before the loop.
589 if n in roots:
600 if n in roots:
590 # If n was a root, check if it's a 'real' root.
601 # If n was a root, check if it's a 'real' root.
591 p = tuple(self.parents(n))
602 p = tuple(self.parents(n))
592 # If any of its parents are descendants, it's not a root.
603 # If any of its parents are descendants, it's not a root.
593 if (p[0] in descendants) or (p[1] in descendants):
604 if (p[0] in descendants) or (p[1] in descendants):
594 roots.remove(n)
605 roots.remove(n)
595 else:
606 else:
596 p = tuple(self.parents(n))
607 p = tuple(self.parents(n))
597 # A node is a descendant if either of its parents are
608 # A node is a descendant if either of its parents are
598 # descendants. (We seeded the dependents list with the roots
609 # descendants. (We seeded the dependents list with the roots
599 # up there, remember?)
610 # up there, remember?)
600 if (p[0] in descendants) or (p[1] in descendants):
611 if (p[0] in descendants) or (p[1] in descendants):
601 descendants.add(n)
612 descendants.add(n)
602 isdescendant = True
613 isdescendant = True
603 if isdescendant and ((ancestors is None) or (n in ancestors)):
614 if isdescendant and ((ancestors is None) or (n in ancestors)):
604 # Only include nodes that are both descendants and ancestors.
615 # Only include nodes that are both descendants and ancestors.
605 orderedout.append(n)
616 orderedout.append(n)
606 if (ancestors is not None) and (n in heads):
617 if (ancestors is not None) and (n in heads):
607 # We're trying to figure out which heads are reachable
618 # We're trying to figure out which heads are reachable
608 # from roots.
619 # from roots.
609 # Mark this head as having been reached
620 # Mark this head as having been reached
610 heads[n] = True
621 heads[n] = True
611 elif ancestors is None:
622 elif ancestors is None:
612 # Otherwise, we're trying to discover the heads.
623 # Otherwise, we're trying to discover the heads.
613 # Assume this is a head because if it isn't, the next step
624 # Assume this is a head because if it isn't, the next step
614 # will eventually remove it.
625 # will eventually remove it.
615 heads[n] = True
626 heads[n] = True
616 # But, obviously its parents aren't.
627 # But, obviously its parents aren't.
617 for p in self.parents(n):
628 for p in self.parents(n):
618 heads.pop(p, None)
629 heads.pop(p, None)
619 heads = [n for n, flag in heads.iteritems() if flag]
630 heads = [n for n, flag in heads.iteritems() if flag]
620 roots = list(roots)
631 roots = list(roots)
621 assert orderedout
632 assert orderedout
622 assert roots
633 assert roots
623 assert heads
634 assert heads
624 return (orderedout, roots, heads)
635 return (orderedout, roots, heads)
625
636
def headrevs(self):
    """Return the list of revision numbers that have no children.

    An empty revlog yields [nullrev].
    """
    total = len(self)
    if not total:
        return [nullrev]
    # Assume every rev is a head, then clear the flag for every rev
    # that appears as a parent (index entry fields 5 and 6).  The
    # extra slot absorbs nullrev (-1) parents.
    candidate = [1] * (total + 1)
    idx = self.index
    for rev in xrange(total):
        entry = idx[rev]
        candidate[entry[5]] = 0
        candidate[entry[6]] = 0
    return [rev for rev in xrange(total) if candidate[rev]]
636
647
def heads(self, start=None, stop=None):
    """return the list of all nodes that have no children

    if start is specified, only heads that are descendants of
    start will be returned
    if stop is specified, it will consider all the revs from stop
    as if they had no children
    """
    if start is None and stop is None:
        # fast path: heads of the whole graph
        if not len(self):
            return [nullid]
        return [self.node(r) for r in self.headrevs()]

    if start is None:
        start = nullid
    if stop is None:
        stop = []
    stoprevs = set([self.rev(n) for n in stop])
    startrev = self.rev(start)
    reachable = set([startrev])
    heads = set([startrev])

    # walk forward from startrev, propagating reachability and
    # demoting any reachable parent that gains a reachable child
    parentrevs = self.parentrevs
    for rev in xrange(startrev + 1, len(self)):
        for prev in parentrevs(rev):
            if prev in reachable:
                if rev not in stoprevs:
                    reachable.add(rev)
                    heads.add(rev)
                if prev in heads and prev not in stoprevs:
                    heads.remove(prev)

    return [self.node(r) for r in heads]
670
681
def children(self, node):
    """find the children of a given node

    Returns the nodes of all revisions that list *node* as a parent.
    """
    c = []
    p = self.rev(node)
    # Children can only appear after their parent in topological
    # order, so scan forward from p + 1.  Use xrange, as the rest of
    # this module does, instead of range: range materializes the full
    # revision list in Python 2, which is wasteful for large revlogs.
    for r in xrange(p + 1, len(self)):
        prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
        if prevs:
            for pr in prevs:
                if pr == p:
                    c.append(self.node(r))
        elif p == nullrev:
            # parentless revisions count as children of nullrev
            c.append(self.node(r))
    return c
684
695
def descendant(self, start, end):
    """Return True if revision *end* is a descendant of revision *start*."""
    if start == nullrev:
        # every revision descends from the null revision
        return True
    for rev in self.descendants(start):
        if rev == end:
            return True
        if rev > end:
            # descendants are produced in increasing order, so we
            # have passed end without finding it
            break
    return False
694
705
def ancestor(self, a, b):
    """calculate the least common ancestor of nodes a and b"""

    # fast path: if one rev is an ancestor of the other, it is the LCA
    a, b = self.rev(a), self.rev(b)
    start, end = min(a, b), max(a, b)
    if self.descendant(start, end):
        return self.node(start)

    def parents(rev):
        # strip nullrev so the ancestor algorithm only sees real revs
        return [p for p in self.parentrevs(rev) if p != nullrev]

    # note: 'ancestor' here resolves to the module-level ancestor
    # module, not this method
    c = ancestor.ancestor(a, b, parents)
    if c is None:
        return nullid

    return self.node(c)
712
723
def _match(self, id):
    """Resolve *id* by exact forms only.

    Accepts a revision number, a 20-byte binary node, the decimal
    string form of a revision number, or a full 40-digit hex nodeid.
    Returns the node, or None when nothing matched exactly.
    """
    if isinstance(id, (long, int)):
        # rev
        return self.node(id)
    if len(id) == 20:
        # possibly a binary node
        # odds of a binary node being all hex in ASCII are 1 in 10**25
        try:
            candidate = id
            self.rev(candidate) # quick search the index
            return candidate
        except LookupError:
            pass # may be partial hex id
    try:
        # str(rev)?
        rev = int(id)
        if str(rev) != id:
            raise ValueError
        if rev < 0:
            rev += len(self)
        if not (0 <= rev < len(self)):
            raise ValueError
        return self.node(rev)
    except (ValueError, OverflowError):
        pass
    if len(id) == 40:
        try:
            # a full hex nodeid?
            candidate = bin(id)
            self.rev(candidate)
            return candidate
        except (TypeError, LookupError):
            pass
746
757
def _partialmatch(self, id):
    """Resolve an abbreviated hex nodeid prefix, with caching.

    Returns the matching node, None when *id* cannot be a hex prefix,
    and raises LookupError when the prefix is ambiguous.
    """
    if id in self._pcache:
        return self._pcache[id]

    if len(id) < 40:
        try:
            # binarize an even number of leading hex digits
            half = len(id) // 2
            prefix = bin(id[:half * 2])
            # linear scan of the whole index for candidate nodes,
            # then filter on the full (possibly odd-length) prefix
            nl = [e[7] for e in self.index if e[7].startswith(prefix)]
            nl = [n for n in nl if hex(n).startswith(id)]
            if len(nl) == 1:
                self._pcache[id] = nl[0]
                return nl[0]
            if len(nl) > 1:
                raise LookupError(id, self.indexfile,
                                  _('ambiguous identifier'))
            return None
        except TypeError:
            pass
767
778
def lookup(self, id):
    """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
    """
    # try the exact forms first...
    node = self._match(id)
    if node is not None:
        return node
    # ...then abbreviated hex prefixes
    node = self._partialmatch(id)
    if node:
        return node

    raise LookupError(id, self.indexfile, _('no match found'))
781
792
def cmp(self, node, text):
    """compare text with a given file revision

    returns True if text is different than what is stored.
    """
    p1, p2 = self.parents(node)
    # a revision's node is the hash of its text and parents, so
    # comparing hashes compares contents without retrieving them
    expected = hash(text, p1, p2)
    return expected != node
789
800
def _addchunk(self, offset, data):
    """Fold freshly read data into the chunk cache.

    Extends the cached window when *data* is contiguous with it and
    the combined size stays under _chunksize; otherwise the cache is
    replaced outright.
    """
    cacheoffset, cached = self._chunkcache
    contiguous = cacheoffset + len(cached) == offset
    if contiguous and len(cached) + len(data) < _chunksize:
        self._chunkcache = cacheoffset, cached + data
    else:
        self._chunkcache = offset, data
797
808
def _loadchunk(self, offset, length):
    """Read *length* bytes at *offset* from disk and return them.

    Reads from the index file for inline revlogs, the data file
    otherwise, with at least 64k of readahead to warm the chunk cache
    for subsequent nearby reads.
    """
    if self._inline:
        fh = self.opener(self.indexfile)
    else:
        fh = self.opener(self.datafile)

    readahead = max(65536, length)
    fh.seek(offset)
    data = fh.read(readahead)
    fh.close()
    self._addchunk(offset, data)
    if readahead > length:
        data = data[:length]
    return data
812
823
813 def _getchunk(self, offset, length):
824 def _getchunk(self, offset, length):
814 o, d = self._chunkcache
825 o, d = self._chunkcache
815 l = len(d)
826 l = len(d)
816
827
817 # is it in the cache?
828 # is it in the cache?
818 cachestart = offset - o
829 cachestart = offset - o
819 cacheend = cachestart + length
830 cacheend = cachestart + length
820 if cachestart >= 0 and cacheend <= l:
831 if cachestart >= 0 and cacheend <= l:
821 if cachestart == 0 and cacheend == l:
832 if cachestart == 0 and cacheend == l:
822 return d # avoid a copy
833 return d # avoid a copy
823 return d[cachestart:cacheend]
834 return d[cachestart:cacheend]
824
835
825 return self._loadchunk(offset, length)
836 return self._loadchunk(offset, length)
826
837
def _chunkraw(self, startrev, endrev):
    """Return the raw (compressed) data covering revs startrev..endrev."""
    offset = self.start(startrev)
    size = self.end(endrev) - offset
    if self._inline:
        # skip the index entries interleaved with the data
        offset += (startrev + 1) * self._io.size
    return self._getchunk(offset, size)
833
844
def _chunk(self, rev):
    """Return the decompressed chunk for a single revision."""
    raw = self._chunkraw(rev, rev)
    return decompress(raw)
836
847
837 def _chunkbase(self, rev):
848 def _chunkbase(self, rev):
838 return self._chunk(rev)
849 return self._chunk(rev)
839
850
840 def _chunkclear(self):
851 def _chunkclear(self):
841 self._chunkcache = (0, '')
852 self._chunkcache = (0, '')
842
853
def deltaparent(self, rev):
    """return deltaparent of the given revision"""
    base = self.index[rev][3]
    if base == rev:
        # stored as a full snapshot: no delta parent
        return nullrev
    if self._generaldelta:
        # generaldelta stores the actual delta base in the index
        return base
    # classic layout always deltas against the previous revision
    return rev - 1
852
863
def revdiff(self, rev1, rev2):
    """return or calculate a delta between two revisions"""
    # reuse the stored delta when rev2 is already stored as a delta
    # against rev1
    if rev1 != nullrev and self.deltaparent(rev2) == rev1:
        return self._chunk(rev2)

    text1 = self.revision(self.node(rev1))
    text2 = self.revision(self.node(rev2))
    return mdiff.textdiff(text1, text2)
860
871
def revision(self, nodeorrev):
    """Return the uncompressed text of a revision.

    *nodeorrev* may be either a revision number or a node.  Maintains
    a one-entry cache (_cache) of the last text returned.
    """
    if isinstance(nodeorrev, int):
        rev = nodeorrev
        node = self.node(rev)
    else:
        node = nodeorrev
        rev = None

    cachedrev = None
    if node == nullid:
        return ""
    if self._cache:
        if self._cache[0] == node:
            # exact cache hit
            return self._cache[2]
        cachedrev = self._cache[1]

    # look up what we need to read
    text = None
    if rev is None:
        rev = self.rev(node)

    # check rev flags
    if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
        raise RevlogError(_('incompatible revision flag %x') %
                          (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))

    # build delta chain: walk from rev back to its snapshot base (or
    # to the cached rev, whichever comes first)
    chain = []
    index = self.index # for performance
    generaldelta = self._generaldelta
    iterrev = rev
    entry = index[iterrev]
    while iterrev != entry[3] and iterrev != cachedrev:
        chain.append(iterrev)
        if generaldelta:
            iterrev = entry[3]
        else:
            iterrev -= 1
        entry = index[iterrev]
    chain.reverse()
    base = iterrev

    if iterrev == cachedrev:
        # cache hit: start patching from the cached text
        text = self._cache[2]

    # drop cache to save memory
    self._cache = None

    # warm the chunk cache for the whole span we are about to read
    self._chunkraw(base, rev)
    if text is None:
        text = self._chunkbase(base)

    deltas = [self._chunk(r) for r in chain]
    text = mdiff.patches(text, deltas)

    text = self._checkhash(text, node, rev)

    self._cache = (node, rev, text)
    return text
922
933
def _checkhash(self, text, node, rev):
    """Verify that *text* hashes to *node*; return the text.

    Raises RevlogError on an integrity failure.
    """
    p1, p2 = self.parents(node)
    expected = hash(text, p1, p2)
    if node != expected:
        raise RevlogError(_("integrity check failed on %s:%d")
                          % (self.indexfile, rev))
    return text
929
940
def checkinlinesize(self, tr, fp=None):
    """Split an inline revlog into separate index and data files once
    its data outgrows _maxinline.  No-op otherwise.
    """
    if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
        return

    trinfo = tr.find(self.indexfile)
    if trinfo is None:
        raise RevlogError(_("%s not found in the transaction")
                          % self.indexfile)

    trindex = trinfo[2]
    dataoff = self.start(trindex)

    tr.add(self.datafile, dataoff)

    if fp:
        fp.flush()
        fp.close()

    # copy every revision's raw data into the new data file
    datafh = self.opener(self.datafile, 'w')
    try:
        for rev in self:
            datafh.write(self._chunkraw(rev, rev))
    finally:
        datafh.close()

    # rewrite the index without the inline flag or interleaved data
    fp = self.opener(self.indexfile, 'w', atomictemp=True)
    self.version &= ~(REVLOGNGINLINEDATA)
    self._inline = False
    for rev in self:
        entry = self._io.packentry(self.index[rev], self.node,
                                   self.version, rev)
        fp.write(entry)

    # if we don't call close, the temp file will never replace the
    # real index
    fp.close()

    tr.replace(self.indexfile, trindex * self._io.size)
    self._chunkclear()
968
979
def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
    """add a revision to the log

    text - the revision data to add
    transaction - the transaction object used for rollback
    link - the linkrev data to add
    p1, p2 - the parent nodeids of the revision
    cachedelta - an optional precomputed delta
    """
    node = hash(text, p1, p2)
    if node in self.nodemap:
        # this exact revision is already stored
        return node

    datafh = None
    if not self._inline:
        datafh = self.opener(self.datafile, "a")
    indexfh = self.opener(self.indexfile, "a+")
    try:
        return self._addrevision(node, text, transaction, link, p1, p2,
                                 cachedelta, indexfh, datafh)
    finally:
        if datafh:
            datafh.close()
        indexfh.close()
993
1004
def _addrevision(self, node, text, transaction, link, p1, p2,
                 cachedelta, ifh, dfh):
    """internal function to add revisions to the log

    see addrevision for argument descriptions.
    invariants:
    - text is optional (can be None); if not set, cachedelta must be set.
      if both are set, they must correspond to eachother.
    """
    btext = [text]
    def buildtext():
        # lazily reconstruct the full text from cachedelta on demand
        if btext[0] is not None:
            return btext[0]
        # flush any pending writes here so we can read it in revision
        if dfh:
            dfh.flush()
        ifh.flush()
        basetext = self.revision(self.node(cachedelta[0]))
        btext[0] = mdiff.patch(basetext, cachedelta[1])
        chk = hash(btext[0], p1, p2)
        if chk != node:
            raise RevlogError(_("consistency error in delta"))
        return btext[0]

    def builddelta(rev):
        # can we use the cached delta?
        if cachedelta and cachedelta[0] == rev:
            delta = cachedelta[1]
        else:
            t = buildtext()
            ptext = self.revision(self.node(rev))
            delta = mdiff.textdiff(ptext, t)
        data = compress(delta)
        l = len(data[1]) + len(data[0])
        if basecache[0] == rev:
            chainbase = basecache[1]
        else:
            chainbase = self.chainbase(rev)
        dist = l + offset - self.start(chainbase)
        if self._generaldelta:
            base = rev
        else:
            base = chainbase
        return dist, l, data, base, chainbase

    curr = len(self)
    prev = curr - 1
    base = chainbase = curr
    offset = self.end(prev)
    flags = 0
    d = None
    basecache = self._basecache
    p1r, p2r = self.rev(p1), self.rev(p2)

    # should we try to build a delta?
    if prev != nullrev:
        if self._generaldelta:
            # prefer a parent that is at or beyond the current base
            if p1r >= basecache[1]:
                d = builddelta(p1r)
            elif p2r >= basecache[1]:
                d = builddelta(p2r)
            else:
                d = builddelta(prev)
        else:
            d = builddelta(prev)
        dist, l, data, base, chainbase = d

    # full versions are inserted when the needed deltas
    # become comparable to the uncompressed text
    if text is None:
        textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                    cachedelta[1])
    else:
        textlen = len(text)
    if d is None or dist > textlen * 2:
        # store a full snapshot instead of a long/costly delta chain
        text = buildtext()
        data = compress(text)
        l = len(data[1]) + len(data[0])
        base = chainbase = curr

    e = (offset_type(offset, flags), l, textlen,
         base, link, p1r, p2r, node)
    self.index.insert(-1, e)
    self.nodemap[node] = curr

    entry = self._io.packentry(e, self.node, self.version, curr)
    if not self._inline:
        transaction.add(self.datafile, offset)
        transaction.add(self.indexfile, curr * len(entry))
        if data[0]:
            dfh.write(data[0])
        dfh.write(data[1])
        dfh.flush()
        ifh.write(entry)
    else:
        offset += curr * self._io.size
        transaction.add(self.indexfile, offset, curr)
        ifh.write(entry)
        ifh.write(data[0])
        ifh.write(data[1])
        # inline revlogs may need to be split after growing
        self.checkinlinesize(transaction, ifh)

    if type(text) == str: # only accept immutable objects
        self._cache = (node, curr, text)
    self._basecache = (curr, chainbase)
    return node
1100
1111
1101 def group(self, nodelist, bundler, reorder=None):
1112 def group(self, nodelist, bundler, reorder=None):
1102 """Calculate a delta group, yielding a sequence of changegroup chunks
1113 """Calculate a delta group, yielding a sequence of changegroup chunks
1103 (strings).
1114 (strings).
1104
1115
1105 Given a list of changeset revs, return a set of deltas and
1116 Given a list of changeset revs, return a set of deltas and
1106 metadata corresponding to nodes. The first delta is
1117 metadata corresponding to nodes. The first delta is
1107 first parent(nodelist[0]) -> nodelist[0], the receiver is
1118 first parent(nodelist[0]) -> nodelist[0], the receiver is
1108 guaranteed to have this parent as it has all history before
1119 guaranteed to have this parent as it has all history before
1109 these changesets. In the case firstparent is nullrev the
1120 these changesets. In the case firstparent is nullrev the
1110 changegroup starts with a full revision.
1121 changegroup starts with a full revision.
1111 """
1122 """
1112
1123
1113 # if we don't have any revisions touched by these changesets, bail
1124 # if we don't have any revisions touched by these changesets, bail
1114 if len(nodelist) == 0:
1125 if len(nodelist) == 0:
1115 yield bundler.close()
1126 yield bundler.close()
1116 return
1127 return
1117
1128
1118 # for generaldelta revlogs, we linearize the revs; this will both be
1129 # for generaldelta revlogs, we linearize the revs; this will both be
1119 # much quicker and generate a much smaller bundle
1130 # much quicker and generate a much smaller bundle
1120 if (self._generaldelta and reorder is not False) or reorder:
1131 if (self._generaldelta and reorder is not False) or reorder:
1121 dag = dagutil.revlogdag(self)
1132 dag = dagutil.revlogdag(self)
1122 revs = set(self.rev(n) for n in nodelist)
1133 revs = set(self.rev(n) for n in nodelist)
1123 revs = dag.linearize(revs)
1134 revs = dag.linearize(revs)
1124 else:
1135 else:
1125 revs = sorted([self.rev(n) for n in nodelist])
1136 revs = sorted([self.rev(n) for n in nodelist])
1126
1137
1127 # add the parent of the first rev
1138 # add the parent of the first rev
1128 p = self.parentrevs(revs[0])[0]
1139 p = self.parentrevs(revs[0])[0]
1129 revs.insert(0, p)
1140 revs.insert(0, p)
1130
1141
1131 # build deltas
1142 # build deltas
1132 for r in xrange(len(revs) - 1):
1143 for r in xrange(len(revs) - 1):
1133 prev, curr = revs[r], revs[r + 1]
1144 prev, curr = revs[r], revs[r + 1]
1134 for c in bundler.revchunk(self, curr, prev):
1145 for c in bundler.revchunk(self, curr, prev):
1135 yield c
1146 yield c
1136
1147
1137 yield bundler.close()
1148 yield bundler.close()
1138
1149
1139 def addgroup(self, bundle, linkmapper, transaction):
1150 def addgroup(self, bundle, linkmapper, transaction):
1140 """
1151 """
1141 add a delta group
1152 add a delta group
1142
1153
1143 given a set of deltas, add them to the revision log. the
1154 given a set of deltas, add them to the revision log. the
1144 first delta is against its parent, which should be in our
1155 first delta is against its parent, which should be in our
1145 log, the rest are against the previous delta.
1156 log, the rest are against the previous delta.
1146 """
1157 """
1147
1158
1148 # track the base of the current delta log
1159 # track the base of the current delta log
1149 content = []
1160 content = []
1150 node = None
1161 node = None
1151
1162
1152 r = len(self)
1163 r = len(self)
1153 end = 0
1164 end = 0
1154 if r:
1165 if r:
1155 end = self.end(r - 1)
1166 end = self.end(r - 1)
1156 ifh = self.opener(self.indexfile, "a+")
1167 ifh = self.opener(self.indexfile, "a+")
1157 isize = r * self._io.size
1168 isize = r * self._io.size
1158 if self._inline:
1169 if self._inline:
1159 transaction.add(self.indexfile, end + isize, r)
1170 transaction.add(self.indexfile, end + isize, r)
1160 dfh = None
1171 dfh = None
1161 else:
1172 else:
1162 transaction.add(self.indexfile, isize, r)
1173 transaction.add(self.indexfile, isize, r)
1163 transaction.add(self.datafile, end)
1174 transaction.add(self.datafile, end)
1164 dfh = self.opener(self.datafile, "a")
1175 dfh = self.opener(self.datafile, "a")
1165
1176
1166 try:
1177 try:
1167 # loop through our set of deltas
1178 # loop through our set of deltas
1168 chain = None
1179 chain = None
1169 while True:
1180 while True:
1170 chunkdata = bundle.deltachunk(chain)
1181 chunkdata = bundle.deltachunk(chain)
1171 if not chunkdata:
1182 if not chunkdata:
1172 break
1183 break
1173 node = chunkdata['node']
1184 node = chunkdata['node']
1174 p1 = chunkdata['p1']
1185 p1 = chunkdata['p1']
1175 p2 = chunkdata['p2']
1186 p2 = chunkdata['p2']
1176 cs = chunkdata['cs']
1187 cs = chunkdata['cs']
1177 deltabase = chunkdata['deltabase']
1188 deltabase = chunkdata['deltabase']
1178 delta = chunkdata['delta']
1189 delta = chunkdata['delta']
1179
1190
1180 content.append(node)
1191 content.append(node)
1181
1192
1182 link = linkmapper(cs)
1193 link = linkmapper(cs)
1183 if node in self.nodemap:
1194 if node in self.nodemap:
1184 # this can happen if two branches make the same change
1195 # this can happen if two branches make the same change
1185 chain = node
1196 chain = node
1186 continue
1197 continue
1187
1198
1188 for p in (p1, p2):
1199 for p in (p1, p2):
1189 if not p in self.nodemap:
1200 if not p in self.nodemap:
1190 raise LookupError(p, self.indexfile,
1201 raise LookupError(p, self.indexfile,
1191 _('unknown parent'))
1202 _('unknown parent'))
1192
1203
1193 if deltabase not in self.nodemap:
1204 if deltabase not in self.nodemap:
1194 raise LookupError(deltabase, self.indexfile,
1205 raise LookupError(deltabase, self.indexfile,
1195 _('unknown delta base'))
1206 _('unknown delta base'))
1196
1207
1197 baserev = self.rev(deltabase)
1208 baserev = self.rev(deltabase)
1198 chain = self._addrevision(node, None, transaction, link,
1209 chain = self._addrevision(node, None, transaction, link,
1199 p1, p2, (baserev, delta), ifh, dfh)
1210 p1, p2, (baserev, delta), ifh, dfh)
1200 if not dfh and not self._inline:
1211 if not dfh and not self._inline:
1201 # addrevision switched from inline to conventional
1212 # addrevision switched from inline to conventional
1202 # reopen the index
1213 # reopen the index
1203 ifh.close()
1214 ifh.close()
1204 dfh = self.opener(self.datafile, "a")
1215 dfh = self.opener(self.datafile, "a")
1205 ifh = self.opener(self.indexfile, "a")
1216 ifh = self.opener(self.indexfile, "a")
1206 finally:
1217 finally:
1207 if dfh:
1218 if dfh:
1208 dfh.close()
1219 dfh.close()
1209 ifh.close()
1220 ifh.close()
1210
1221
1211 return content
1222 return content
1212
1223
1213 def strip(self, minlink, transaction):
1224 def strip(self, minlink, transaction):
1214 """truncate the revlog on the first revision with a linkrev >= minlink
1225 """truncate the revlog on the first revision with a linkrev >= minlink
1215
1226
1216 This function is called when we're stripping revision minlink and
1227 This function is called when we're stripping revision minlink and
1217 its descendants from the repository.
1228 its descendants from the repository.
1218
1229
1219 We have to remove all revisions with linkrev >= minlink, because
1230 We have to remove all revisions with linkrev >= minlink, because
1220 the equivalent changelog revisions will be renumbered after the
1231 the equivalent changelog revisions will be renumbered after the
1221 strip.
1232 strip.
1222
1233
1223 So we truncate the revlog on the first of these revisions, and
1234 So we truncate the revlog on the first of these revisions, and
1224 trust that the caller has saved the revisions that shouldn't be
1235 trust that the caller has saved the revisions that shouldn't be
1225 removed and that it'll re-add them after this truncation.
1236 removed and that it'll re-add them after this truncation.
1226 """
1237 """
1227 if len(self) == 0:
1238 if len(self) == 0:
1228 return
1239 return
1229
1240
1230 for rev in self:
1241 for rev in self:
1231 if self.index[rev][4] >= minlink:
1242 if self.index[rev][4] >= minlink:
1232 break
1243 break
1233 else:
1244 else:
1234 return
1245 return
1235
1246
1236 # first truncate the files on disk
1247 # first truncate the files on disk
1237 end = self.start(rev)
1248 end = self.start(rev)
1238 if not self._inline:
1249 if not self._inline:
1239 transaction.add(self.datafile, end)
1250 transaction.add(self.datafile, end)
1240 end = rev * self._io.size
1251 end = rev * self._io.size
1241 else:
1252 else:
1242 end += rev * self._io.size
1253 end += rev * self._io.size
1243
1254
1244 transaction.add(self.indexfile, end)
1255 transaction.add(self.indexfile, end)
1245
1256
1246 # then reset internal state in memory to forget those revisions
1257 # then reset internal state in memory to forget those revisions
1247 self._cache = None
1258 self._cache = None
1248 self._chunkclear()
1259 self._chunkclear()
1249 for x in xrange(rev, len(self)):
1260 for x in xrange(rev, len(self)):
1250 del self.nodemap[self.node(x)]
1261 del self.nodemap[self.node(x)]
1251
1262
1252 del self.index[rev:-1]
1263 del self.index[rev:-1]
1253
1264
1254 def checksize(self):
1265 def checksize(self):
1255 expected = 0
1266 expected = 0
1256 if len(self):
1267 if len(self):
1257 expected = max(0, self.end(len(self) - 1))
1268 expected = max(0, self.end(len(self) - 1))
1258
1269
1259 try:
1270 try:
1260 f = self.opener(self.datafile)
1271 f = self.opener(self.datafile)
1261 f.seek(0, 2)
1272 f.seek(0, 2)
1262 actual = f.tell()
1273 actual = f.tell()
1263 f.close()
1274 f.close()
1264 dd = actual - expected
1275 dd = actual - expected
1265 except IOError, inst:
1276 except IOError, inst:
1266 if inst.errno != errno.ENOENT:
1277 if inst.errno != errno.ENOENT:
1267 raise
1278 raise
1268 dd = 0
1279 dd = 0
1269
1280
1270 try:
1281 try:
1271 f = self.opener(self.indexfile)
1282 f = self.opener(self.indexfile)
1272 f.seek(0, 2)
1283 f.seek(0, 2)
1273 actual = f.tell()
1284 actual = f.tell()
1274 f.close()
1285 f.close()
1275 s = self._io.size
1286 s = self._io.size
1276 i = max(0, actual // s)
1287 i = max(0, actual // s)
1277 di = actual - (i * s)
1288 di = actual - (i * s)
1278 if self._inline:
1289 if self._inline:
1279 databytes = 0
1290 databytes = 0
1280 for r in self:
1291 for r in self:
1281 databytes += max(0, self.length(r))
1292 databytes += max(0, self.length(r))
1282 dd = 0
1293 dd = 0
1283 di = actual - len(self) * s - databytes
1294 di = actual - len(self) * s - databytes
1284 except IOError, inst:
1295 except IOError, inst:
1285 if inst.errno != errno.ENOENT:
1296 if inst.errno != errno.ENOENT:
1286 raise
1297 raise
1287 di = 0
1298 di = 0
1288
1299
1289 return (dd, di)
1300 return (dd, di)
1290
1301
1291 def files(self):
1302 def files(self):
1292 res = [self.indexfile]
1303 res = [self.indexfile]
1293 if not self._inline:
1304 if not self._inline:
1294 res.append(self.datafile)
1305 res.append(self.datafile)
1295 return res
1306 return res
@@ -1,115 +1,122 b''
1 from mercurial import parsers
1 from mercurial import parsers
2 from mercurial.node import nullid, nullrev
2 from mercurial.node import nullid, nullrev
3 import struct
3 import struct
4
4
5 # This unit test compares the return value of the original Python
5 # This unit test compares the return value of the original Python
6 # implementation of parseindex and the new C implementation for
6 # implementation of parseindex and the new C implementation for
7 # an index file with and without inlined data
7 # an index file with and without inlined data
8
8
9 # original python implementation
9 # original python implementation
10 def gettype(q):
10 def gettype(q):
11 return int(q & 0xFFFF)
11 return int(q & 0xFFFF)
12
12
13 def offset_type(offset, type):
13 def offset_type(offset, type):
14 return long(long(offset) << 16 | type)
14 return long(long(offset) << 16 | type)
15
15
16 indexformatng = ">Qiiiiii20s12x"
16 indexformatng = ">Qiiiiii20s12x"
17
17
18 def py_parseindex(data, inline) :
18 def py_parseindex(data, inline) :
19 s = 64
19 s = 64
20 cache = None
20 cache = None
21 index = []
21 index = []
22 nodemap = {nullid: nullrev}
22 nodemap = {nullid: nullrev}
23 n = off = 0
23 n = off = 0
24
24
25 l = len(data) - s
25 l = len(data) - s
26 append = index.append
26 append = index.append
27 if inline:
27 if inline:
28 cache = (0, data)
28 cache = (0, data)
29 while off <= l:
29 while off <= l:
30 e = struct.unpack(indexformatng, data[off:off + s])
30 e = struct.unpack(indexformatng, data[off:off + s])
31 nodemap[e[7]] = n
31 nodemap[e[7]] = n
32 append(e)
32 append(e)
33 n += 1
33 n += 1
34 if e[1] < 0:
34 if e[1] < 0:
35 break
35 break
36 off += e[1] + s
36 off += e[1] + s
37 else:
37 else:
38 while off <= l:
38 while off <= l:
39 e = struct.unpack(indexformatng, data[off:off + s])
39 e = struct.unpack(indexformatng, data[off:off + s])
40 nodemap[e[7]] = n
40 nodemap[e[7]] = n
41 append(e)
41 append(e)
42 n += 1
42 n += 1
43 off += s
43 off += s
44
44
45 e = list(index[0])
45 e = list(index[0])
46 type = gettype(e[0])
46 type = gettype(e[0])
47 e[0] = offset_type(0, type)
47 e[0] = offset_type(0, type)
48 index[0] = tuple(e)
48 index[0] = tuple(e)
49
49
50 # add the magic null revision at -1
50 # add the magic null revision at -1
51 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
51 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
52
52
53 return index, cache
53 return index, cache
54
54
55 data_inlined = '\x00\x01\x00\x01\x00\x00\x00\x00\x00\x00\x01\x8c' \
55 data_inlined = '\x00\x01\x00\x01\x00\x00\x00\x00\x00\x00\x01\x8c' \
56 '\x00\x00\x04\x07\x00\x00\x00\x00\x00\x00\x15\x15\xff\xff\xff' \
56 '\x00\x00\x04\x07\x00\x00\x00\x00\x00\x00\x15\x15\xff\xff\xff' \
57 '\xff\xff\xff\xff\xff\xebG\x97\xb7\x1fB\x04\xcf\x13V\x81\tw\x1b' \
57 '\xff\xff\xff\xff\xff\xebG\x97\xb7\x1fB\x04\xcf\x13V\x81\tw\x1b' \
58 'w\xdduR\xda\xc6\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' \
58 'w\xdduR\xda\xc6\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' \
59 'x\x9c\x9d\x93?O\xc30\x10\xc5\xf7|\x8a\xdb\x9a\xa8m\x06\xd8*\x95' \
59 'x\x9c\x9d\x93?O\xc30\x10\xc5\xf7|\x8a\xdb\x9a\xa8m\x06\xd8*\x95' \
60 '\x81B\xa1\xa2\xa2R\xcb\x86Pd\x9a\x0b5$vd_\x04\xfd\xf6\x9c\xff@' \
60 '\x81B\xa1\xa2\xa2R\xcb\x86Pd\x9a\x0b5$vd_\x04\xfd\xf6\x9c\xff@' \
61 '\x11!\x0b\xd9\xec\xf7\xbbw\xe7gG6\xad6\x04\xdaN\xc0\x92\xa0$)' \
61 '\x11!\x0b\xd9\xec\xf7\xbbw\xe7gG6\xad6\x04\xdaN\xc0\x92\xa0$)' \
62 '\xb1\x82\xa2\xd1%\x16\xa4\x8b7\xa9\xca\xd4-\xb2Y\x02\xfc\xc9' \
62 '\xb1\x82\xa2\xd1%\x16\xa4\x8b7\xa9\xca\xd4-\xb2Y\x02\xfc\xc9' \
63 '\xcaS\xf9\xaeX\xed\xb6\xd77Q\x02\x83\xd4\x19\xf5--Y\xea\xe1W' \
63 '\xcaS\xf9\xaeX\xed\xb6\xd77Q\x02\x83\xd4\x19\xf5--Y\xea\xe1W' \
64 '\xab\xed\x10\xceR\x0f_\xdf\xdf\r\xe1,\xf5\xf0\xcb\xf5 \xceR\x0f' \
64 '\xab\xed\x10\xceR\x0f_\xdf\xdf\r\xe1,\xf5\xf0\xcb\xf5 \xceR\x0f' \
65 '_\xdc\x0e\x0e\xc3R\x0f_\xae\x96\x9b!\x9e\xa5\x1e\xbf\xdb,\x06' \
65 '_\xdc\x0e\x0e\xc3R\x0f_\xae\x96\x9b!\x9e\xa5\x1e\xbf\xdb,\x06' \
66 '\xc7q\x9a/\x88\x82\xc3B\xea\xb5\xb4TJ\x93\xb6\x82\x0e\xe16\xe6' \
66 '\xc7q\x9a/\x88\x82\xc3B\xea\xb5\xb4TJ\x93\xb6\x82\x0e\xe16\xe6' \
67 'KQ\xdb\xaf\xecG\xa3\xd1 \x01\xd3\x0b_^\xe8\xaa\xa0\xae\xad\xd1' \
67 'KQ\xdb\xaf\xecG\xa3\xd1 \x01\xd3\x0b_^\xe8\xaa\xa0\xae\xad\xd1' \
68 '&\xbef\x1bz\x08\xb0|\xc9Xz\x06\xf6Z\x91\x90J\xaa\x17\x90\xaa' \
68 '&\xbef\x1bz\x08\xb0|\xc9Xz\x06\xf6Z\x91\x90J\xaa\x17\x90\xaa' \
69 '\xd2\xa6\x11$5C\xcf\xba#\xa0\x03\x02*2\x92-\xfc\xb1\x94\xdf\xe2' \
69 '\xd2\xa6\x11$5C\xcf\xba#\xa0\x03\x02*2\x92-\xfc\xb1\x94\xdf\xe2' \
70 '\xae\xb8\'m\x8ey0^\x85\xd3\x82\xb4\xf0`:\x9c\x00\x8a\xfd\x01' \
70 '\xae\xb8\'m\x8ey0^\x85\xd3\x82\xb4\xf0`:\x9c\x00\x8a\xfd\x01' \
71 '\xb0\xc6\x86\x8b\xdd\xae\x80\xf3\xa9\x9fd\x16\n\x00R%\x1a\x06' \
71 '\xb0\xc6\x86\x8b\xdd\xae\x80\xf3\xa9\x9fd\x16\n\x00R%\x1a\x06' \
72 '\xe9\xd8b\x98\x1d\xf4\xf3+\x9bf\x01\xd8p\x1b\xf3.\xed\x9f^g\xc3' \
72 '\xe9\xd8b\x98\x1d\xf4\xf3+\x9bf\x01\xd8p\x1b\xf3.\xed\x9f^g\xc3' \
73 '^\xd9W81T\xdb\xd5\x04sx|\xf2\xeb\xd6`%?x\xed"\x831\xbf\xf3\xdc' \
73 '^\xd9W81T\xdb\xd5\x04sx|\xf2\xeb\xd6`%?x\xed"\x831\xbf\xf3\xdc' \
74 'b\xeb%gaY\xe1\xad\x9f\xb9f\'1w\xa9\xa5a\x83s\x82J\xb98\xbc4\x8b' \
74 'b\xeb%gaY\xe1\xad\x9f\xb9f\'1w\xa9\xa5a\x83s\x82J\xb98\xbc4\x8b' \
75 '\x83\x00\x9f$z\xb8#\xa5\xb1\xdf\x98\xd9\xec\x1b\x89O\xe3Ts\x9a4' \
75 '\x83\x00\x9f$z\xb8#\xa5\xb1\xdf\x98\xd9\xec\x1b\x89O\xe3Ts\x9a4' \
76 '\x17m\x8b\xfc\x8f\xa5\x95\x9a\xfc\xfa\xed,\xe5|\xa1\xfe\x15\xb9' \
76 '\x17m\x8b\xfc\x8f\xa5\x95\x9a\xfc\xfa\xed,\xe5|\xa1\xfe\x15\xb9' \
77 '\xbc\xb2\x93\x1f\xf2\x95\xff\xdf,\x1a\xc5\xe7\x17*\x93Oz:>\x0e'
77 '\xbc\xb2\x93\x1f\xf2\x95\xff\xdf,\x1a\xc5\xe7\x17*\x93Oz:>\x0e'
78
78
79 data_non_inlined = '\x00\x00\x00\x01\x00\x00\x00\x00\x00\x01D\x19' \
79 data_non_inlined = '\x00\x00\x00\x01\x00\x00\x00\x00\x00\x01D\x19' \
80 '\x00\x07e\x12\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff\xff\xff' \
80 '\x00\x07e\x12\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff\xff\xff' \
81 '\xff\xff\xff\xff\xd1\xf4\xbb\xb0\xbe\xfc\x13\xbd\x8c\xd3\x9d' \
81 '\xff\xff\xff\xff\xd1\xf4\xbb\xb0\xbe\xfc\x13\xbd\x8c\xd3\x9d' \
82 '\x0f\xcd\xd9;\x8c\x07\x8cJ/\x00\x00\x00\x00\x00\x00\x00\x00\x00' \
82 '\x0f\xcd\xd9;\x8c\x07\x8cJ/\x00\x00\x00\x00\x00\x00\x00\x00\x00' \
83 '\x00\x00\x00\x00\x00\x00\x01D\x19\x00\x00\x00\x00\x00\xdf\x00' \
83 '\x00\x00\x00\x00\x00\x00\x01D\x19\x00\x00\x00\x00\x00\xdf\x00' \
84 '\x00\x01q\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x00\xff' \
84 '\x00\x01q\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x00\xff' \
85 '\xff\xff\xff\xc1\x12\xb9\x04\x96\xa4Z1t\x91\xdfsJ\x90\xf0\x9bh' \
85 '\xff\xff\xff\xc1\x12\xb9\x04\x96\xa4Z1t\x91\xdfsJ\x90\xf0\x9bh' \
86 '\x07l&\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' \
86 '\x07l&\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' \
87 '\x00\x01D\xf8\x00\x00\x00\x00\x01\x1b\x00\x00\x01\xb8\x00\x00' \
87 '\x00\x01D\xf8\x00\x00\x00\x00\x01\x1b\x00\x00\x01\xb8\x00\x00' \
88 '\x00\x01\x00\x00\x00\x02\x00\x00\x00\x01\xff\xff\xff\xff\x02\n' \
88 '\x00\x01\x00\x00\x00\x02\x00\x00\x00\x01\xff\xff\xff\xff\x02\n' \
89 '\x0e\xc6&\xa1\x92\xae6\x0b\x02i\xfe-\xe5\xbao\x05\xd1\xe7\x00' \
89 '\x0e\xc6&\xa1\x92\xae6\x0b\x02i\xfe-\xe5\xbao\x05\xd1\xe7\x00' \
90 '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01F' \
90 '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01F' \
91 '\x13\x00\x00\x00\x00\x01\xec\x00\x00\x03\x06\x00\x00\x00\x01' \
91 '\x13\x00\x00\x00\x00\x01\xec\x00\x00\x03\x06\x00\x00\x00\x01' \
92 '\x00\x00\x00\x03\x00\x00\x00\x02\xff\xff\xff\xff\x12\xcb\xeby1' \
92 '\x00\x00\x00\x03\x00\x00\x00\x02\xff\xff\xff\xff\x12\xcb\xeby1' \
93 '\xb6\r\x98B\xcb\x07\xbd`\x8f\x92\xd9\xc4\x84\xbdK\x00\x00\x00' \
93 '\xb6\r\x98B\xcb\x07\xbd`\x8f\x92\xd9\xc4\x84\xbdK\x00\x00\x00' \
94 '\x00\x00\x00\x00\x00\x00\x00\x00\x00'
94 '\x00\x00\x00\x00\x00\x00\x00\x00\x00'
95
95
96 def parse_index2(data, inline):
96 def parse_index2(data, inline):
97 index, chunkcache = parsers.parse_index2(data, inline)
97 index, chunkcache = parsers.parse_index2(data, inline)
98 return list(index), chunkcache
98 return list(index), chunkcache
99
99
100 def runtest() :
100 def runtest() :
101 py_res_1 = py_parseindex(data_inlined, True)
101 py_res_1 = py_parseindex(data_inlined, True)
102 c_res_1 = parse_index2(data_inlined, True)
102 c_res_1 = parse_index2(data_inlined, True)
103
103
104 py_res_2 = py_parseindex(data_non_inlined, False)
104 py_res_2 = py_parseindex(data_non_inlined, False)
105 c_res_2 = parse_index2(data_non_inlined, False)
105 c_res_2 = parse_index2(data_non_inlined, False)
106
106
107 if py_res_1 != c_res_1:
107 if py_res_1 != c_res_1:
108 print "Parse index result (with inlined data) differs!"
108 print "Parse index result (with inlined data) differs!"
109
109
110 if py_res_2 != c_res_2:
110 if py_res_2 != c_res_2:
111 print "Parse index result (no inlined data) differs!"
111 print "Parse index result (no inlined data) differs!"
112
112
113 ix = parsers.parse_index2(data_inlined, True)[0]
114 for i, r in enumerate(ix):
115 if r[7] == nullid:
116 i = -1
117 if ix[r[7]] != i:
118 print 'Reverse lookup inconsistent for %r' % r[7].encode('hex')
119
113 print "done"
120 print "done"
114
121
115 runtest()
122 runtest()
General Comments 0
You need to be logged in to leave comments. Login now