##// END OF EJS Templates
merge with stable
Matt Mackall -
r20169:507919a3 merge default
parent child Browse files
Show More
@@ -1,391 +1,393 b''
1 1 # hgweb/hgweb_mod.py - Web interface for a repository.
2 2 #
3 3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 import os, re
10 10 from mercurial import ui, hg, hook, error, encoding, templater, util, repoview
11 11 from mercurial.templatefilters import websub
12 12 from mercurial.i18n import _
13 13 from common import get_stat, ErrorResponse, permhooks, caching
14 14 from common import HTTP_OK, HTTP_NOT_MODIFIED, HTTP_BAD_REQUEST
15 15 from common import HTTP_NOT_FOUND, HTTP_SERVER_ERROR
16 16 from request import wsgirequest
17 17 import webcommands, protocol, webutil
18 18
19 19 perms = {
20 20 'changegroup': 'pull',
21 21 'changegroupsubset': 'pull',
22 22 'getbundle': 'pull',
23 23 'stream_out': 'pull',
24 24 'listkeys': 'pull',
25 25 'unbundle': 'push',
26 26 'pushkey': 'push',
27 27 }
28 28
29 29 def makebreadcrumb(url, prefix=''):
30 30 '''Return a 'URL breadcrumb' list
31 31
32 32 A 'URL breadcrumb' is a list of URL-name pairs,
33 33 corresponding to each of the path items on a URL.
34 34 This can be used to create path navigation entries.
35 35 '''
36 36 if url.endswith('/'):
37 37 url = url[:-1]
38 38 if prefix:
39 39 url = '/' + prefix + url
40 40 relpath = url
41 41 if relpath.startswith('/'):
42 42 relpath = relpath[1:]
43 43
44 44 breadcrumb = []
45 45 urlel = url
46 46 pathitems = [''] + relpath.split('/')
47 47 for pathel in reversed(pathitems):
48 48 if not pathel or not urlel:
49 49 break
50 50 breadcrumb.append({'url': urlel, 'name': pathel})
51 51 urlel = os.path.dirname(urlel)
52 52 return reversed(breadcrumb)
53 53
54 54
55 55 class hgweb(object):
56 56 def __init__(self, repo, name=None, baseui=None):
57 57 if isinstance(repo, str):
58 58 if baseui:
59 59 u = baseui.copy()
60 60 else:
61 61 u = ui.ui()
62 self.repo = hg.repository(u, repo)
62 r = hg.repository(u, repo)
63 63 else:
64 self.repo = repo
64 r = repo
65 65
66 self.repo = self._getview(self.repo)
67 self.repo.ui.setconfig('ui', 'report_untrusted', 'off')
68 self.repo.baseui.setconfig('ui', 'report_untrusted', 'off')
69 self.repo.ui.setconfig('ui', 'nontty', 'true')
70 self.repo.baseui.setconfig('ui', 'nontty', 'true')
66 r = self._getview(r)
67 r.ui.setconfig('ui', 'report_untrusted', 'off')
68 r.baseui.setconfig('ui', 'report_untrusted', 'off')
69 r.ui.setconfig('ui', 'nontty', 'true')
70 r.baseui.setconfig('ui', 'nontty', 'true')
71 self.repo = r
71 72 hook.redirect(True)
72 73 self.mtime = -1
73 74 self.size = -1
74 75 self.reponame = name
75 76 self.archives = 'zip', 'gz', 'bz2'
76 77 self.stripecount = 1
77 78 # a repo owner may set web.templates in .hg/hgrc to get any file
78 79 # readable by the user running the CGI script
79 80 self.templatepath = self.config('web', 'templates')
80 81 self.websubtable = self.loadwebsub()
81 82
82 83 # The CGI scripts are often run by a user different from the repo owner.
83 84 # Trust the settings from the .hg/hgrc files by default.
84 85 def config(self, section, name, default=None, untrusted=True):
85 86 return self.repo.ui.config(section, name, default,
86 87 untrusted=untrusted)
87 88
88 89 def configbool(self, section, name, default=False, untrusted=True):
89 90 return self.repo.ui.configbool(section, name, default,
90 91 untrusted=untrusted)
91 92
92 93 def configlist(self, section, name, default=None, untrusted=True):
93 94 return self.repo.ui.configlist(section, name, default,
94 95 untrusted=untrusted)
95 96
96 97 def _getview(self, repo):
97 viewconfig = self.config('web', 'view', 'served')
98 viewconfig = repo.ui.config('web', 'view', 'served',
99 untrusted=True)
98 100 if viewconfig == 'all':
99 101 return repo.unfiltered()
100 102 elif viewconfig in repoview.filtertable:
101 103 return repo.filtered(viewconfig)
102 104 else:
103 105 return repo.filtered('served')
104 106
105 107 def refresh(self, request=None):
106 108 st = get_stat(self.repo.spath)
107 109 # compare changelog size in addition to mtime to catch
108 110 # rollbacks made less than a second ago
109 111 if st.st_mtime != self.mtime or st.st_size != self.size:
110 112 self.mtime = st.st_mtime
111 113 self.size = st.st_size
112 114 r = hg.repository(self.repo.baseui, self.repo.root)
113 115 self.repo = self._getview(r)
114 116 self.maxchanges = int(self.config("web", "maxchanges", 10))
115 117 self.stripecount = int(self.config("web", "stripes", 1))
116 118 self.maxshortchanges = int(self.config("web", "maxshortchanges",
117 119 60))
118 120 self.maxfiles = int(self.config("web", "maxfiles", 10))
119 121 self.allowpull = self.configbool("web", "allowpull", True)
120 122 encoding.encoding = self.config("web", "encoding",
121 123 encoding.encoding)
122 124 if request:
123 125 self.repo.ui.environ = request.env
124 126
125 127 def run(self):
126 128 if not os.environ.get('GATEWAY_INTERFACE', '').startswith("CGI/1."):
127 129 raise RuntimeError("This function is only intended to be "
128 130 "called while running as a CGI script.")
129 131 import mercurial.hgweb.wsgicgi as wsgicgi
130 132 wsgicgi.launch(self)
131 133
132 134 def __call__(self, env, respond):
133 135 req = wsgirequest(env, respond)
134 136 return self.run_wsgi(req)
135 137
136 138 def run_wsgi(self, req):
137 139
138 140 self.refresh(req)
139 141
140 142 # work with CGI variables to create coherent structure
141 143 # use SCRIPT_NAME, PATH_INFO and QUERY_STRING as well as our REPO_NAME
142 144
143 145 req.url = req.env['SCRIPT_NAME']
144 146 if not req.url.endswith('/'):
145 147 req.url += '/'
146 148 if 'REPO_NAME' in req.env:
147 149 req.url += req.env['REPO_NAME'] + '/'
148 150
149 151 if 'PATH_INFO' in req.env:
150 152 parts = req.env['PATH_INFO'].strip('/').split('/')
151 153 repo_parts = req.env.get('REPO_NAME', '').split('/')
152 154 if parts[:len(repo_parts)] == repo_parts:
153 155 parts = parts[len(repo_parts):]
154 156 query = '/'.join(parts)
155 157 else:
156 158 query = req.env['QUERY_STRING'].split('&', 1)[0]
157 159 query = query.split(';', 1)[0]
158 160
159 161 # process this if it's a protocol request
160 162 # protocol bits don't need to create any URLs
161 163 # and the clients always use the old URL structure
162 164
163 165 cmd = req.form.get('cmd', [''])[0]
164 166 if protocol.iscmd(cmd):
165 167 try:
166 168 if query:
167 169 raise ErrorResponse(HTTP_NOT_FOUND)
168 170 if cmd in perms:
169 171 self.check_perm(req, perms[cmd])
170 172 return protocol.call(self.repo, req, cmd)
171 173 except ErrorResponse, inst:
172 174 # A client that sends unbundle without 100-continue will
173 175 # break if we respond early.
174 176 if (cmd == 'unbundle' and
175 177 (req.env.get('HTTP_EXPECT',
176 178 '').lower() != '100-continue') or
177 179 req.env.get('X-HgHttp2', '')):
178 180 req.drain()
179 181 else:
180 182 req.headers.append(('Connection', 'Close'))
181 183 req.respond(inst, protocol.HGTYPE,
182 184 body='0\n%s\n' % inst.message)
183 185 return ''
184 186
185 187 # translate user-visible url structure to internal structure
186 188
187 189 args = query.split('/', 2)
188 190 if 'cmd' not in req.form and args and args[0]:
189 191
190 192 cmd = args.pop(0)
191 193 style = cmd.rfind('-')
192 194 if style != -1:
193 195 req.form['style'] = [cmd[:style]]
194 196 cmd = cmd[style + 1:]
195 197
196 198 # avoid accepting e.g. style parameter as command
197 199 if util.safehasattr(webcommands, cmd):
198 200 req.form['cmd'] = [cmd]
199 201 else:
200 202 cmd = ''
201 203
202 204 if cmd == 'static':
203 205 req.form['file'] = ['/'.join(args)]
204 206 else:
205 207 if args and args[0]:
206 208 node = args.pop(0)
207 209 req.form['node'] = [node]
208 210 if args:
209 211 req.form['file'] = args
210 212
211 213 ua = req.env.get('HTTP_USER_AGENT', '')
212 214 if cmd == 'rev' and 'mercurial' in ua:
213 215 req.form['style'] = ['raw']
214 216
215 217 if cmd == 'archive':
216 218 fn = req.form['node'][0]
217 219 for type_, spec in self.archive_specs.iteritems():
218 220 ext = spec[2]
219 221 if fn.endswith(ext):
220 222 req.form['node'] = [fn[:-len(ext)]]
221 223 req.form['type'] = [type_]
222 224
223 225 # process the web interface request
224 226
225 227 try:
226 228 tmpl = self.templater(req)
227 229 ctype = tmpl('mimetype', encoding=encoding.encoding)
228 230 ctype = templater.stringify(ctype)
229 231
230 232 # check read permissions non-static content
231 233 if cmd != 'static':
232 234 self.check_perm(req, None)
233 235
234 236 if cmd == '':
235 237 req.form['cmd'] = [tmpl.cache['default']]
236 238 cmd = req.form['cmd'][0]
237 239
238 240 if self.configbool('web', 'cache', True):
239 241 caching(self, req) # sets ETag header or raises NOT_MODIFIED
240 242 if cmd not in webcommands.__all__:
241 243 msg = 'no such method: %s' % cmd
242 244 raise ErrorResponse(HTTP_BAD_REQUEST, msg)
243 245 elif cmd == 'file' and 'raw' in req.form.get('style', []):
244 246 self.ctype = ctype
245 247 content = webcommands.rawfile(self, req, tmpl)
246 248 else:
247 249 content = getattr(webcommands, cmd)(self, req, tmpl)
248 250 req.respond(HTTP_OK, ctype)
249 251
250 252 return content
251 253
252 254 except (error.LookupError, error.RepoLookupError), err:
253 255 req.respond(HTTP_NOT_FOUND, ctype)
254 256 msg = str(err)
255 257 if (util.safehasattr(err, 'name') and
256 258 not isinstance(err, error.ManifestLookupError)):
257 259 msg = 'revision not found: %s' % err.name
258 260 return tmpl('error', error=msg)
259 261 except (error.RepoError, error.RevlogError), inst:
260 262 req.respond(HTTP_SERVER_ERROR, ctype)
261 263 return tmpl('error', error=str(inst))
262 264 except ErrorResponse, inst:
263 265 req.respond(inst, ctype)
264 266 if inst.code == HTTP_NOT_MODIFIED:
265 267 # Not allowed to return a body on a 304
266 268 return ['']
267 269 return tmpl('error', error=inst.message)
268 270
269 271 def loadwebsub(self):
270 272 websubtable = []
271 273 websubdefs = self.repo.ui.configitems('websub')
272 274 # we must maintain interhg backwards compatibility
273 275 websubdefs += self.repo.ui.configitems('interhg')
274 276 for key, pattern in websubdefs:
275 277 # grab the delimiter from the character after the "s"
276 278 unesc = pattern[1]
277 279 delim = re.escape(unesc)
278 280
279 281 # identify portions of the pattern, taking care to avoid escaped
280 282 # delimiters. the replace format and flags are optional, but
281 283 # delimiters are required.
282 284 match = re.match(
283 285 r'^s%s(.+)(?:(?<=\\\\)|(?<!\\))%s(.*)%s([ilmsux])*$'
284 286 % (delim, delim, delim), pattern)
285 287 if not match:
286 288 self.repo.ui.warn(_("websub: invalid pattern for %s: %s\n")
287 289 % (key, pattern))
288 290 continue
289 291
290 292 # we need to unescape the delimiter for regexp and format
291 293 delim_re = re.compile(r'(?<!\\)\\%s' % delim)
292 294 regexp = delim_re.sub(unesc, match.group(1))
293 295 format = delim_re.sub(unesc, match.group(2))
294 296
295 297 # the pattern allows for 6 regexp flags, so set them if necessary
296 298 flagin = match.group(3)
297 299 flags = 0
298 300 if flagin:
299 301 for flag in flagin.upper():
300 302 flags |= re.__dict__[flag]
301 303
302 304 try:
303 305 regexp = re.compile(regexp, flags)
304 306 websubtable.append((regexp, format))
305 307 except re.error:
306 308 self.repo.ui.warn(_("websub: invalid regexp for %s: %s\n")
307 309 % (key, regexp))
308 310 return websubtable
309 311
310 312 def templater(self, req):
311 313
312 314 # determine scheme, port and server name
313 315 # this is needed to create absolute urls
314 316
315 317 proto = req.env.get('wsgi.url_scheme')
316 318 if proto == 'https':
317 319 proto = 'https'
318 320 default_port = "443"
319 321 else:
320 322 proto = 'http'
321 323 default_port = "80"
322 324
323 325 port = req.env["SERVER_PORT"]
324 326 port = port != default_port and (":" + port) or ""
325 327 urlbase = '%s://%s%s' % (proto, req.env['SERVER_NAME'], port)
326 328 logourl = self.config("web", "logourl", "http://mercurial.selenic.com/")
327 329 logoimg = self.config("web", "logoimg", "hglogo.png")
328 330 staticurl = self.config("web", "staticurl") or req.url + 'static/'
329 331 if not staticurl.endswith('/'):
330 332 staticurl += '/'
331 333
332 334 # some functions for the templater
333 335
334 336 def motd(**map):
335 337 yield self.config("web", "motd", "")
336 338
337 339 # figure out which style to use
338 340
339 341 vars = {}
340 342 styles = (
341 343 req.form.get('style', [None])[0],
342 344 self.config('web', 'style'),
343 345 'paper',
344 346 )
345 347 style, mapfile = templater.stylemap(styles, self.templatepath)
346 348 if style == styles[0]:
347 349 vars['style'] = style
348 350
349 351 start = req.url[-1] == '?' and '&' or '?'
350 352 sessionvars = webutil.sessionvars(vars, start)
351 353
352 354 if not self.reponame:
353 355 self.reponame = (self.config("web", "name")
354 356 or req.env.get('REPO_NAME')
355 357 or req.url.strip('/') or self.repo.root)
356 358
357 359 def websubfilter(text):
358 360 return websub(text, self.websubtable)
359 361
360 362 # create the templater
361 363
362 364 tmpl = templater.templater(mapfile,
363 365 filters={"websub": websubfilter},
364 366 defaults={"url": req.url,
365 367 "logourl": logourl,
366 368 "logoimg": logoimg,
367 369 "staticurl": staticurl,
368 370 "urlbase": urlbase,
369 371 "repo": self.reponame,
370 372 "encoding": encoding.encoding,
371 373 "motd": motd,
372 374 "sessionvars": sessionvars,
373 375 "pathdef": makebreadcrumb(req.url),
374 376 })
375 377 return tmpl
376 378
377 379 def archivelist(self, nodeid):
378 380 allowed = self.configlist("web", "allow_archive")
379 381 for i, spec in self.archive_specs.iteritems():
380 382 if i in allowed or self.configbool("web", "allow" + i):
381 383 yield {"type" : i, "extension" : spec[2], "node" : nodeid}
382 384
383 385 archive_specs = {
384 386 'bz2': ('application/x-bzip2', 'tbz2', '.tar.bz2', None),
385 387 'gz': ('application/x-gzip', 'tgz', '.tar.gz', None),
386 388 'zip': ('application/zip', 'zip', '.zip', None),
387 389 }
388 390
389 391 def check_perm(self, req, op):
390 392 for hook in permhooks:
391 393 hook(self, req, op)
@@ -1,429 +1,420 b''
1 1 /*
2 2 mpatch.c - efficient binary patching for Mercurial
3 3
4 4 This implements a patch algorithm that's O(m + nlog n) where m is the
5 5 size of the output and n is the number of patches.
6 6
7 7 Given a list of binary patches, it unpacks each into a hunk list,
8 8 then combines the hunk lists with a treewise recursion to form a
9 9 single hunk list. This hunk list is then applied to the original
10 10 text.
11 11
12 12 The text (or binary) fragments are copied directly from their source
13 13 Python objects into a preallocated output string to avoid the
14 14 allocation of intermediate Python objects. Working memory is about 2x
15 15 the total number of hunks.
16 16
17 17 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
18 18
19 19 This software may be used and distributed according to the terms
20 20 of the GNU General Public License, incorporated herein by reference.
21 21 */
22 22
23 23 #define PY_SSIZE_T_CLEAN
24 24 #include <Python.h>
25 25 #include <stdlib.h>
26 26 #include <string.h>
27 27
28 28 #include "util.h"
29 29
30 30 static char mpatch_doc[] = "Efficient binary patching.";
31 31 static PyObject *mpatch_Error;
32 32
33 33 struct frag {
34 34 int start, end, len;
35 35 const char *data;
36 36 };
37 37
38 38 struct flist {
39 39 struct frag *base, *head, *tail;
40 40 };
41 41
42 42 static struct flist *lalloc(Py_ssize_t size)
43 43 {
44 44 struct flist *a = NULL;
45 45
46 46 if (size < 1)
47 47 size = 1;
48 48
49 49 a = (struct flist *)malloc(sizeof(struct flist));
50 50 if (a) {
51 51 a->base = (struct frag *)malloc(sizeof(struct frag) * size);
52 52 if (a->base) {
53 53 a->head = a->tail = a->base;
54 54 return a;
55 55 }
56 56 free(a);
57 57 a = NULL;
58 58 }
59 59 if (!PyErr_Occurred())
60 60 PyErr_NoMemory();
61 61 return NULL;
62 62 }
63 63
64 64 static void lfree(struct flist *a)
65 65 {
66 66 if (a) {
67 67 free(a->base);
68 68 free(a);
69 69 }
70 70 }
71 71
72 72 static Py_ssize_t lsize(struct flist *a)
73 73 {
74 74 return a->tail - a->head;
75 75 }
76 76
77 77 /* move hunks in source that are less cut to dest, compensating
78 78 for changes in offset. the last hunk may be split if necessary.
79 79 */
80 80 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
81 81 {
82 82 struct frag *d = dest->tail, *s = src->head;
83 83 int postend, c, l;
84 84
85 85 while (s != src->tail) {
86 86 if (s->start + offset >= cut)
87 87 break; /* we've gone far enough */
88 88
89 89 postend = offset + s->start + s->len;
90 90 if (postend <= cut) {
91 91 /* save this hunk */
92 92 offset += s->start + s->len - s->end;
93 93 *d++ = *s++;
94 94 }
95 95 else {
96 96 /* break up this hunk */
97 97 c = cut - offset;
98 98 if (s->end < c)
99 99 c = s->end;
100 100 l = cut - offset - s->start;
101 101 if (s->len < l)
102 102 l = s->len;
103 103
104 104 offset += s->start + l - c;
105 105
106 106 d->start = s->start;
107 107 d->end = c;
108 108 d->len = l;
109 109 d->data = s->data;
110 110 d++;
111 111 s->start = c;
112 112 s->len = s->len - l;
113 113 s->data = s->data + l;
114 114
115 115 break;
116 116 }
117 117 }
118 118
119 119 dest->tail = d;
120 120 src->head = s;
121 121 return offset;
122 122 }
123 123
124 124 /* like gather, but with no output list */
125 125 static int discard(struct flist *src, int cut, int offset)
126 126 {
127 127 struct frag *s = src->head;
128 128 int postend, c, l;
129 129
130 130 while (s != src->tail) {
131 131 if (s->start + offset >= cut)
132 132 break;
133 133
134 134 postend = offset + s->start + s->len;
135 135 if (postend <= cut) {
136 136 offset += s->start + s->len - s->end;
137 137 s++;
138 138 }
139 139 else {
140 140 c = cut - offset;
141 141 if (s->end < c)
142 142 c = s->end;
143 143 l = cut - offset - s->start;
144 144 if (s->len < l)
145 145 l = s->len;
146 146
147 147 offset += s->start + l - c;
148 148 s->start = c;
149 149 s->len = s->len - l;
150 150 s->data = s->data + l;
151 151
152 152 break;
153 153 }
154 154 }
155 155
156 156 src->head = s;
157 157 return offset;
158 158 }
159 159
160 160 /* combine hunk lists a and b, while adjusting b for offset changes in a/
161 161 this deletes a and b and returns the resultant list. */
162 162 static struct flist *combine(struct flist *a, struct flist *b)
163 163 {
164 164 struct flist *c = NULL;
165 165 struct frag *bh, *ct;
166 166 int offset = 0, post;
167 167
168 168 if (a && b)
169 169 c = lalloc((lsize(a) + lsize(b)) * 2);
170 170
171 171 if (c) {
172 172
173 173 for (bh = b->head; bh != b->tail; bh++) {
174 174 /* save old hunks */
175 175 offset = gather(c, a, bh->start, offset);
176 176
177 177 /* discard replaced hunks */
178 178 post = discard(a, bh->end, offset);
179 179
180 180 /* insert new hunk */
181 181 ct = c->tail;
182 182 ct->start = bh->start - offset;
183 183 ct->end = bh->end - post;
184 184 ct->len = bh->len;
185 185 ct->data = bh->data;
186 186 c->tail++;
187 187 offset = post;
188 188 }
189 189
190 190 /* hold on to tail from a */
191 191 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
192 192 c->tail += lsize(a);
193 193 }
194 194
195 195 lfree(a);
196 196 lfree(b);
197 197 return c;
198 198 }
199 199
200 200 /* decode a binary patch into a hunk list */
201 201 static struct flist *decode(const char *bin, Py_ssize_t len)
202 202 {
203 203 struct flist *l;
204 204 struct frag *lt;
205 const char *data = bin + 12, *end = bin + len;
205 int pos = 0;
206 206
207 207 /* assume worst case size, we won't have many of these lists */
208 208 l = lalloc(len / 12);
209 209 if (!l)
210 210 return NULL;
211 211
212 212 lt = l->tail;
213 213
214 while (data <= end) {
215 lt->start = getbe32(bin);
216 lt->end = getbe32(bin + 4);
217 lt->len = getbe32(bin + 8);
214 while (pos >= 0 && pos < len) {
215 lt->start = getbe32(bin + pos);
216 lt->end = getbe32(bin + pos + 4);
217 lt->len = getbe32(bin + pos + 8);
218 218 if (lt->start > lt->end)
219 219 break; /* sanity check */
220 bin = data + lt->len;
221 if (bin < data)
222 break; /* big data + big (bogus) len can wrap around */
223 lt->data = data;
224 data = bin + 12;
220 lt->data = bin + pos + 12;
221 pos += 12 + lt->len;
225 222 lt++;
226 223 }
227 224
228 if (bin != end) {
225 if (pos != len) {
229 226 if (!PyErr_Occurred())
230 227 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
231 228 lfree(l);
232 229 return NULL;
233 230 }
234 231
235 232 l->tail = lt;
236 233 return l;
237 234 }
238 235
239 236 /* calculate the size of resultant text */
240 237 static Py_ssize_t calcsize(Py_ssize_t len, struct flist *l)
241 238 {
242 239 Py_ssize_t outlen = 0, last = 0;
243 240 struct frag *f = l->head;
244 241
245 242 while (f != l->tail) {
246 243 if (f->start < last || f->end > len) {
247 244 if (!PyErr_Occurred())
248 245 PyErr_SetString(mpatch_Error,
249 246 "invalid patch");
250 247 return -1;
251 248 }
252 249 outlen += f->start - last;
253 250 last = f->end;
254 251 outlen += f->len;
255 252 f++;
256 253 }
257 254
258 255 outlen += len - last;
259 256 return outlen;
260 257 }
261 258
262 259 static int apply(char *buf, const char *orig, Py_ssize_t len, struct flist *l)
263 260 {
264 261 struct frag *f = l->head;
265 262 int last = 0;
266 263 char *p = buf;
267 264
268 265 while (f != l->tail) {
269 266 if (f->start < last || f->end > len) {
270 267 if (!PyErr_Occurred())
271 268 PyErr_SetString(mpatch_Error,
272 269 "invalid patch");
273 270 return 0;
274 271 }
275 272 memcpy(p, orig + last, f->start - last);
276 273 p += f->start - last;
277 274 memcpy(p, f->data, f->len);
278 275 last = f->end;
279 276 p += f->len;
280 277 f++;
281 278 }
282 279 memcpy(p, orig + last, len - last);
283 280 return 1;
284 281 }
285 282
286 283 /* recursively generate a patch of all bins between start and end */
287 284 static struct flist *fold(PyObject *bins, Py_ssize_t start, Py_ssize_t end)
288 285 {
289 286 Py_ssize_t len, blen;
290 287 const char *buffer;
291 288
292 289 if (start + 1 == end) {
293 290 /* trivial case, output a decoded list */
294 291 PyObject *tmp = PyList_GetItem(bins, start);
295 292 if (!tmp)
296 293 return NULL;
297 294 if (PyObject_AsCharBuffer(tmp, &buffer, &blen))
298 295 return NULL;
299 296 return decode(buffer, blen);
300 297 }
301 298
302 299 /* divide and conquer, memory management is elsewhere */
303 300 len = (end - start) / 2;
304 301 return combine(fold(bins, start, start + len),
305 302 fold(bins, start + len, end));
306 303 }
307 304
308 305 static PyObject *
309 306 patches(PyObject *self, PyObject *args)
310 307 {
311 308 PyObject *text, *bins, *result;
312 309 struct flist *patch;
313 310 const char *in;
314 311 char *out;
315 312 Py_ssize_t len, outlen, inlen;
316 313
317 314 if (!PyArg_ParseTuple(args, "OO:mpatch", &text, &bins))
318 315 return NULL;
319 316
320 317 len = PyList_Size(bins);
321 318 if (!len) {
322 319 /* nothing to do */
323 320 Py_INCREF(text);
324 321 return text;
325 322 }
326 323
327 324 if (PyObject_AsCharBuffer(text, &in, &inlen))
328 325 return NULL;
329 326
330 327 patch = fold(bins, 0, len);
331 328 if (!patch)
332 329 return NULL;
333 330
334 331 outlen = calcsize(inlen, patch);
335 332 if (outlen < 0) {
336 333 result = NULL;
337 334 goto cleanup;
338 335 }
339 336 result = PyBytes_FromStringAndSize(NULL, outlen);
340 337 if (!result) {
341 338 result = NULL;
342 339 goto cleanup;
343 340 }
344 341 out = PyBytes_AsString(result);
345 342 if (!apply(out, in, inlen, patch)) {
346 343 Py_DECREF(result);
347 344 result = NULL;
348 345 }
349 346 cleanup:
350 347 lfree(patch);
351 348 return result;
352 349 }
353 350
354 351 /* calculate size of a patched file directly */
355 352 static PyObject *
356 353 patchedsize(PyObject *self, PyObject *args)
357 354 {
358 long orig, start, end, len, outlen = 0, last = 0;
355 long orig, start, end, len, outlen = 0, last = 0, pos = 0;
359 356 Py_ssize_t patchlen;
360 char *bin, *binend, *data;
357 char *bin;
361 358
362 359 if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
363 360 return NULL;
364 361
365 binend = bin + patchlen;
366 data = bin + 12;
367
368 while (data <= binend) {
369 start = getbe32(bin);
370 end = getbe32(bin + 4);
371 len = getbe32(bin + 8);
362 while (pos >= 0 && pos < patchlen) {
363 start = getbe32(bin + pos);
364 end = getbe32(bin + pos + 4);
365 len = getbe32(bin + pos + 8);
372 366 if (start > end)
373 367 break; /* sanity check */
374 bin = data + len;
375 if (bin < data)
376 break; /* big data + big (bogus) len can wrap around */
377 data = bin + 12;
368 pos += 12 + len;
378 369 outlen += start - last;
379 370 last = end;
380 371 outlen += len;
381 372 }
382 373
383 if (bin != binend) {
374 if (pos != patchlen) {
384 375 if (!PyErr_Occurred())
385 376 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
386 377 return NULL;
387 378 }
388 379
389 380 outlen += orig - last;
390 381 return Py_BuildValue("l", outlen);
391 382 }
392 383
393 384 static PyMethodDef methods[] = {
394 385 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
395 386 {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
396 387 {NULL, NULL}
397 388 };
398 389
399 390 #ifdef IS_PY3K
400 391 static struct PyModuleDef mpatch_module = {
401 392 PyModuleDef_HEAD_INIT,
402 393 "mpatch",
403 394 mpatch_doc,
404 395 -1,
405 396 methods
406 397 };
407 398
408 399 PyMODINIT_FUNC PyInit_mpatch(void)
409 400 {
410 401 PyObject *m;
411 402
412 403 m = PyModule_Create(&mpatch_module);
413 404 if (m == NULL)
414 405 return NULL;
415 406
416 407 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
417 408 Py_INCREF(mpatch_Error);
418 409 PyModule_AddObject(m, "mpatchError", mpatch_Error);
419 410
420 411 return m;
421 412 }
422 413 #else
423 414 PyMODINIT_FUNC
424 415 initmpatch(void)
425 416 {
426 417 Py_InitModule3("mpatch", methods, mpatch_doc);
427 418 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
428 419 }
429 420 #endif
@@ -1,1965 +1,1959 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #include <Python.h>
11 11 #include <ctype.h>
12 12 #include <stddef.h>
13 13 #include <string.h>
14 14
15 15 #include "util.h"
16 16
17 17 static int8_t hextable[256] = {
18 18 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
19 19 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
20 20 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
21 21 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, /* 0-9 */
22 22 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* A-F */
23 23 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
24 24 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* a-f */
25 25 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
26 26 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
27 27 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
28 28 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
29 29 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
30 30 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
31 31 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
32 32 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
33 33 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
34 34 };
35 35
36 36 static inline int hexdigit(const char *p, Py_ssize_t off)
37 37 {
38 38 int8_t val = hextable[(unsigned char)p[off]];
39 39
40 40 if (val >= 0) {
41 41 return val;
42 42 }
43 43
44 44 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
45 45 return 0;
46 46 }
47 47
48 48 /*
49 49 * Turn a hex-encoded string into binary.
50 50 */
51 51 static PyObject *unhexlify(const char *str, int len)
52 52 {
53 53 PyObject *ret;
54 54 char *d;
55 55 int i;
56 56
57 57 ret = PyBytes_FromStringAndSize(NULL, len / 2);
58 58
59 59 if (!ret)
60 60 return NULL;
61 61
62 62 d = PyBytes_AsString(ret);
63 63
64 64 for (i = 0; i < len;) {
65 65 int hi = hexdigit(str, i++);
66 66 int lo = hexdigit(str, i++);
67 67 *d++ = (hi << 4) | lo;
68 68 }
69 69
70 70 return ret;
71 71 }
72 72
73 73 /*
74 74 * This code assumes that a manifest is stitched together with newline
75 75 * ('\n') characters.
76 76 */
77 77 static PyObject *parse_manifest(PyObject *self, PyObject *args)
78 78 {
79 79 PyObject *mfdict, *fdict;
80 80 char *str, *start, *end;
81 81 int len;
82 82
83 83 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
84 84 &PyDict_Type, &mfdict,
85 85 &PyDict_Type, &fdict,
86 86 &str, &len))
87 87 goto quit;
88 88
89 89 start = str;
90 90 end = str + len;
91 91 while (start < end) {
92 92 PyObject *file = NULL, *node = NULL;
93 93 PyObject *flags = NULL;
94 94 char *zero = NULL, *newline = NULL;
95 95 ptrdiff_t nlen;
96 96
97 97 zero = memchr(start, '\0', end - start);
98 98 if (!zero) {
99 99 PyErr_SetString(PyExc_ValueError,
100 100 "manifest entry has no separator");
101 101 goto quit;
102 102 }
103 103
104 104 newline = memchr(zero + 1, '\n', end - (zero + 1));
105 105 if (!newline) {
106 106 PyErr_SetString(PyExc_ValueError,
107 107 "manifest contains trailing garbage");
108 108 goto quit;
109 109 }
110 110
111 111 file = PyBytes_FromStringAndSize(start, zero - start);
112 112
113 113 if (!file)
114 114 goto bail;
115 115
116 116 nlen = newline - zero - 1;
117 117
118 118 node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen);
119 119 if (!node)
120 120 goto bail;
121 121
122 122 if (nlen > 40) {
123 123 flags = PyBytes_FromStringAndSize(zero + 41,
124 124 nlen - 40);
125 125 if (!flags)
126 126 goto bail;
127 127
128 128 if (PyDict_SetItem(fdict, file, flags) == -1)
129 129 goto bail;
130 130 }
131 131
132 132 if (PyDict_SetItem(mfdict, file, node) == -1)
133 133 goto bail;
134 134
135 135 start = newline + 1;
136 136
137 137 Py_XDECREF(flags);
138 138 Py_XDECREF(node);
139 139 Py_XDECREF(file);
140 140 continue;
141 141 bail:
142 142 Py_XDECREF(flags);
143 143 Py_XDECREF(node);
144 144 Py_XDECREF(file);
145 145 goto quit;
146 146 }
147 147
148 148 Py_INCREF(Py_None);
149 149 return Py_None;
150 150 quit:
151 151 return NULL;
152 152 }
153 153
154 154 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
155 155 {
156 156 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
157 157 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
158 char state, *str, *cur, *end, *cpos;
158 char state, *cur, *str, *cpos;
159 159 int mode, size, mtime;
160 160 unsigned int flen;
161 int len;
161 int len, pos = 40;
162 162
163 163 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
164 164 &PyDict_Type, &dmap,
165 165 &PyDict_Type, &cmap,
166 166 &str, &len))
167 167 goto quit;
168 168
169 169 /* read parents */
170 170 if (len < 40)
171 171 goto quit;
172 172
173 173 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
174 174 if (!parents)
175 175 goto quit;
176 176
177 177 /* read filenames */
178 cur = str + 40;
179 end = str + len;
180
181 while (cur < end - 17) {
178 while (pos >= 40 && pos < len) {
179 cur = str + pos;
182 180 /* unpack header */
183 181 state = *cur;
184 182 mode = getbe32(cur + 1);
185 183 size = getbe32(cur + 5);
186 184 mtime = getbe32(cur + 9);
187 185 flen = getbe32(cur + 13);
186 pos += 17;
188 187 cur += 17;
189 if (cur + flen > end || cur + flen < cur) {
188 if (flen > len - pos || flen < 0) {
190 189 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
191 190 goto quit;
192 191 }
193 192
194 193 entry = Py_BuildValue("ciii", state, mode, size, mtime);
195 194 if (!entry)
196 195 goto quit;
197 196 PyObject_GC_UnTrack(entry); /* don't waste time with this */
198 197
199 198 cpos = memchr(cur, 0, flen);
200 199 if (cpos) {
201 200 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
202 201 cname = PyBytes_FromStringAndSize(cpos + 1,
203 202 flen - (cpos - cur) - 1);
204 203 if (!fname || !cname ||
205 204 PyDict_SetItem(cmap, fname, cname) == -1 ||
206 205 PyDict_SetItem(dmap, fname, entry) == -1)
207 206 goto quit;
208 207 Py_DECREF(cname);
209 208 } else {
210 209 fname = PyBytes_FromStringAndSize(cur, flen);
211 210 if (!fname ||
212 211 PyDict_SetItem(dmap, fname, entry) == -1)
213 212 goto quit;
214 213 }
215 cur += flen;
216 214 Py_DECREF(fname);
217 215 Py_DECREF(entry);
218 216 fname = cname = entry = NULL;
217 pos += flen;
219 218 }
220 219
221 220 ret = parents;
222 221 Py_INCREF(ret);
223 222 quit:
224 223 Py_XDECREF(fname);
225 224 Py_XDECREF(cname);
226 225 Py_XDECREF(entry);
227 226 Py_XDECREF(parents);
228 227 return ret;
229 228 }
230 229
231 230 static inline int getintat(PyObject *tuple, int off, uint32_t *v)
232 231 {
233 232 PyObject *o = PyTuple_GET_ITEM(tuple, off);
234 233 long val;
235 234
236 235 if (PyInt_Check(o))
237 236 val = PyInt_AS_LONG(o);
238 237 else if (PyLong_Check(o)) {
239 238 val = PyLong_AsLong(o);
240 239 if (val == -1 && PyErr_Occurred())
241 240 return -1;
242 241 } else {
243 242 PyErr_SetString(PyExc_TypeError, "expected an int or long");
244 243 return -1;
245 244 }
246 245 if (LONG_MAX > INT_MAX && (val > INT_MAX || val < INT_MIN)) {
247 246 PyErr_SetString(PyExc_OverflowError,
248 247 "Python value to large to convert to uint32_t");
249 248 return -1;
250 249 }
251 250 *v = (uint32_t)val;
252 251 return 0;
253 252 }
254 253
255 254 static PyObject *dirstate_unset;
256 255
257 256 /*
258 257 * Efficiently pack a dirstate object into its on-disk format.
259 258 */
260 259 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
261 260 {
262 261 PyObject *packobj = NULL;
263 262 PyObject *map, *copymap, *pl;
264 263 Py_ssize_t nbytes, pos, l;
265 264 PyObject *k, *v, *pn;
266 265 char *p, *s;
267 266 double now;
268 267
269 268 if (!PyArg_ParseTuple(args, "O!O!Od:pack_dirstate",
270 269 &PyDict_Type, &map, &PyDict_Type, &copymap,
271 270 &pl, &now))
272 271 return NULL;
273 272
274 273 if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
275 274 PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
276 275 return NULL;
277 276 }
278 277
279 278 /* Figure out how much we need to allocate. */
280 279 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
281 280 PyObject *c;
282 281 if (!PyString_Check(k)) {
283 282 PyErr_SetString(PyExc_TypeError, "expected string key");
284 283 goto bail;
285 284 }
286 285 nbytes += PyString_GET_SIZE(k) + 17;
287 286 c = PyDict_GetItem(copymap, k);
288 287 if (c) {
289 288 if (!PyString_Check(c)) {
290 289 PyErr_SetString(PyExc_TypeError,
291 290 "expected string key");
292 291 goto bail;
293 292 }
294 293 nbytes += PyString_GET_SIZE(c) + 1;
295 294 }
296 295 }
297 296
298 297 packobj = PyString_FromStringAndSize(NULL, nbytes);
299 298 if (packobj == NULL)
300 299 goto bail;
301 300
302 301 p = PyString_AS_STRING(packobj);
303 302
304 303 pn = PySequence_ITEM(pl, 0);
305 304 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
306 305 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
307 306 goto bail;
308 307 }
309 308 memcpy(p, s, l);
310 309 p += 20;
311 310 pn = PySequence_ITEM(pl, 1);
312 311 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
313 312 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
314 313 goto bail;
315 314 }
316 315 memcpy(p, s, l);
317 316 p += 20;
318 317
319 318 for (pos = 0; PyDict_Next(map, &pos, &k, &v); ) {
320 319 uint32_t mode, size, mtime;
321 320 Py_ssize_t len, l;
322 321 PyObject *o;
323 322 char *s, *t;
324 323
325 324 if (!PyTuple_Check(v) || PyTuple_GET_SIZE(v) != 4) {
326 325 PyErr_SetString(PyExc_TypeError, "expected a 4-tuple");
327 326 goto bail;
328 327 }
329 328 o = PyTuple_GET_ITEM(v, 0);
330 329 if (PyString_AsStringAndSize(o, &s, &l) == -1 || l != 1) {
331 330 PyErr_SetString(PyExc_TypeError, "expected one byte");
332 331 goto bail;
333 332 }
334 333 *p++ = *s;
335 334 if (getintat(v, 1, &mode) == -1)
336 335 goto bail;
337 336 if (getintat(v, 2, &size) == -1)
338 337 goto bail;
339 338 if (getintat(v, 3, &mtime) == -1)
340 339 goto bail;
341 340 if (*s == 'n' && mtime == (uint32_t)now) {
342 341 /* See pure/parsers.py:pack_dirstate for why we do
343 342 * this. */
344 343 if (PyDict_SetItem(map, k, dirstate_unset) == -1)
345 344 goto bail;
346 345 mtime = -1;
347 346 }
348 347 putbe32(mode, p);
349 348 putbe32(size, p + 4);
350 349 putbe32(mtime, p + 8);
351 350 t = p + 12;
352 351 p += 16;
353 352 len = PyString_GET_SIZE(k);
354 353 memcpy(p, PyString_AS_STRING(k), len);
355 354 p += len;
356 355 o = PyDict_GetItem(copymap, k);
357 356 if (o) {
358 357 *p++ = '\0';
359 358 l = PyString_GET_SIZE(o);
360 359 memcpy(p, PyString_AS_STRING(o), l);
361 360 p += l;
362 361 len += l + 1;
363 362 }
364 363 putbe32((uint32_t)len, t);
365 364 }
366 365
367 366 pos = p - PyString_AS_STRING(packobj);
368 367 if (pos != nbytes) {
369 368 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
370 369 (long)pos, (long)nbytes);
371 370 goto bail;
372 371 }
373 372
374 373 return packobj;
375 374 bail:
376 375 Py_XDECREF(packobj);
377 376 return NULL;
378 377 }
379 378
380 379 /*
381 380 * A base-16 trie for fast node->rev mapping.
382 381 *
383 382 * Positive value is index of the next node in the trie
384 383 * Negative value is a leaf: -(rev + 1)
385 384 * Zero is empty
386 385 */
387 386 typedef struct {
388 387 int children[16];
389 388 } nodetree;
390 389
391 390 /*
392 391 * This class has two behaviours.
393 392 *
394 393 * When used in a list-like way (with integer keys), we decode an
395 394 * entry in a RevlogNG index file on demand. Our last entry is a
396 395 * sentinel, always a nullid. We have limited support for
397 396 * integer-keyed insert and delete, only at elements right before the
398 397 * sentinel.
399 398 *
400 399 * With string keys, we lazily perform a reverse mapping from node to
401 400 * rev, using a base-16 trie.
402 401 */
403 402 typedef struct {
404 403 PyObject_HEAD
405 404 /* Type-specific fields go here. */
406 405 PyObject *data; /* raw bytes of index */
407 406 PyObject **cache; /* cached tuples */
408 407 const char **offsets; /* populated on demand */
409 408 Py_ssize_t raw_length; /* original number of elements */
410 409 Py_ssize_t length; /* current number of elements */
411 410 PyObject *added; /* populated on demand */
412 411 PyObject *headrevs; /* cache, invalidated on changes */
413 412 nodetree *nt; /* base-16 trie */
414 413 int ntlength; /* # nodes in use */
415 414 int ntcapacity; /* # nodes allocated */
416 415 int ntdepth; /* maximum depth of tree */
417 416 int ntsplits; /* # splits performed */
418 417 int ntrev; /* last rev scanned */
419 418 int ntlookups; /* # lookups */
420 419 int ntmisses; /* # lookups that miss the cache */
421 420 int inlined;
422 421 } indexObject;
423 422
424 423 static Py_ssize_t index_length(const indexObject *self)
425 424 {
426 425 if (self->added == NULL)
427 426 return self->length;
428 427 return self->length + PyList_GET_SIZE(self->added);
429 428 }
430 429
431 430 static PyObject *nullentry;
432 431 static const char nullid[20];
433 432
434 433 static long inline_scan(indexObject *self, const char **offsets);
435 434
436 435 #if LONG_MAX == 0x7fffffffL
437 436 static char *tuple_format = "Kiiiiiis#";
438 437 #else
439 438 static char *tuple_format = "kiiiiiis#";
440 439 #endif
441 440
442 441 /* A RevlogNG v1 index entry is 64 bytes long. */
443 442 static const long v1_hdrsize = 64;
444 443
445 444 /*
446 445 * Return a pointer to the beginning of a RevlogNG record.
447 446 */
448 447 static const char *index_deref(indexObject *self, Py_ssize_t pos)
449 448 {
450 449 if (self->inlined && pos > 0) {
451 450 if (self->offsets == NULL) {
452 451 self->offsets = malloc(self->raw_length *
453 452 sizeof(*self->offsets));
454 453 if (self->offsets == NULL)
455 454 return (const char *)PyErr_NoMemory();
456 455 inline_scan(self, self->offsets);
457 456 }
458 457 return self->offsets[pos];
459 458 }
460 459
461 460 return PyString_AS_STRING(self->data) + pos * v1_hdrsize;
462 461 }
463 462
464 463 /*
465 464 * RevlogNG format (all in big endian, data may be inlined):
466 465 * 6 bytes: offset
467 466 * 2 bytes: flags
468 467 * 4 bytes: compressed length
469 468 * 4 bytes: uncompressed length
470 469 * 4 bytes: base revision
471 470 * 4 bytes: link revision
472 471 * 4 bytes: parent 1 revision
473 472 * 4 bytes: parent 2 revision
474 473 * 32 bytes: nodeid (only 20 bytes used)
475 474 */
476 475 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
477 476 {
478 477 uint64_t offset_flags;
479 478 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
480 479 const char *c_node_id;
481 480 const char *data;
482 481 Py_ssize_t length = index_length(self);
483 482 PyObject *entry;
484 483
485 484 if (pos < 0)
486 485 pos += length;
487 486
488 487 if (pos < 0 || pos >= length) {
489 488 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
490 489 return NULL;
491 490 }
492 491
493 492 if (pos == length - 1) {
494 493 Py_INCREF(nullentry);
495 494 return nullentry;
496 495 }
497 496
498 497 if (pos >= self->length - 1) {
499 498 PyObject *obj;
500 499 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
501 500 Py_INCREF(obj);
502 501 return obj;
503 502 }
504 503
505 504 if (self->cache) {
506 505 if (self->cache[pos]) {
507 506 Py_INCREF(self->cache[pos]);
508 507 return self->cache[pos];
509 508 }
510 509 } else {
511 510 self->cache = calloc(self->raw_length, sizeof(PyObject *));
512 511 if (self->cache == NULL)
513 512 return PyErr_NoMemory();
514 513 }
515 514
516 515 data = index_deref(self, pos);
517 516 if (data == NULL)
518 517 return NULL;
519 518
520 519 offset_flags = getbe32(data + 4);
521 520 if (pos == 0) /* mask out version number for the first entry */
522 521 offset_flags &= 0xFFFF;
523 522 else {
524 523 uint32_t offset_high = getbe32(data);
525 524 offset_flags |= ((uint64_t)offset_high) << 32;
526 525 }
527 526
528 527 comp_len = getbe32(data + 8);
529 528 uncomp_len = getbe32(data + 12);
530 529 base_rev = getbe32(data + 16);
531 530 link_rev = getbe32(data + 20);
532 531 parent_1 = getbe32(data + 24);
533 532 parent_2 = getbe32(data + 28);
534 533 c_node_id = data + 32;
535 534
536 535 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
537 536 uncomp_len, base_rev, link_rev,
538 537 parent_1, parent_2, c_node_id, 20);
539 538
540 539 if (entry) {
541 540 PyObject_GC_UnTrack(entry);
542 541 Py_INCREF(entry);
543 542 }
544 543
545 544 self->cache[pos] = entry;
546 545
547 546 return entry;
548 547 }
549 548
550 549 /*
551 550 * Return the 20-byte SHA of the node corresponding to the given rev.
552 551 */
553 552 static const char *index_node(indexObject *self, Py_ssize_t pos)
554 553 {
555 554 Py_ssize_t length = index_length(self);
556 555 const char *data;
557 556
558 557 if (pos == length - 1 || pos == INT_MAX)
559 558 return nullid;
560 559
561 560 if (pos >= length)
562 561 return NULL;
563 562
564 563 if (pos >= self->length - 1) {
565 564 PyObject *tuple, *str;
566 565 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
567 566 str = PyTuple_GetItem(tuple, 7);
568 567 return str ? PyString_AS_STRING(str) : NULL;
569 568 }
570 569
571 570 data = index_deref(self, pos);
572 571 return data ? data + 32 : NULL;
573 572 }
574 573
575 574 static int nt_insert(indexObject *self, const char *node, int rev);
576 575
577 576 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
578 577 {
579 578 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
580 579 return -1;
581 580 if (*nodelen == 20)
582 581 return 0;
583 582 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
584 583 return -1;
585 584 }
586 585
587 586 static PyObject *index_insert(indexObject *self, PyObject *args)
588 587 {
589 588 PyObject *obj;
590 589 char *node;
591 590 long offset;
592 591 Py_ssize_t len, nodelen;
593 592
594 593 if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
595 594 return NULL;
596 595
597 596 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
598 597 PyErr_SetString(PyExc_TypeError, "8-tuple required");
599 598 return NULL;
600 599 }
601 600
602 601 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
603 602 return NULL;
604 603
605 604 len = index_length(self);
606 605
607 606 if (offset < 0)
608 607 offset += len;
609 608
610 609 if (offset != len - 1) {
611 610 PyErr_SetString(PyExc_IndexError,
612 611 "insert only supported at index -1");
613 612 return NULL;
614 613 }
615 614
616 615 if (offset > INT_MAX) {
617 616 PyErr_SetString(PyExc_ValueError,
618 617 "currently only 2**31 revs supported");
619 618 return NULL;
620 619 }
621 620
622 621 if (self->added == NULL) {
623 622 self->added = PyList_New(0);
624 623 if (self->added == NULL)
625 624 return NULL;
626 625 }
627 626
628 627 if (PyList_Append(self->added, obj) == -1)
629 628 return NULL;
630 629
631 630 if (self->nt)
632 631 nt_insert(self, node, (int)offset);
633 632
634 633 Py_CLEAR(self->headrevs);
635 634 Py_RETURN_NONE;
636 635 }
637 636
638 637 static void _index_clearcaches(indexObject *self)
639 638 {
640 639 if (self->cache) {
641 640 Py_ssize_t i;
642 641
643 642 for (i = 0; i < self->raw_length; i++)
644 643 Py_CLEAR(self->cache[i]);
645 644 free(self->cache);
646 645 self->cache = NULL;
647 646 }
648 647 if (self->offsets) {
649 648 free(self->offsets);
650 649 self->offsets = NULL;
651 650 }
652 651 if (self->nt) {
653 652 free(self->nt);
654 653 self->nt = NULL;
655 654 }
656 655 Py_CLEAR(self->headrevs);
657 656 }
658 657
659 658 static PyObject *index_clearcaches(indexObject *self)
660 659 {
661 660 _index_clearcaches(self);
662 661 self->ntlength = self->ntcapacity = 0;
663 662 self->ntdepth = self->ntsplits = 0;
664 663 self->ntrev = -1;
665 664 self->ntlookups = self->ntmisses = 0;
666 665 Py_RETURN_NONE;
667 666 }
668 667
669 668 static PyObject *index_stats(indexObject *self)
670 669 {
671 670 PyObject *obj = PyDict_New();
672 671
673 672 if (obj == NULL)
674 673 return NULL;
675 674
676 675 #define istat(__n, __d) \
677 676 if (PyDict_SetItemString(obj, __d, PyInt_FromSsize_t(self->__n)) == -1) \
678 677 goto bail;
679 678
680 679 if (self->added) {
681 680 Py_ssize_t len = PyList_GET_SIZE(self->added);
682 681 if (PyDict_SetItemString(obj, "index entries added",
683 682 PyInt_FromSsize_t(len)) == -1)
684 683 goto bail;
685 684 }
686 685
687 686 if (self->raw_length != self->length - 1)
688 687 istat(raw_length, "revs on disk");
689 688 istat(length, "revs in memory");
690 689 istat(ntcapacity, "node trie capacity");
691 690 istat(ntdepth, "node trie depth");
692 691 istat(ntlength, "node trie count");
693 692 istat(ntlookups, "node trie lookups");
694 693 istat(ntmisses, "node trie misses");
695 694 istat(ntrev, "node trie last rev scanned");
696 695 istat(ntsplits, "node trie splits");
697 696
698 697 #undef istat
699 698
700 699 return obj;
701 700
702 701 bail:
703 702 Py_XDECREF(obj);
704 703 return NULL;
705 704 }
706 705
707 706 /*
708 707 * When we cache a list, we want to be sure the caller can't mutate
709 708 * the cached copy.
710 709 */
711 710 static PyObject *list_copy(PyObject *list)
712 711 {
713 712 Py_ssize_t len = PyList_GET_SIZE(list);
714 713 PyObject *newlist = PyList_New(len);
715 714 Py_ssize_t i;
716 715
717 716 if (newlist == NULL)
718 717 return NULL;
719 718
720 719 for (i = 0; i < len; i++) {
721 720 PyObject *obj = PyList_GET_ITEM(list, i);
722 721 Py_INCREF(obj);
723 722 PyList_SET_ITEM(newlist, i, obj);
724 723 }
725 724
726 725 return newlist;
727 726 }
728 727
729 728 static PyObject *index_headrevs(indexObject *self)
730 729 {
731 730 Py_ssize_t i, len, addlen;
732 731 char *nothead = NULL;
733 732 PyObject *heads;
734 733
735 734 if (self->headrevs)
736 735 return list_copy(self->headrevs);
737 736
738 737 len = index_length(self) - 1;
739 738 heads = PyList_New(0);
740 739 if (heads == NULL)
741 740 goto bail;
742 741 if (len == 0) {
743 742 PyObject *nullid = PyInt_FromLong(-1);
744 743 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
745 744 Py_XDECREF(nullid);
746 745 goto bail;
747 746 }
748 747 goto done;
749 748 }
750 749
751 750 nothead = calloc(len, 1);
752 751 if (nothead == NULL)
753 752 goto bail;
754 753
755 754 for (i = 0; i < self->raw_length; i++) {
756 755 const char *data = index_deref(self, i);
757 756 int parent_1 = getbe32(data + 24);
758 757 int parent_2 = getbe32(data + 28);
759 758 if (parent_1 >= 0)
760 759 nothead[parent_1] = 1;
761 760 if (parent_2 >= 0)
762 761 nothead[parent_2] = 1;
763 762 }
764 763
765 764 addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
766 765
767 766 for (i = 0; i < addlen; i++) {
768 767 PyObject *rev = PyList_GET_ITEM(self->added, i);
769 768 PyObject *p1 = PyTuple_GET_ITEM(rev, 5);
770 769 PyObject *p2 = PyTuple_GET_ITEM(rev, 6);
771 770 long parent_1, parent_2;
772 771
773 772 if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
774 773 PyErr_SetString(PyExc_TypeError,
775 774 "revlog parents are invalid");
776 775 goto bail;
777 776 }
778 777 parent_1 = PyInt_AS_LONG(p1);
779 778 parent_2 = PyInt_AS_LONG(p2);
780 779 if (parent_1 >= 0)
781 780 nothead[parent_1] = 1;
782 781 if (parent_2 >= 0)
783 782 nothead[parent_2] = 1;
784 783 }
785 784
786 785 for (i = 0; i < len; i++) {
787 786 PyObject *head;
788 787
789 788 if (nothead[i])
790 789 continue;
791 790 head = PyInt_FromLong(i);
792 791 if (head == NULL || PyList_Append(heads, head) == -1) {
793 792 Py_XDECREF(head);
794 793 goto bail;
795 794 }
796 795 }
797 796
798 797 done:
799 798 self->headrevs = heads;
800 799 free(nothead);
801 800 return list_copy(self->headrevs);
802 801 bail:
803 802 Py_XDECREF(heads);
804 803 free(nothead);
805 804 return NULL;
806 805 }
807 806
808 807 static inline int nt_level(const char *node, Py_ssize_t level)
809 808 {
810 809 int v = node[level>>1];
811 810 if (!(level & 1))
812 811 v >>= 4;
813 812 return v & 0xf;
814 813 }
815 814
816 815 /*
817 816 * Return values:
818 817 *
819 818 * -4: match is ambiguous (multiple candidates)
820 819 * -2: not found
821 820 * rest: valid rev
822 821 */
823 822 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
824 823 int hex)
825 824 {
826 825 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
827 826 int level, maxlevel, off;
828 827
829 828 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
830 829 return -1;
831 830
832 831 if (self->nt == NULL)
833 832 return -2;
834 833
835 834 if (hex)
836 835 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
837 836 else
838 837 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
839 838
840 839 for (level = off = 0; level < maxlevel; level++) {
841 840 int k = getnybble(node, level);
842 841 nodetree *n = &self->nt[off];
843 842 int v = n->children[k];
844 843
845 844 if (v < 0) {
846 845 const char *n;
847 846 Py_ssize_t i;
848 847
849 848 v = -v - 1;
850 849 n = index_node(self, v);
851 850 if (n == NULL)
852 851 return -2;
853 852 for (i = level; i < maxlevel; i++)
854 853 if (getnybble(node, i) != nt_level(n, i))
855 854 return -2;
856 855 return v;
857 856 }
858 857 if (v == 0)
859 858 return -2;
860 859 off = v;
861 860 }
862 861 /* multiple matches against an ambiguous prefix */
863 862 return -4;
864 863 }
865 864
866 865 static int nt_new(indexObject *self)
867 866 {
868 867 if (self->ntlength == self->ntcapacity) {
869 868 self->ntcapacity *= 2;
870 869 self->nt = realloc(self->nt,
871 870 self->ntcapacity * sizeof(nodetree));
872 871 if (self->nt == NULL) {
873 872 PyErr_SetString(PyExc_MemoryError, "out of memory");
874 873 return -1;
875 874 }
876 875 memset(&self->nt[self->ntlength], 0,
877 876 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
878 877 }
879 878 return self->ntlength++;
880 879 }
881 880
882 881 static int nt_insert(indexObject *self, const char *node, int rev)
883 882 {
884 883 int level = 0;
885 884 int off = 0;
886 885
887 886 while (level < 40) {
888 887 int k = nt_level(node, level);
889 888 nodetree *n;
890 889 int v;
891 890
892 891 n = &self->nt[off];
893 892 v = n->children[k];
894 893
895 894 if (v == 0) {
896 895 n->children[k] = -rev - 1;
897 896 return 0;
898 897 }
899 898 if (v < 0) {
900 899 const char *oldnode = index_node(self, -v - 1);
901 900 int noff;
902 901
903 902 if (!oldnode || !memcmp(oldnode, node, 20)) {
904 903 n->children[k] = -rev - 1;
905 904 return 0;
906 905 }
907 906 noff = nt_new(self);
908 907 if (noff == -1)
909 908 return -1;
910 909 /* self->nt may have been changed by realloc */
911 910 self->nt[off].children[k] = noff;
912 911 off = noff;
913 912 n = &self->nt[off];
914 913 n->children[nt_level(oldnode, ++level)] = v;
915 914 if (level > self->ntdepth)
916 915 self->ntdepth = level;
917 916 self->ntsplits += 1;
918 917 } else {
919 918 level += 1;
920 919 off = v;
921 920 }
922 921 }
923 922
924 923 return -1;
925 924 }
926 925
927 926 static int nt_init(indexObject *self)
928 927 {
929 928 if (self->nt == NULL) {
930 929 if (self->raw_length > INT_MAX) {
931 930 PyErr_SetString(PyExc_ValueError, "overflow in nt_init");
932 931 return -1;
933 932 }
934 933 self->ntcapacity = self->raw_length < 4
935 934 ? 4 : (int)self->raw_length / 2;
936 935
937 936 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
938 937 if (self->nt == NULL) {
939 938 PyErr_NoMemory();
940 939 return -1;
941 940 }
942 941 self->ntlength = 1;
943 942 self->ntrev = (int)index_length(self) - 1;
944 943 self->ntlookups = 1;
945 944 self->ntmisses = 0;
946 945 if (nt_insert(self, nullid, INT_MAX) == -1)
947 946 return -1;
948 947 }
949 948 return 0;
950 949 }
951 950
952 951 /*
953 952 * Return values:
954 953 *
955 954 * -3: error (exception set)
956 955 * -2: not found (no exception set)
957 956 * rest: valid rev
958 957 */
959 958 static int index_find_node(indexObject *self,
960 959 const char *node, Py_ssize_t nodelen)
961 960 {
962 961 int rev;
963 962
964 963 self->ntlookups++;
965 964 rev = nt_find(self, node, nodelen, 0);
966 965 if (rev >= -1)
967 966 return rev;
968 967
969 968 if (nt_init(self) == -1)
970 969 return -3;
971 970
972 971 /*
973 972 * For the first handful of lookups, we scan the entire index,
974 973 * and cache only the matching nodes. This optimizes for cases
975 974 * like "hg tip", where only a few nodes are accessed.
976 975 *
977 976 * After that, we cache every node we visit, using a single
978 977 * scan amortized over multiple lookups. This gives the best
979 978 * bulk performance, e.g. for "hg log".
980 979 */
981 980 if (self->ntmisses++ < 4) {
982 981 for (rev = self->ntrev - 1; rev >= 0; rev--) {
983 982 const char *n = index_node(self, rev);
984 983 if (n == NULL)
985 984 return -2;
986 985 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
987 986 if (nt_insert(self, n, rev) == -1)
988 987 return -3;
989 988 break;
990 989 }
991 990 }
992 991 } else {
993 992 for (rev = self->ntrev - 1; rev >= 0; rev--) {
994 993 const char *n = index_node(self, rev);
995 994 if (n == NULL) {
996 995 self->ntrev = rev + 1;
997 996 return -2;
998 997 }
999 998 if (nt_insert(self, n, rev) == -1) {
1000 999 self->ntrev = rev + 1;
1001 1000 return -3;
1002 1001 }
1003 1002 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1004 1003 break;
1005 1004 }
1006 1005 }
1007 1006 self->ntrev = rev;
1008 1007 }
1009 1008
1010 1009 if (rev >= 0)
1011 1010 return rev;
1012 1011 return -2;
1013 1012 }
1014 1013
1015 1014 static PyObject *raise_revlog_error(void)
1016 1015 {
1017 1016 static PyObject *errclass;
1018 1017 PyObject *mod = NULL, *errobj;
1019 1018
1020 1019 if (errclass == NULL) {
1021 1020 PyObject *dict;
1022 1021
1023 1022 mod = PyImport_ImportModule("mercurial.error");
1024 1023 if (mod == NULL)
1025 1024 goto classfail;
1026 1025
1027 1026 dict = PyModule_GetDict(mod);
1028 1027 if (dict == NULL)
1029 1028 goto classfail;
1030 1029
1031 1030 errclass = PyDict_GetItemString(dict, "RevlogError");
1032 1031 if (errclass == NULL) {
1033 1032 PyErr_SetString(PyExc_SystemError,
1034 1033 "could not find RevlogError");
1035 1034 goto classfail;
1036 1035 }
1037 1036 Py_INCREF(errclass);
1038 1037 }
1039 1038
1040 1039 errobj = PyObject_CallFunction(errclass, NULL);
1041 1040 if (errobj == NULL)
1042 1041 return NULL;
1043 1042 PyErr_SetObject(errclass, errobj);
1044 1043 return errobj;
1045 1044
1046 1045 classfail:
1047 1046 Py_XDECREF(mod);
1048 1047 return NULL;
1049 1048 }
1050 1049
1051 1050 static PyObject *index_getitem(indexObject *self, PyObject *value)
1052 1051 {
1053 1052 char *node;
1054 1053 Py_ssize_t nodelen;
1055 1054 int rev;
1056 1055
1057 1056 if (PyInt_Check(value))
1058 1057 return index_get(self, PyInt_AS_LONG(value));
1059 1058
1060 1059 if (node_check(value, &node, &nodelen) == -1)
1061 1060 return NULL;
1062 1061 rev = index_find_node(self, node, nodelen);
1063 1062 if (rev >= -1)
1064 1063 return PyInt_FromLong(rev);
1065 1064 if (rev == -2)
1066 1065 raise_revlog_error();
1067 1066 return NULL;
1068 1067 }
1069 1068
1070 1069 static int nt_partialmatch(indexObject *self, const char *node,
1071 1070 Py_ssize_t nodelen)
1072 1071 {
1073 1072 int rev;
1074 1073
1075 1074 if (nt_init(self) == -1)
1076 1075 return -3;
1077 1076
1078 1077 if (self->ntrev > 0) {
1079 1078 /* ensure that the radix tree is fully populated */
1080 1079 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1081 1080 const char *n = index_node(self, rev);
1082 1081 if (n == NULL)
1083 1082 return -2;
1084 1083 if (nt_insert(self, n, rev) == -1)
1085 1084 return -3;
1086 1085 }
1087 1086 self->ntrev = rev;
1088 1087 }
1089 1088
1090 1089 return nt_find(self, node, nodelen, 1);
1091 1090 }
1092 1091
1093 1092 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1094 1093 {
1095 1094 const char *fullnode;
1096 1095 int nodelen;
1097 1096 char *node;
1098 1097 int rev, i;
1099 1098
1100 1099 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
1101 1100 return NULL;
1102 1101
1103 1102 if (nodelen < 4) {
1104 1103 PyErr_SetString(PyExc_ValueError, "key too short");
1105 1104 return NULL;
1106 1105 }
1107 1106
1108 1107 if (nodelen > 40) {
1109 1108 PyErr_SetString(PyExc_ValueError, "key too long");
1110 1109 return NULL;
1111 1110 }
1112 1111
1113 1112 for (i = 0; i < nodelen; i++)
1114 1113 hexdigit(node, i);
1115 1114 if (PyErr_Occurred()) {
1116 1115 /* input contains non-hex characters */
1117 1116 PyErr_Clear();
1118 1117 Py_RETURN_NONE;
1119 1118 }
1120 1119
1121 1120 rev = nt_partialmatch(self, node, nodelen);
1122 1121
1123 1122 switch (rev) {
1124 1123 case -4:
1125 1124 raise_revlog_error();
1126 1125 case -3:
1127 1126 return NULL;
1128 1127 case -2:
1129 1128 Py_RETURN_NONE;
1130 1129 case -1:
1131 1130 return PyString_FromStringAndSize(nullid, 20);
1132 1131 }
1133 1132
1134 1133 fullnode = index_node(self, rev);
1135 1134 if (fullnode == NULL) {
1136 1135 PyErr_Format(PyExc_IndexError,
1137 1136 "could not access rev %d", rev);
1138 1137 return NULL;
1139 1138 }
1140 1139 return PyString_FromStringAndSize(fullnode, 20);
1141 1140 }
1142 1141
1143 1142 static PyObject *index_m_get(indexObject *self, PyObject *args)
1144 1143 {
1145 1144 Py_ssize_t nodelen;
1146 1145 PyObject *val;
1147 1146 char *node;
1148 1147 int rev;
1149 1148
1150 1149 if (!PyArg_ParseTuple(args, "O", &val))
1151 1150 return NULL;
1152 1151 if (node_check(val, &node, &nodelen) == -1)
1153 1152 return NULL;
1154 1153 rev = index_find_node(self, node, nodelen);
1155 1154 if (rev == -3)
1156 1155 return NULL;
1157 1156 if (rev == -2)
1158 1157 Py_RETURN_NONE;
1159 1158 return PyInt_FromLong(rev);
1160 1159 }
1161 1160
1162 1161 static int index_contains(indexObject *self, PyObject *value)
1163 1162 {
1164 1163 char *node;
1165 1164 Py_ssize_t nodelen;
1166 1165
1167 1166 if (PyInt_Check(value)) {
1168 1167 long rev = PyInt_AS_LONG(value);
1169 1168 return rev >= -1 && rev < index_length(self);
1170 1169 }
1171 1170
1172 1171 if (node_check(value, &node, &nodelen) == -1)
1173 1172 return -1;
1174 1173
1175 1174 switch (index_find_node(self, node, nodelen)) {
1176 1175 case -3:
1177 1176 return -1;
1178 1177 case -2:
1179 1178 return 0;
1180 1179 default:
1181 1180 return 1;
1182 1181 }
1183 1182 }
1184 1183
1185 1184 static inline void index_get_parents(indexObject *self, int rev, int *ps)
1186 1185 {
1187 1186 if (rev >= self->length - 1) {
1188 1187 PyObject *tuple = PyList_GET_ITEM(self->added,
1189 1188 rev - self->length + 1);
1190 1189 ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5));
1191 1190 ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6));
1192 1191 } else {
1193 1192 const char *data = index_deref(self, rev);
1194 1193 ps[0] = getbe32(data + 24);
1195 1194 ps[1] = getbe32(data + 28);
1196 1195 }
1197 1196 }
1198 1197
1199 1198 typedef uint64_t bitmask;
1200 1199
1201 1200 /*
1202 1201 * Given a disjoint set of revs, return all candidates for the
1203 1202 * greatest common ancestor. In revset notation, this is the set
1204 1203 * "heads(::a and ::b and ...)"
1205 1204 */
1206 1205 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
1207 1206 int revcount)
1208 1207 {
1209 1208 const bitmask allseen = (1ull << revcount) - 1;
1210 1209 const bitmask poison = 1ull << revcount;
1211 1210 PyObject *gca = PyList_New(0);
1212 1211 int i, v, interesting, left;
1213 1212 int maxrev = -1;
1214 1213 long sp;
1215 1214 bitmask *seen;
1216 1215
1217 1216 if (gca == NULL)
1218 1217 return PyErr_NoMemory();
1219 1218
1220 1219 for (i = 0; i < revcount; i++) {
1221 1220 if (revs[i] > maxrev)
1222 1221 maxrev = revs[i];
1223 1222 }
1224 1223
1225 1224 seen = calloc(sizeof(*seen), maxrev + 1);
1226 1225 if (seen == NULL) {
1227 1226 Py_DECREF(gca);
1228 1227 return PyErr_NoMemory();
1229 1228 }
1230 1229
1231 1230 for (i = 0; i < revcount; i++)
1232 1231 seen[revs[i]] = 1ull << i;
1233 1232
1234 1233 interesting = left = revcount;
1235 1234
1236 1235 for (v = maxrev; v >= 0 && interesting; v--) {
1237 1236 long sv = seen[v];
1238 1237 int parents[2];
1239 1238
1240 1239 if (!sv)
1241 1240 continue;
1242 1241
1243 1242 if (sv < poison) {
1244 1243 interesting -= 1;
1245 1244 if (sv == allseen) {
1246 1245 PyObject *obj = PyInt_FromLong(v);
1247 1246 if (obj == NULL)
1248 1247 goto bail;
1249 1248 if (PyList_Append(gca, obj) == -1) {
1250 1249 Py_DECREF(obj);
1251 1250 goto bail;
1252 1251 }
1253 1252 sv |= poison;
1254 1253 for (i = 0; i < revcount; i++) {
1255 1254 if (revs[i] == v) {
1256 1255 if (--left <= 1)
1257 1256 goto done;
1258 1257 break;
1259 1258 }
1260 1259 }
1261 1260 }
1262 1261 }
1263 1262 index_get_parents(self, v, parents);
1264 1263
1265 1264 for (i = 0; i < 2; i++) {
1266 1265 int p = parents[i];
1267 1266 if (p == -1)
1268 1267 continue;
1269 1268 sp = seen[p];
1270 1269 if (sv < poison) {
1271 1270 if (sp == 0) {
1272 1271 seen[p] = sv;
1273 1272 interesting++;
1274 1273 }
1275 1274 else if (sp != sv)
1276 1275 seen[p] |= sv;
1277 1276 } else {
1278 1277 if (sp && sp < poison)
1279 1278 interesting--;
1280 1279 seen[p] = sv;
1281 1280 }
1282 1281 }
1283 1282 }
1284 1283
1285 1284 done:
1286 1285 free(seen);
1287 1286 return gca;
1288 1287 bail:
1289 1288 free(seen);
1290 1289 Py_XDECREF(gca);
1291 1290 return NULL;
1292 1291 }
1293 1292
1294 1293 /*
1295 1294 * Given a disjoint set of revs, return the subset with the longest
1296 1295 * path to the root.
1297 1296 */
1298 1297 static PyObject *find_deepest(indexObject *self, PyObject *revs)
1299 1298 {
1300 1299 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
1301 1300 static const Py_ssize_t capacity = 24;
1302 1301 int *depth, *interesting = NULL;
1303 1302 int i, j, v, ninteresting;
1304 1303 PyObject *dict = NULL, *keys;
1305 1304 long *seen = NULL;
1306 1305 int maxrev = -1;
1307 1306 long final;
1308 1307
1309 1308 if (revcount > capacity) {
1310 1309 PyErr_Format(PyExc_OverflowError,
1311 1310 "bitset size (%ld) > capacity (%ld)",
1312 1311 (long)revcount, (long)capacity);
1313 1312 return NULL;
1314 1313 }
1315 1314
1316 1315 for (i = 0; i < revcount; i++) {
1317 1316 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1318 1317 if (n > maxrev)
1319 1318 maxrev = n;
1320 1319 }
1321 1320
1322 1321 depth = calloc(sizeof(*depth), maxrev + 1);
1323 1322 if (depth == NULL)
1324 1323 return PyErr_NoMemory();
1325 1324
1326 1325 seen = calloc(sizeof(*seen), maxrev + 1);
1327 1326 if (seen == NULL) {
1328 1327 PyErr_NoMemory();
1329 1328 goto bail;
1330 1329 }
1331 1330
1332 1331 interesting = calloc(sizeof(*interesting), 2 << revcount);
1333 1332 if (interesting == NULL) {
1334 1333 PyErr_NoMemory();
1335 1334 goto bail;
1336 1335 }
1337 1336
1338 1337 if (PyList_Sort(revs) == -1)
1339 1338 goto bail;
1340 1339
1341 1340 for (i = 0; i < revcount; i++) {
1342 1341 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1343 1342 long b = 1l << i;
1344 1343 depth[n] = 1;
1345 1344 seen[n] = b;
1346 1345 interesting[b] = 1;
1347 1346 }
1348 1347
1349 1348 ninteresting = (int)revcount;
1350 1349
1351 1350 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
1352 1351 int dv = depth[v];
1353 1352 int parents[2];
1354 1353 long sv;
1355 1354
1356 1355 if (dv == 0)
1357 1356 continue;
1358 1357
1359 1358 sv = seen[v];
1360 1359 index_get_parents(self, v, parents);
1361 1360
1362 1361 for (i = 0; i < 2; i++) {
1363 1362 int p = parents[i];
1364 1363 long nsp, sp;
1365 1364 int dp;
1366 1365
1367 1366 if (p == -1)
1368 1367 continue;
1369 1368
1370 1369 dp = depth[p];
1371 1370 nsp = sp = seen[p];
1372 1371 if (dp <= dv) {
1373 1372 depth[p] = dv + 1;
1374 1373 if (sp != sv) {
1375 1374 interesting[sv] += 1;
1376 1375 nsp = seen[p] = sv;
1377 1376 if (sp) {
1378 1377 interesting[sp] -= 1;
1379 1378 if (interesting[sp] == 0)
1380 1379 ninteresting -= 1;
1381 1380 }
1382 1381 }
1383 1382 }
1384 1383 else if (dv == dp - 1) {
1385 1384 nsp = sp | sv;
1386 1385 if (nsp == sp)
1387 1386 continue;
1388 1387 seen[p] = nsp;
1389 1388 interesting[sp] -= 1;
1390 1389 if (interesting[sp] == 0 && interesting[nsp] > 0)
1391 1390 ninteresting -= 1;
1392 1391 interesting[nsp] += 1;
1393 1392 }
1394 1393 }
1395 1394 interesting[sv] -= 1;
1396 1395 if (interesting[sv] == 0)
1397 1396 ninteresting -= 1;
1398 1397 }
1399 1398
1400 1399 final = 0;
1401 1400 j = ninteresting;
1402 1401 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
1403 1402 if (interesting[i] == 0)
1404 1403 continue;
1405 1404 final |= i;
1406 1405 j -= 1;
1407 1406 }
1408 1407 if (final == 0)
1409 1408 return PyList_New(0);
1410 1409
1411 1410 dict = PyDict_New();
1412 1411 if (dict == NULL)
1413 1412 goto bail;
1414 1413
1415 1414 for (i = 0; i < revcount; i++) {
1416 1415 PyObject *key;
1417 1416
1418 1417 if ((final & (1 << i)) == 0)
1419 1418 continue;
1420 1419
1421 1420 key = PyList_GET_ITEM(revs, i);
1422 1421 Py_INCREF(key);
1423 1422 Py_INCREF(Py_None);
1424 1423 if (PyDict_SetItem(dict, key, Py_None) == -1) {
1425 1424 Py_DECREF(key);
1426 1425 Py_DECREF(Py_None);
1427 1426 goto bail;
1428 1427 }
1429 1428 }
1430 1429
1431 1430 keys = PyDict_Keys(dict);
1432 1431
1433 1432 free(depth);
1434 1433 free(seen);
1435 1434 free(interesting);
1436 1435 Py_DECREF(dict);
1437 1436
1438 1437 return keys;
1439 1438 bail:
1440 1439 free(depth);
1441 1440 free(seen);
1442 1441 free(interesting);
1443 1442 Py_XDECREF(dict);
1444 1443
1445 1444 return NULL;
1446 1445 }
1447 1446
1448 1447 /*
1449 1448 * Given a (possibly overlapping) set of revs, return the greatest
1450 1449 * common ancestors: those with the longest path to the root.
1451 1450 */
1452 1451 static PyObject *index_ancestors(indexObject *self, PyObject *args)
1453 1452 {
1454 1453 PyObject *ret = NULL, *gca = NULL;
1455 1454 Py_ssize_t argcount, i, len;
1456 1455 bitmask repeat = 0;
1457 1456 int revcount = 0;
1458 1457 int *revs;
1459 1458
1460 1459 argcount = PySequence_Length(args);
1461 1460 revs = malloc(argcount * sizeof(*revs));
1462 1461 if (argcount > 0 && revs == NULL)
1463 1462 return PyErr_NoMemory();
1464 1463 len = index_length(self) - 1;
1465 1464
1466 1465 for (i = 0; i < argcount; i++) {
1467 1466 static const int capacity = 24;
1468 1467 PyObject *obj = PySequence_GetItem(args, i);
1469 1468 bitmask x;
1470 1469 long val;
1471 1470
1472 1471 if (!PyInt_Check(obj)) {
1473 1472 PyErr_SetString(PyExc_TypeError,
1474 1473 "arguments must all be ints");
1475 1474 goto bail;
1476 1475 }
1477 1476 val = PyInt_AsLong(obj);
1478 1477 if (val == -1) {
1479 1478 ret = PyList_New(0);
1480 1479 goto done;
1481 1480 }
1482 1481 if (val < 0 || val >= len) {
1483 1482 PyErr_SetString(PyExc_IndexError,
1484 1483 "index out of range");
1485 1484 goto bail;
1486 1485 }
1487 1486 /* this cheesy bloom filter lets us avoid some more
1488 1487 * expensive duplicate checks in the common set-is-disjoint
1489 1488 * case */
1490 1489 x = 1ull << (val & 0x3f);
1491 1490 if (repeat & x) {
1492 1491 int k;
1493 1492 for (k = 0; k < revcount; k++) {
1494 1493 if (val == revs[k])
1495 1494 goto duplicate;
1496 1495 }
1497 1496 }
1498 1497 else repeat |= x;
1499 1498 if (revcount >= capacity) {
1500 1499 PyErr_Format(PyExc_OverflowError,
1501 1500 "bitset size (%d) > capacity (%d)",
1502 1501 revcount, capacity);
1503 1502 goto bail;
1504 1503 }
1505 1504 revs[revcount++] = (int)val;
1506 1505 duplicate:;
1507 1506 }
1508 1507
1509 1508 if (revcount == 0) {
1510 1509 ret = PyList_New(0);
1511 1510 goto done;
1512 1511 }
1513 1512 if (revcount == 1) {
1514 1513 PyObject *obj;
1515 1514 ret = PyList_New(1);
1516 1515 if (ret == NULL)
1517 1516 goto bail;
1518 1517 obj = PyInt_FromLong(revs[0]);
1519 1518 if (obj == NULL)
1520 1519 goto bail;
1521 1520 PyList_SET_ITEM(ret, 0, obj);
1522 1521 goto done;
1523 1522 }
1524 1523
1525 1524 gca = find_gca_candidates(self, revs, revcount);
1526 1525 if (gca == NULL)
1527 1526 goto bail;
1528 1527
1529 1528 if (PyList_GET_SIZE(gca) <= 1) {
1530 1529 ret = gca;
1531 1530 Py_INCREF(gca);
1532 1531 }
1533 1532 else if (PyList_GET_SIZE(gca) == 1) {
1534 1533 ret = PyList_GET_ITEM(gca, 0);
1535 1534 Py_INCREF(ret);
1536 1535 }
1537 1536 else ret = find_deepest(self, gca);
1538 1537
1539 1538 done:
1540 1539 free(revs);
1541 1540 Py_XDECREF(gca);
1542 1541
1543 1542 return ret;
1544 1543
1545 1544 bail:
1546 1545 free(revs);
1547 1546 Py_XDECREF(gca);
1548 1547 Py_XDECREF(ret);
1549 1548 return NULL;
1550 1549 }
1551 1550
1552 1551 /*
1553 1552 * Invalidate any trie entries introduced by added revs.
1554 1553 */
1555 1554 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
1556 1555 {
1557 1556 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
1558 1557
1559 1558 for (i = start; i < len; i++) {
1560 1559 PyObject *tuple = PyList_GET_ITEM(self->added, i);
1561 1560 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
1562 1561
1563 1562 nt_insert(self, PyString_AS_STRING(node), -1);
1564 1563 }
1565 1564
1566 1565 if (start == 0)
1567 1566 Py_CLEAR(self->added);
1568 1567 }
1569 1568
1570 1569 /*
1571 1570 * Delete a numeric range of revs, which must be at the end of the
1572 1571 * range, but exclude the sentinel nullid entry.
1573 1572 */
1574 1573 static int index_slice_del(indexObject *self, PyObject *item)
1575 1574 {
1576 1575 Py_ssize_t start, stop, step, slicelength;
1577 1576 Py_ssize_t length = index_length(self);
1578 1577 int ret = 0;
1579 1578
1580 1579 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
1581 1580 &start, &stop, &step, &slicelength) < 0)
1582 1581 return -1;
1583 1582
1584 1583 if (slicelength <= 0)
1585 1584 return 0;
1586 1585
1587 1586 if ((step < 0 && start < stop) || (step > 0 && start > stop))
1588 1587 stop = start;
1589 1588
1590 1589 if (step < 0) {
1591 1590 stop = start + 1;
1592 1591 start = stop + step*(slicelength - 1) - 1;
1593 1592 step = -step;
1594 1593 }
1595 1594
1596 1595 if (step != 1) {
1597 1596 PyErr_SetString(PyExc_ValueError,
1598 1597 "revlog index delete requires step size of 1");
1599 1598 return -1;
1600 1599 }
1601 1600
1602 1601 if (stop != length - 1) {
1603 1602 PyErr_SetString(PyExc_IndexError,
1604 1603 "revlog index deletion indices are invalid");
1605 1604 return -1;
1606 1605 }
1607 1606
1608 1607 if (start < self->length - 1) {
1609 1608 if (self->nt) {
1610 1609 Py_ssize_t i;
1611 1610
1612 1611 for (i = start + 1; i < self->length - 1; i++) {
1613 1612 const char *node = index_node(self, i);
1614 1613
1615 1614 if (node)
1616 1615 nt_insert(self, node, -1);
1617 1616 }
1618 1617 if (self->added)
1619 1618 nt_invalidate_added(self, 0);
1620 1619 if (self->ntrev > start)
1621 1620 self->ntrev = (int)start;
1622 1621 }
1623 1622 self->length = start + 1;
1624 1623 if (start < self->raw_length) {
1625 1624 if (self->cache) {
1626 1625 Py_ssize_t i;
1627 1626 for (i = start; i < self->raw_length; i++)
1628 1627 Py_CLEAR(self->cache[i]);
1629 1628 }
1630 1629 self->raw_length = start;
1631 1630 }
1632 1631 goto done;
1633 1632 }
1634 1633
1635 1634 if (self->nt) {
1636 1635 nt_invalidate_added(self, start - self->length + 1);
1637 1636 if (self->ntrev > start)
1638 1637 self->ntrev = (int)start;
1639 1638 }
1640 1639 if (self->added)
1641 1640 ret = PyList_SetSlice(self->added, start - self->length + 1,
1642 1641 PyList_GET_SIZE(self->added), NULL);
1643 1642 done:
1644 1643 Py_CLEAR(self->headrevs);
1645 1644 return ret;
1646 1645 }
1647 1646
1648 1647 /*
1649 1648 * Supported ops:
1650 1649 *
1651 1650 * slice deletion
1652 1651 * string assignment (extend node->rev mapping)
1653 1652 * string deletion (shrink node->rev mapping)
1654 1653 */
1655 1654 static int index_assign_subscript(indexObject *self, PyObject *item,
1656 1655 PyObject *value)
1657 1656 {
1658 1657 char *node;
1659 1658 Py_ssize_t nodelen;
1660 1659 long rev;
1661 1660
1662 1661 if (PySlice_Check(item) && value == NULL)
1663 1662 return index_slice_del(self, item);
1664 1663
1665 1664 if (node_check(item, &node, &nodelen) == -1)
1666 1665 return -1;
1667 1666
1668 1667 if (value == NULL)
1669 1668 return self->nt ? nt_insert(self, node, -1) : 0;
1670 1669 rev = PyInt_AsLong(value);
1671 1670 if (rev > INT_MAX || rev < 0) {
1672 1671 if (!PyErr_Occurred())
1673 1672 PyErr_SetString(PyExc_ValueError, "rev out of range");
1674 1673 return -1;
1675 1674 }
1676 1675 return nt_insert(self, node, (int)rev);
1677 1676 }
1678 1677
1679 1678 /*
1680 1679 * Find all RevlogNG entries in an index that has inline data. Update
1681 1680 * the optional "offsets" table with those entries.
1682 1681 */
1683 1682 static long inline_scan(indexObject *self, const char **offsets)
1684 1683 {
1685 1684 const char *data = PyString_AS_STRING(self->data);
1686 const char *end = data + PyString_GET_SIZE(self->data);
1685 Py_ssize_t pos = 0;
1686 Py_ssize_t end = PyString_GET_SIZE(self->data);
1687 1687 long incr = v1_hdrsize;
1688 1688 Py_ssize_t len = 0;
1689 1689
1690 while (data + v1_hdrsize <= end) {
1690 while (pos + v1_hdrsize <= end && pos >= 0) {
1691 1691 uint32_t comp_len;
1692 const char *old_data;
1693 1692 /* 3rd element of header is length of compressed inline data */
1694 comp_len = getbe32(data + 8);
1693 comp_len = getbe32(data + pos + 8);
1695 1694 incr = v1_hdrsize + comp_len;
1696 if (incr < v1_hdrsize)
1697 break;
1698 1695 if (offsets)
1699 offsets[len] = data;
1696 offsets[len] = data + pos;
1700 1697 len++;
1701 old_data = data;
1702 data += incr;
1703 if (data <= old_data)
1704 break;
1698 pos += incr;
1705 1699 }
1706 1700
1707 if (data != end && data + v1_hdrsize != end) {
1701 if (pos != end) {
1708 1702 if (!PyErr_Occurred())
1709 1703 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1710 1704 return -1;
1711 1705 }
1712 1706
1713 1707 return len;
1714 1708 }
1715 1709
1716 1710 static int index_init(indexObject *self, PyObject *args)
1717 1711 {
1718 1712 PyObject *data_obj, *inlined_obj;
1719 1713 Py_ssize_t size;
1720 1714
1721 1715 /* Initialize before argument-checking to avoid index_dealloc() crash. */
1722 1716 self->raw_length = 0;
1723 1717 self->added = NULL;
1724 1718 self->cache = NULL;
1725 1719 self->data = NULL;
1726 1720 self->headrevs = NULL;
1727 1721 self->nt = NULL;
1728 1722 self->offsets = NULL;
1729 1723
1730 1724 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1731 1725 return -1;
1732 1726 if (!PyString_Check(data_obj)) {
1733 1727 PyErr_SetString(PyExc_TypeError, "data is not a string");
1734 1728 return -1;
1735 1729 }
1736 1730 size = PyString_GET_SIZE(data_obj);
1737 1731
1738 1732 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1739 1733 self->data = data_obj;
1740 1734
1741 1735 self->ntlength = self->ntcapacity = 0;
1742 1736 self->ntdepth = self->ntsplits = 0;
1743 1737 self->ntlookups = self->ntmisses = 0;
1744 1738 self->ntrev = -1;
1745 1739 Py_INCREF(self->data);
1746 1740
1747 1741 if (self->inlined) {
1748 1742 long len = inline_scan(self, NULL);
1749 1743 if (len == -1)
1750 1744 goto bail;
1751 1745 self->raw_length = len;
1752 1746 self->length = len + 1;
1753 1747 } else {
1754 1748 if (size % v1_hdrsize) {
1755 1749 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1756 1750 goto bail;
1757 1751 }
1758 1752 self->raw_length = size / v1_hdrsize;
1759 1753 self->length = self->raw_length + 1;
1760 1754 }
1761 1755
1762 1756 return 0;
1763 1757 bail:
1764 1758 return -1;
1765 1759 }
1766 1760
1767 1761 static PyObject *index_nodemap(indexObject *self)
1768 1762 {
1769 1763 Py_INCREF(self);
1770 1764 return (PyObject *)self;
1771 1765 }
1772 1766
1773 1767 static void index_dealloc(indexObject *self)
1774 1768 {
1775 1769 _index_clearcaches(self);
1776 1770 Py_XDECREF(self->data);
1777 1771 Py_XDECREF(self->added);
1778 1772 PyObject_Del(self);
1779 1773 }
1780 1774
1781 1775 static PySequenceMethods index_sequence_methods = {
1782 1776 (lenfunc)index_length, /* sq_length */
1783 1777 0, /* sq_concat */
1784 1778 0, /* sq_repeat */
1785 1779 (ssizeargfunc)index_get, /* sq_item */
1786 1780 0, /* sq_slice */
1787 1781 0, /* sq_ass_item */
1788 1782 0, /* sq_ass_slice */
1789 1783 (objobjproc)index_contains, /* sq_contains */
1790 1784 };
1791 1785
1792 1786 static PyMappingMethods index_mapping_methods = {
1793 1787 (lenfunc)index_length, /* mp_length */
1794 1788 (binaryfunc)index_getitem, /* mp_subscript */
1795 1789 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1796 1790 };
1797 1791
1798 1792 static PyMethodDef index_methods[] = {
1799 1793 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
1800 1794 "return the gca set of the given revs"},
1801 1795 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1802 1796 "clear the index caches"},
1803 1797 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1804 1798 "get an index entry"},
1805 1799 {"headrevs", (PyCFunction)index_headrevs, METH_NOARGS,
1806 1800 "get head revisions"},
1807 1801 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1808 1802 "insert an index entry"},
1809 1803 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
1810 1804 "match a potentially ambiguous node ID"},
1811 1805 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1812 1806 "stats for the index"},
1813 1807 {NULL} /* Sentinel */
1814 1808 };
1815 1809
1816 1810 static PyGetSetDef index_getset[] = {
1817 1811 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
1818 1812 {NULL} /* Sentinel */
1819 1813 };
1820 1814
1821 1815 static PyTypeObject indexType = {
1822 1816 PyObject_HEAD_INIT(NULL)
1823 1817 0, /* ob_size */
1824 1818 "parsers.index", /* tp_name */
1825 1819 sizeof(indexObject), /* tp_basicsize */
1826 1820 0, /* tp_itemsize */
1827 1821 (destructor)index_dealloc, /* tp_dealloc */
1828 1822 0, /* tp_print */
1829 1823 0, /* tp_getattr */
1830 1824 0, /* tp_setattr */
1831 1825 0, /* tp_compare */
1832 1826 0, /* tp_repr */
1833 1827 0, /* tp_as_number */
1834 1828 &index_sequence_methods, /* tp_as_sequence */
1835 1829 &index_mapping_methods, /* tp_as_mapping */
1836 1830 0, /* tp_hash */
1837 1831 0, /* tp_call */
1838 1832 0, /* tp_str */
1839 1833 0, /* tp_getattro */
1840 1834 0, /* tp_setattro */
1841 1835 0, /* tp_as_buffer */
1842 1836 Py_TPFLAGS_DEFAULT, /* tp_flags */
1843 1837 "revlog index", /* tp_doc */
1844 1838 0, /* tp_traverse */
1845 1839 0, /* tp_clear */
1846 1840 0, /* tp_richcompare */
1847 1841 0, /* tp_weaklistoffset */
1848 1842 0, /* tp_iter */
1849 1843 0, /* tp_iternext */
1850 1844 index_methods, /* tp_methods */
1851 1845 0, /* tp_members */
1852 1846 index_getset, /* tp_getset */
1853 1847 0, /* tp_base */
1854 1848 0, /* tp_dict */
1855 1849 0, /* tp_descr_get */
1856 1850 0, /* tp_descr_set */
1857 1851 0, /* tp_dictoffset */
1858 1852 (initproc)index_init, /* tp_init */
1859 1853 0, /* tp_alloc */
1860 1854 };
1861 1855
1862 1856 /*
1863 1857 * returns a tuple of the form (index, index, cache) with elements as
1864 1858 * follows:
1865 1859 *
1866 1860 * index: an index object that lazily parses RevlogNG records
1867 1861 * cache: if data is inlined, a tuple (index_file_content, 0), else None
1868 1862 *
1869 1863 * added complications are for backwards compatibility
1870 1864 */
1871 1865 static PyObject *parse_index2(PyObject *self, PyObject *args)
1872 1866 {
1873 1867 PyObject *tuple = NULL, *cache = NULL;
1874 1868 indexObject *idx;
1875 1869 int ret;
1876 1870
1877 1871 idx = PyObject_New(indexObject, &indexType);
1878 1872 if (idx == NULL)
1879 1873 goto bail;
1880 1874
1881 1875 ret = index_init(idx, args);
1882 1876 if (ret == -1)
1883 1877 goto bail;
1884 1878
1885 1879 if (idx->inlined) {
1886 1880 cache = Py_BuildValue("iO", 0, idx->data);
1887 1881 if (cache == NULL)
1888 1882 goto bail;
1889 1883 } else {
1890 1884 cache = Py_None;
1891 1885 Py_INCREF(cache);
1892 1886 }
1893 1887
1894 1888 tuple = Py_BuildValue("NN", idx, cache);
1895 1889 if (!tuple)
1896 1890 goto bail;
1897 1891 return tuple;
1898 1892
1899 1893 bail:
1900 1894 Py_XDECREF(idx);
1901 1895 Py_XDECREF(cache);
1902 1896 Py_XDECREF(tuple);
1903 1897 return NULL;
1904 1898 }
1905 1899
1906 1900 static char parsers_doc[] = "Efficient content parsing.";
1907 1901
1908 1902 PyObject *encodedir(PyObject *self, PyObject *args);
1909 1903 PyObject *pathencode(PyObject *self, PyObject *args);
1910 1904 PyObject *lowerencode(PyObject *self, PyObject *args);
1911 1905
1912 1906 static PyMethodDef methods[] = {
1913 1907 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
1914 1908 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
1915 1909 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1916 1910 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
1917 1911 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
1918 1912 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
1919 1913 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
1920 1914 {NULL, NULL}
1921 1915 };
1922 1916
1923 1917 void dirs_module_init(PyObject *mod);
1924 1918
1925 1919 static void module_init(PyObject *mod)
1926 1920 {
1927 1921 dirs_module_init(mod);
1928 1922
1929 1923 indexType.tp_new = PyType_GenericNew;
1930 1924 if (PyType_Ready(&indexType) < 0)
1931 1925 return;
1932 1926 Py_INCREF(&indexType);
1933 1927
1934 1928 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
1935 1929
1936 1930 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
1937 1931 -1, -1, -1, -1, nullid, 20);
1938 1932 if (nullentry)
1939 1933 PyObject_GC_UnTrack(nullentry);
1940 1934
1941 1935 dirstate_unset = Py_BuildValue("ciii", 'n', 0, -1, -1);
1942 1936 }
1943 1937
1944 1938 #ifdef IS_PY3K
1945 1939 static struct PyModuleDef parsers_module = {
1946 1940 PyModuleDef_HEAD_INIT,
1947 1941 "parsers",
1948 1942 parsers_doc,
1949 1943 -1,
1950 1944 methods
1951 1945 };
1952 1946
1953 1947 PyMODINIT_FUNC PyInit_parsers(void)
1954 1948 {
1955 1949 PyObject *mod = PyModule_Create(&parsers_module);
1956 1950 module_init(mod);
1957 1951 return mod;
1958 1952 }
1959 1953 #else
1960 1954 PyMODINIT_FUNC initparsers(void)
1961 1955 {
1962 1956 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
1963 1957 module_init(mod);
1964 1958 }
1965 1959 #endif
General Comments 0
You need to be logged in to leave comments. Login now