##// END OF EJS Templates
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan -
r6389:0231f763 default
parent child Browse files
Show More
@@ -0,0 +1,168
1 /*
2 parsers.c - efficient content parsing
3
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
8 */
9
10 #include <Python.h>
11 #include <ctype.h>
12 #include <string.h>
13
/*
 * Map a single hexadecimal digit character to its numeric value.
 *
 * Accepts '0'-'9', 'a'-'f' and 'A'-'F'.  Returns the value in the
 * range 0..15, or -1 if the character is not a hex digit.
 */
static int hexdigit(char c)
{
	switch (c) {
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
		return c - '0';
	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
		return 10 + (c - 'A');
	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
		return 10 + (c - 'a');
	default:
		return -1;
	}
}
27
28 /*
29 * Turn a hex-encoded string into binary.
30 */
31 static PyObject *unhexlify(const char *str, int len)
32 {
33 PyObject *ret = NULL;
34 char *c, *d;
35
36 if (len % 2) {
37 PyErr_SetString(PyExc_ValueError,
38 "input is not even in length");
39 goto bail;
40 }
41
42 ret = PyString_FromStringAndSize(NULL, len / 2);
43 if (!ret)
44 goto bail;
45
46 d = PyString_AsString(ret);
47 if (!d)
48 goto bail;
49
50 for (c = str; c < str + len;) {
51 int hi = hexdigit(*c++);
52 int lo = hexdigit(*c++);
53
54 if (hi == -1 || lo == -1) {
55 PyErr_SetString(PyExc_ValueError,
56 "input contains non-hex character");
57 goto bail;
58 }
59
60 *d++ = (hi << 4) | lo;
61 }
62
63 goto done;
64
65 bail:
66 Py_XDECREF(ret);
67 ret = NULL;
68 done:
69 return ret;
70 }
71
72 /*
73 * This code assumes that a manifest is stitched together with newline
74 * ('\n') characters.
75 */
76 static PyObject *parse_manifest(PyObject *self, PyObject *args)
77 {
78 PyObject *mfdict, *fdict;
79 char *str, *cur, *start, *zero;
80 int len;
81
82 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
83 &PyDict_Type, &mfdict,
84 &PyDict_Type, &fdict,
85 &str, &len))
86 goto quit;
87
88 for (start = cur = str, zero = NULL; cur < str + len; cur++) {
89 PyObject *file = NULL, *node = NULL;
90 PyObject *flags = NULL;
91 int nlen;
92
93 if (!*cur) {
94 zero = cur;
95 continue;
96 }
97 else if (*cur != '\n')
98 continue;
99
100 if (!zero) {
101 PyErr_SetString(PyExc_ValueError,
102 "manifest entry has no separator");
103 goto quit;
104 }
105
106 file = PyString_FromStringAndSize(start, zero - start);
107 if (!file)
108 goto bail;
109
110 nlen = cur - zero - 1;
111
112 node = unhexlify(zero + 1, nlen > 40 ? 40 : nlen);
113 if (!node)
114 goto bail;
115
116 if (nlen > 40) {
117 PyObject *flags;
118
119 flags = PyString_FromStringAndSize(zero + 41,
120 nlen - 40);
121 if (!flags)
122 goto bail;
123
124 if (PyDict_SetItem(fdict, file, flags) == -1)
125 goto bail;
126 }
127
128 if (PyDict_SetItem(mfdict, file, node) == -1)
129 goto bail;
130
131 start = cur + 1;
132 zero = NULL;
133
134 Py_XDECREF(flags);
135 Py_XDECREF(node);
136 Py_XDECREF(file);
137 continue;
138 bail:
139 Py_XDECREF(flags);
140 Py_XDECREF(node);
141 Py_XDECREF(file);
142 goto quit;
143 }
144
145 if (len > 0 && *(cur - 1) != '\n') {
146 PyErr_SetString(PyExc_ValueError,
147 "manifest contains trailing garbage");
148 goto quit;
149 }
150
151 Py_INCREF(Py_None);
152 return Py_None;
153
154 quit:
155 return NULL;
156 }
157
158 static char parsers_doc[] = "Efficient content parsing.";
159
160 static PyMethodDef methods[] = {
161 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
162 {NULL, NULL}
163 };
164
165 PyMODINIT_FUNC initparsers(void)
166 {
167 Py_InitModule3("parsers", methods, parsers_doc);
168 }
@@ -8,7 +8,7
8 from node import bin, hex, nullid
8 from node import bin, hex, nullid
9 from revlog import revlog, RevlogError
9 from revlog import revlog, RevlogError
10 from i18n import _
10 from i18n import _
11 import array, struct, mdiff
11 import array, struct, mdiff, parsers
12
12
13 class manifestdict(dict):
13 class manifestdict(dict):
14 def __init__(self, mapping=None, flags=None):
14 def __init__(self, mapping=None, flags=None):
@@ -39,14 +39,7 class manifest(revlog):
39
39
40 def parse(self, lines):
40 def parse(self, lines):
41 mfdict = manifestdict()
41 mfdict = manifestdict()
42 fdict = mfdict._flags
42 parsers.parse_manifest(mfdict, mfdict._flags, lines)
43 for l in lines.splitlines():
44 f, n = l.split('\0')
45 if len(n) > 40:
46 fdict[f] = n[40:]
47 mfdict[f] = bin(n[:40])
48 else:
49 mfdict[f] = bin(n)
50 return mfdict
43 return mfdict
51
44
52 def readdelta(self, node):
45 def readdelta(self, node):
@@ -88,10 +88,11 mercurial.version.remember_version(versi
88 cmdclass = {'install_data': install_package_data}
88 cmdclass = {'install_data': install_package_data}
89
89
90 ext_modules=[
90 ext_modules=[
91 Extension('mercurial.mpatch', ['mercurial/mpatch.c']),
91 Extension('mercurial.base85', ['mercurial/base85.c']),
92 Extension('mercurial.bdiff', ['mercurial/bdiff.c']),
92 Extension('mercurial.bdiff', ['mercurial/bdiff.c']),
93 Extension('mercurial.base85', ['mercurial/base85.c']),
93 Extension('mercurial.diffhelpers', ['mercurial/diffhelpers.c']),
94 Extension('mercurial.diffhelpers', ['mercurial/diffhelpers.c'])
94 Extension('mercurial.mpatch', ['mercurial/mpatch.c']),
95 Extension('mercurial.parsers', ['mercurial/parsers.c']),
95 ]
96 ]
96
97
97 packages = ['mercurial', 'mercurial.hgweb', 'hgext', 'hgext.convert']
98 packages = ['mercurial', 'mercurial.hgweb', 'hgext', 'hgext.convert']
General Comments 0
You need to be logged in to leave comments. Login now