##// END OF EJS Templates
parsers: speed up hex decoding for manifests
Matt Mackall -
r7092:fb3fc276 default
parent child Browse files
Show More
@@ -1,169 +1,146 b''
1 /*
1 /*
2 parsers.c - efficient content parsing
2 parsers.c - efficient content parsing
3
3
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8 */
8 */
9
9
10 #include <Python.h>
10 #include <Python.h>
11 #include <ctype.h>
11 #include <ctype.h>
12 #include <string.h>
12 #include <string.h>
13
13
/*
 * hexdigit: convert one hexadecimal character (0-9, a-f or A-F) to its
 * numeric value 0..15.
 *
 * This span shows two revisions of the function interleaved line by line;
 * the leading number on each line is its line number within its own
 * revision (one revision closes the function on line 26, the other on 25).
 *   - Revision ending on line 26: checks A-F before a-f and returns -1 for
 *     any other character, leaving error reporting to the caller.
 *   - Revision ending on line 25: checks a-f before A-F; for any other
 *     character it sets ValueError through PyErr_SetString and returns 0.
 *
 * NOTE(review): in the revision ending on line 25 the failure value 0 is
 * also the valid result for the digit 0, so the return value alone cannot
 * signal an error; a caller has to look at the pending Python exception.
 * Confirm that callers do so.
 */
14 static int hexdigit(char c)
14 static int hexdigit(char c)
15 {
15 {
16 if (c >= '0' && c <= '9')
16 if (c >= '0' && c <= '9')
17 return c - '0';
17 return c - '0';
18
18 if (c >= 'a' && c <= 'f')
19 return c - 'a' + 10;
19 if (c >= 'A' && c <= 'F')
20 if (c >= 'A' && c <= 'F')
20 return c - 'A' + 10;
21 return c - 'A' + 10;
21
22
22 if (c >= 'a' && c <= 'f')
23 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
23 return c - 'a' + 10;
24 return 0;
24
25 return -1;
26 }
25 }
27
26
28 /*
27 /*
29 * Turn a hex-encoded string into binary.
28 * Turn a hex-encoded string into binary.
30 */
29 */
/*
 * unhexlify: decode len hex characters starting at str into a new Python
 * string object of len / 2 bytes; each output byte is (hi << 4) | lo of two
 * consecutive input characters. Returns the new object, or NULL when the
 * allocation fails (both revisions) or validation fails (longer revision).
 *
 * Two revisions are interleaved below (line numbers run to 71 in one and
 * to 48 in the other):
 *   - Revision ending on line 71: rejects odd len with ValueError, obtains
 *     the buffer with PyString_AsString and checks it for NULL, raises
 *     ValueError when hexdigit returns -1, and releases ret on every
 *     failure path through the bail label.
 *   - Revision ending on line 48: drops the length check, uses the
 *     PyString_AS_STRING macro, and has no per-digit check or cleanup path.
 *
 * NOTE(review), revision ending on line 48 only:
 *   - the loop consumes two characters per iteration, so an odd len reads
 *     str[len] and stores one byte more than the len / 2 requested;
 *   - nothing here tests the result of hexdigit, so ret is returned even
 *     if hexdigit has set an exception.
 * Verify that the caller handles both cases.
 */
31 static PyObject *unhexlify(const char *str, int len)
30 static PyObject *unhexlify(const char *str, int len)
32 {
31 {
33 PyObject *ret = NULL;
32 PyObject *ret;
34 const char *c;
33 const char *c;
35 char *d;
34 char *d;
36
35
37 if (len % 2) {
38 PyErr_SetString(PyExc_ValueError,
39 "input is not even in length");
40 goto bail;
41 }
42
43 ret = PyString_FromStringAndSize(NULL, len / 2);
36 ret = PyString_FromStringAndSize(NULL, len / 2);
44 if (!ret)
37 if (!ret)
45 goto bail;
38 return NULL;
46
39
/* d is the write cursor into the buffer of the newly created string. */
47 d = PyString_AsString(ret);
40 d = PyString_AS_STRING(ret);
48 if (!d)
49 goto bail;
50
/* The loop header has no increment; c advances twice inside the body. */
51 for (c = str; c < str + len;) {
41 for (c = str; c < str + len;) {
52 int hi = hexdigit(*c++);
42 int hi = hexdigit(*c++);
53 int lo = hexdigit(*c++);
43 int lo = hexdigit(*c++);
54
55 if (hi == -1 || lo == -1) {
56 PyErr_SetString(PyExc_ValueError,
57 "input contains non-hex character");
58 goto bail;
59 }
60
61 *d++ = (hi << 4) | lo;
44 *d++ = (hi << 4) | lo;
62 }
45 }
63
46
64 goto done;
65
/* Failure exit of the longer revision: release the partial result. */
66 bail:
67 Py_XDECREF(ret);
68 ret = NULL;
69 done:
70 return ret;
47 return ret;
71 }
48 }
72
49
73 /*
50 /*
74 * This code assumes that a manifest is stitched together with newline
51 * This code assumes that a manifest is stitched together with newline
75 * ('\n') characters.
52 * ('\n') characters.
76 */
53 */
/*
 * parse_manifest(mfdict, fdict, data): Python-callable manifest parser.
 * The argument format O!O!s# requires two dict objects followed by a
 * string together with its length.
 *
 * The text is scanned once. Each entry runs up to a newline and must
 * contain a NUL byte: the bytes before the last NUL of the entry are the
 * file name, up to 40 characters after it are hex-decoded by unhexlify into
 * the node, and anything beyond those 40 characters is stored as the flags
 * string. mfdict[file] is set to the node and, when flags are present,
 * fdict[file] is set to the flags.
 *
 * Returns None on success. Returns NULL with ValueError set when an entry
 * has no NUL separator or when the data does not end in a newline; also
 * returns NULL when argument parsing, object creation or a dict insert
 * fails.
 *
 * Both interleaved revisions (line numbers 77-157 and 54-134) contain the
 * same statements.
 */
77 static PyObject *parse_manifest(PyObject *self, PyObject *args)
54 static PyObject *parse_manifest(PyObject *self, PyObject *args)
78 {
55 {
79 PyObject *mfdict, *fdict;
56 PyObject *mfdict, *fdict;
80 char *str, *cur, *start, *zero;
57 char *str, *cur, *start, *zero;
81 int len;
58 int len;
82
59
83 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
60 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
84 &PyDict_Type, &mfdict,
61 &PyDict_Type, &mfdict,
85 &PyDict_Type, &fdict,
62 &PyDict_Type, &fdict,
86 &str, &len))
63 &str, &len))
87 goto quit;
64 goto quit;
88
65
/* start marks the beginning of the current entry, zero its NUL separator. */
89 for (start = cur = str, zero = NULL; cur < str + len; cur++) {
66 for (start = cur = str, zero = NULL; cur < str + len; cur++) {
90 PyObject *file = NULL, *node = NULL;
67 PyObject *file = NULL, *node = NULL;
91 PyObject *flags = NULL;
68 PyObject *flags = NULL;
92 int nlen;
69 int nlen;
93
70
/* Remember the most recent NUL byte; keep scanning until a newline. */
94 if (!*cur) {
71 if (!*cur) {
95 zero = cur;
72 zero = cur;
96 continue;
73 continue;
97 }
74 }
98 else if (*cur != '\n')
75 else if (*cur != '\n')
99 continue;
76 continue;
100
77
/* A newline was reached: the entry is complete and needs a separator. */
101 if (!zero) {
78 if (!zero) {
102 PyErr_SetString(PyExc_ValueError,
79 PyErr_SetString(PyExc_ValueError,
103 "manifest entry has no separator");
80 "manifest entry has no separator");
104 goto quit;
81 goto quit;
105 }
82 }
106
83
107 file = PyString_FromStringAndSize(start, zero - start);
84 file = PyString_FromStringAndSize(start, zero - start);
108 if (!file)
85 if (!file)
109 goto bail;
86 goto bail;
110
87
/* nlen counts the characters between the NUL and the newline. */
111 nlen = cur - zero - 1;
88 nlen = cur - zero - 1;
112
89
113 node = unhexlify(zero + 1, nlen > 40 ? 40 : nlen);
90 node = unhexlify(zero + 1, nlen > 40 ? 40 : nlen);
114 if (!node)
91 if (!node)
115 goto bail;
92 goto bail;
116
93
/*
 * NOTE(review): the flags declared inside this block shadows the outer
 * flags, which therefore stays NULL; the Py_XDECREF(flags) calls further
 * down never release the object created here. Per the CPython docs
 * PyDict_SetItem does not steal a reference to the value, so this looks
 * like a reference leak for every entry that carries flags - confirm.
 */
117 if (nlen > 40) {
94 if (nlen > 40) {
118 PyObject *flags;
95 PyObject *flags;
119
96
/* zero + 41 is the first character after the 40 hex digits. */
120 flags = PyString_FromStringAndSize(zero + 41,
97 flags = PyString_FromStringAndSize(zero + 41,
121 nlen - 40);
98 nlen - 40);
122 if (!flags)
99 if (!flags)
123 goto bail;
100 goto bail;
124
101
125 if (PyDict_SetItem(fdict, file, flags) == -1)
102 if (PyDict_SetItem(fdict, file, flags) == -1)
126 goto bail;
103 goto bail;
127 }
104 }
128
105
129 if (PyDict_SetItem(mfdict, file, node) == -1)
106 if (PyDict_SetItem(mfdict, file, node) == -1)
130 goto bail;
107 goto bail;
131
108
/* The next entry starts right after this newline. */
132 start = cur + 1;
109 start = cur + 1;
133 zero = NULL;
110 zero = NULL;
134
111
135 Py_XDECREF(flags);
112 Py_XDECREF(flags);
136 Py_XDECREF(node);
113 Py_XDECREF(node);
137 Py_XDECREF(file);
114 Py_XDECREF(file);
138 continue;
115 continue;
/* Error exit for the current entry: drop local references, then fail. */
139 bail:
116 bail:
140 Py_XDECREF(flags);
117 Py_XDECREF(flags);
141 Py_XDECREF(node);
118 Py_XDECREF(node);
142 Py_XDECREF(file);
119 Py_XDECREF(file);
143 goto quit;
120 goto quit;
144 }
121 }
145
122
/* Non-empty data whose last byte is not a newline is rejected. */
146 if (len > 0 && *(cur - 1) != '\n') {
123 if (len > 0 && *(cur - 1) != '\n') {
147 PyErr_SetString(PyExc_ValueError,
124 PyErr_SetString(PyExc_ValueError,
148 "manifest contains trailing garbage");
125 "manifest contains trailing garbage");
149 goto quit;
126 goto quit;
150 }
127 }
151
128
152 Py_INCREF(Py_None);
129 Py_INCREF(Py_None);
153 return Py_None;
130 return Py_None;
154
131
155 quit:
132 quit:
156 return NULL;
133 return NULL;
157 }
134 }
158
135
/*
 * Module docstring and method table. The table exposes parse_manifest as
 * a METH_VARARGS function; the {NULL, NULL} entry terminates the table.
 * Both interleaved revisions (lines 159-164 and 136-141) are identical.
 */
159 static char parsers_doc[] = "Efficient content parsing.";
136 static char parsers_doc[] = "Efficient content parsing.";
160
137
161 static PyMethodDef methods[] = {
138 static PyMethodDef methods[] = {
162 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
139 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
163 {NULL, NULL}
140 {NULL, NULL}
164 };
141 };
165
142
/*
 * Module initialisation function: registers the module named parsers with
 * the methods table and the parsers_doc docstring through Py_InitModule3.
 * The result of Py_InitModule3 is not checked here.
 * Both interleaved revisions (lines 166-169 and 143-146) are identical.
 */
166 PyMODINIT_FUNC initparsers(void)
143 PyMODINIT_FUNC initparsers(void)
167 {
144 {
168 Py_InitModule3("parsers", methods, parsers_doc);
145 Py_InitModule3("parsers", methods, parsers_doc);
169 }
146 }
General Comments 0
You need to be logged in to leave comments. Login now