Show More
@@ -0,0 +1,168 | |||||
|
1 | /* | |||
|
2 | parsers.c - efficient content parsing | |||
|
3 | ||||
|
4 | Copyright 2008 Matt Mackall <mpm@selenic.com> and others | |||
|
5 | ||||
|
6 | This software may be used and distributed according to the terms of | |||
|
7 | the GNU General Public License, incorporated herein by reference. | |||
|
8 | */ | |||
|
9 | ||||
|
10 | #include <Python.h> | |||
|
11 | #include <ctype.h> | |||
|
12 | #include <string.h> | |||
|
13 | ||||
|
/*
 * Map one ASCII hex character to its numeric value.
 *
 * Accepts '0'-'9', 'a'-'f' and 'A'-'F'; returns 0..15 for those and
 * -1 for any other character.
 */
static int hexdigit(char c)
{
	int value = -1;

	if ('0' <= c && c <= '9')
		value = c - '0';
	else if ('a' <= c && c <= 'f')
		value = 10 + (c - 'a');
	else if ('A' <= c && c <= 'F')
		value = 10 + (c - 'A');

	return value;
}
|
27 | ||||
|
28 | /* | |||
|
29 | * Turn a hex-encoded string into binary. | |||
|
30 | */ | |||
|
31 | static PyObject *unhexlify(const char *str, int len) | |||
|
32 | { | |||
|
33 | PyObject *ret = NULL; | |||
|
34 | char *c, *d; | |||
|
35 | ||||
|
36 | if (len % 2) { | |||
|
37 | PyErr_SetString(PyExc_ValueError, | |||
|
38 | "input is not even in length"); | |||
|
39 | goto bail; | |||
|
40 | } | |||
|
41 | ||||
|
42 | ret = PyString_FromStringAndSize(NULL, len / 2); | |||
|
43 | if (!ret) | |||
|
44 | goto bail; | |||
|
45 | ||||
|
46 | d = PyString_AsString(ret); | |||
|
47 | if (!d) | |||
|
48 | goto bail; | |||
|
49 | ||||
|
50 | for (c = str; c < str + len;) { | |||
|
51 | int hi = hexdigit(*c++); | |||
|
52 | int lo = hexdigit(*c++); | |||
|
53 | ||||
|
54 | if (hi == -1 || lo == -1) { | |||
|
55 | PyErr_SetString(PyExc_ValueError, | |||
|
56 | "input contains non-hex character"); | |||
|
57 | goto bail; | |||
|
58 | } | |||
|
59 | ||||
|
60 | *d++ = (hi << 4) | lo; | |||
|
61 | } | |||
|
62 | ||||
|
63 | goto done; | |||
|
64 | ||||
|
65 | bail: | |||
|
66 | Py_XDECREF(ret); | |||
|
67 | ret = NULL; | |||
|
68 | done: | |||
|
69 | return ret; | |||
|
70 | } | |||
|
71 | ||||
|
72 | /* | |||
|
73 | * This code assumes that a manifest is stitched together with newline | |||
|
74 | * ('\n') characters. | |||
|
75 | */ | |||
|
76 | static PyObject *parse_manifest(PyObject *self, PyObject *args) | |||
|
77 | { | |||
|
78 | PyObject *mfdict, *fdict; | |||
|
79 | char *str, *cur, *start, *zero; | |||
|
80 | int len; | |||
|
81 | ||||
|
82 | if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest", | |||
|
83 | &PyDict_Type, &mfdict, | |||
|
84 | &PyDict_Type, &fdict, | |||
|
85 | &str, &len)) | |||
|
86 | goto quit; | |||
|
87 | ||||
|
88 | for (start = cur = str, zero = NULL; cur < str + len; cur++) { | |||
|
89 | PyObject *file = NULL, *node = NULL; | |||
|
90 | PyObject *flags = NULL; | |||
|
91 | int nlen; | |||
|
92 | ||||
|
93 | if (!*cur) { | |||
|
94 | zero = cur; | |||
|
95 | continue; | |||
|
96 | } | |||
|
97 | else if (*cur != '\n') | |||
|
98 | continue; | |||
|
99 | ||||
|
100 | if (!zero) { | |||
|
101 | PyErr_SetString(PyExc_ValueError, | |||
|
102 | "manifest entry has no separator"); | |||
|
103 | goto quit; | |||
|
104 | } | |||
|
105 | ||||
|
106 | file = PyString_FromStringAndSize(start, zero - start); | |||
|
107 | if (!file) | |||
|
108 | goto bail; | |||
|
109 | ||||
|
110 | nlen = cur - zero - 1; | |||
|
111 | ||||
|
112 | node = unhexlify(zero + 1, nlen > 40 ? 40 : nlen); | |||
|
113 | if (!node) | |||
|
114 | goto bail; | |||
|
115 | ||||
|
116 | if (nlen > 40) { | |||
|
117 | PyObject *flags; | |||
|
118 | ||||
|
119 | flags = PyString_FromStringAndSize(zero + 41, | |||
|
120 | nlen - 40); | |||
|
121 | if (!flags) | |||
|
122 | goto bail; | |||
|
123 | ||||
|
124 | if (PyDict_SetItem(fdict, file, flags) == -1) | |||
|
125 | goto bail; | |||
|
126 | } | |||
|
127 | ||||
|
128 | if (PyDict_SetItem(mfdict, file, node) == -1) | |||
|
129 | goto bail; | |||
|
130 | ||||
|
131 | start = cur + 1; | |||
|
132 | zero = NULL; | |||
|
133 | ||||
|
134 | Py_XDECREF(flags); | |||
|
135 | Py_XDECREF(node); | |||
|
136 | Py_XDECREF(file); | |||
|
137 | continue; | |||
|
138 | bail: | |||
|
139 | Py_XDECREF(flags); | |||
|
140 | Py_XDECREF(node); | |||
|
141 | Py_XDECREF(file); | |||
|
142 | goto quit; | |||
|
143 | } | |||
|
144 | ||||
|
145 | if (len > 0 && *(cur - 1) != '\n') { | |||
|
146 | PyErr_SetString(PyExc_ValueError, | |||
|
147 | "manifest contains trailing garbage"); | |||
|
148 | goto quit; | |||
|
149 | } | |||
|
150 | ||||
|
151 | Py_INCREF(Py_None); | |||
|
152 | return Py_None; | |||
|
153 | ||||
|
154 | quit: | |||
|
155 | return NULL; | |||
|
156 | } | |||
|
157 | ||||
|
158 | static char parsers_doc[] = "Efficient content parsing."; | |||
|
159 | ||||
|
160 | static PyMethodDef methods[] = { | |||
|
161 | {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"}, | |||
|
162 | {NULL, NULL} | |||
|
163 | }; | |||
|
164 | ||||
|
165 | PyMODINIT_FUNC initparsers(void) | |||
|
166 | { | |||
|
167 | Py_InitModule3("parsers", methods, parsers_doc); | |||
|
168 | } |
@@ -8,7 +8,7 | |||||
8 | from node import bin, hex, nullid |
|
8 | from node import bin, hex, nullid | |
9 | from revlog import revlog, RevlogError |
|
9 | from revlog import revlog, RevlogError | |
10 | from i18n import _ |
|
10 | from i18n import _ | |
11 | import array, struct, mdiff |
|
11 | import array, struct, mdiff, parsers | |
12 |
|
12 | |||
13 | class manifestdict(dict): |
|
13 | class manifestdict(dict): | |
14 | def __init__(self, mapping=None, flags=None): |
|
14 | def __init__(self, mapping=None, flags=None): | |
@@ -39,14 +39,7 class manifest(revlog): | |||||
39 |
|
39 | |||
40 | def parse(self, lines): |
|
40 | def parse(self, lines): | |
41 | mfdict = manifestdict() |
|
41 | mfdict = manifestdict() | |
42 |
|
|
42 | parsers.parse_manifest(mfdict, mfdict._flags, lines) | |
43 | for l in lines.splitlines(): |
|
|||
44 | f, n = l.split('\0') |
|
|||
45 | if len(n) > 40: |
|
|||
46 | fdict[f] = n[40:] |
|
|||
47 | mfdict[f] = bin(n[:40]) |
|
|||
48 | else: |
|
|||
49 | mfdict[f] = bin(n) |
|
|||
50 | return mfdict |
|
43 | return mfdict | |
51 |
|
44 | |||
52 | def readdelta(self, node): |
|
45 | def readdelta(self, node): |
@@ -88,10 +88,11 mercurial.version.remember_version(versi | |||||
88 | cmdclass = {'install_data': install_package_data} |
|
88 | cmdclass = {'install_data': install_package_data} | |
89 |
|
89 | |||
90 | ext_modules=[ |
|
90 | ext_modules=[ | |
91 |
Extension('mercurial. |
|
91 | Extension('mercurial.base85', ['mercurial/base85.c']), | |
92 | Extension('mercurial.bdiff', ['mercurial/bdiff.c']), |
|
92 | Extension('mercurial.bdiff', ['mercurial/bdiff.c']), | |
93 |
Extension('mercurial. |
|
93 | Extension('mercurial.diffhelpers', ['mercurial/diffhelpers.c']), | |
94 |
Extension('mercurial. |
|
94 | Extension('mercurial.mpatch', ['mercurial/mpatch.c']), | |
|
95 | Extension('mercurial.parsers', ['mercurial/parsers.c']), | |||
95 | ] |
|
96 | ] | |
96 |
|
97 | |||
97 | packages = ['mercurial', 'mercurial.hgweb', 'hgext', 'hgext.convert'] |
|
98 | packages = ['mercurial', 'mercurial.hgweb', 'hgext', 'hgext.convert'] |
General Comments 0
You need to be logged in to leave comments.
Login now