##// END OF EJS Templates
dirs: fix trivial over-read of input data...
Augie Fackler -
r43419:2a0774e9 default
parent child Browse files
Show More
@@ -1,319 +1,334
1 /*
1 /*
2 dirs.c - dynamic directory diddling for dirstates
2 dirs.c - dynamic directory diddling for dirstates
3
3
4 Copyright 2013 Facebook
4 Copyright 2013 Facebook
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8 */
8 */
9
9
10 #define PY_SSIZE_T_CLEAN
10 #define PY_SSIZE_T_CLEAN
11 #include <Python.h>
11 #include <Python.h>
12
12
13 #include "util.h"
13 #include "util.h"
14
14
15 #ifdef IS_PY3K
15 #ifdef IS_PY3K
16 #define PYLONG_VALUE(o) ((PyLongObject *)o)->ob_digit[1]
16 #define PYLONG_VALUE(o) ((PyLongObject *)o)->ob_digit[1]
17 #else
17 #else
18 #define PYLONG_VALUE(o) PyInt_AS_LONG(o)
18 #define PYLONG_VALUE(o) PyInt_AS_LONG(o)
19 #endif
19 #endif
20
20
21 /*
21 /*
22 * This is a multiset of directory names, built from the files that
22 * This is a multiset of directory names, built from the files that
23 * appear in a dirstate or manifest.
23 * appear in a dirstate or manifest.
24 *
24 *
25 * A few implementation notes:
25 * A few implementation notes:
26 *
26 *
27 * We modify Python integers for refcounting, but those integers are
27 * We modify Python integers for refcounting, but those integers are
28 * never visible to Python code.
28 * never visible to Python code.
29 *
29 *
30 * We mutate strings in-place, but leave them immutable once they can
30 * We mutate strings in-place, but leave them immutable once they can
31 * be seen by Python code.
31 * be seen by Python code.
32 */
32 */
33 typedef struct {
33 typedef struct {
34 PyObject_HEAD
34 PyObject_HEAD
35 PyObject *dict;
35 PyObject *dict;
36 } dirsObject;
36 } dirsObject;
37
37
38 static inline Py_ssize_t _finddir(const char *path, Py_ssize_t pos)
38 static inline Py_ssize_t _finddir(const char *path, Py_ssize_t pos)
39 {
39 {
40 while (pos != -1) {
40 while (pos != -1) {
41 if (path[pos] == '/')
41 if (path[pos] == '/')
42 break;
42 break;
43 pos -= 1;
43 pos -= 1;
44 }
44 }
45 if (pos == -1) {
45 if (pos == -1) {
46 return 0;
46 return 0;
47 }
47 }
48
48
49 return pos;
49 return pos;
50 }
50 }
51
51
52 static int _addpath(PyObject *dirs, PyObject *path)
52 static int _addpath(PyObject *dirs, PyObject *path)
53 {
53 {
54 const char *cpath = PyBytes_AS_STRING(path);
54 const char *cpath = PyBytes_AS_STRING(path);
55 Py_ssize_t pos = PyBytes_GET_SIZE(path);
55 Py_ssize_t pos = PyBytes_GET_SIZE(path);
56 PyObject *key = NULL;
56 PyObject *key = NULL;
57 int ret = -1;
57 int ret = -1;
58
58
59 /* This loop is super critical for performance. That's why we inline
59 /* This loop is super critical for performance. That's why we inline
60 * access to Python structs instead of going through a supported API.
60 * access to Python structs instead of going through a supported API.
61 * The implementation, therefore, is heavily dependent on CPython
61 * The implementation, therefore, is heavily dependent on CPython
62 * implementation details. We also commit violations of the Python
62 * implementation details. We also commit violations of the Python
63 * "protocol" such as mutating immutable objects. But since we only
63 * "protocol" such as mutating immutable objects. But since we only
64 * mutate objects created in this function or in other well-defined
64 * mutate objects created in this function or in other well-defined
65 * locations, the references are known so these violations should go
65 * locations, the references are known so these violations should go
66 * unnoticed. The code for adjusting the length of a PyBytesObject is
66 * unnoticed. The code for adjusting the length of a PyBytesObject is
67 * essentially a minimal version of _PyBytes_Resize. */
67 * essentially a minimal version of _PyBytes_Resize. */
68 while ((pos = _finddir(cpath, pos - 1)) != -1) {
68 while ((pos = _finddir(cpath, pos - 1)) != -1) {
69 PyObject *val;
69 PyObject *val;
70
70
71 /* It's likely that every prefix already has an entry
71 if (pos < 2) {
72 in our dict. Try to avoid allocating and
72 key = PyBytes_FromStringAndSize(cpath, pos);
73 deallocating a string for each prefix we check. */
74 if (key != NULL)
75 ((PyBytesObject *)key)->ob_shash = -1;
76 else {
77 /* Force Python to not reuse a small shared string. */
78 key = PyBytes_FromStringAndSize(cpath,
79 pos < 2 ? 2 : pos);
80 if (key == NULL)
73 if (key == NULL)
81 goto bail;
74 goto bail;
75 } else {
76 /* It's likely that every prefix already has an entry
77 in our dict. Try to avoid allocating and
78 deallocating a string for each prefix we check. */
79 if (key != NULL)
80 ((PyBytesObject *)key)->ob_shash = -1;
81 else {
82 /* We know pos >= 2, so we won't get a small
83 * shared string. */
84 key = PyBytes_FromStringAndSize(cpath, pos);
85 if (key == NULL)
86 goto bail;
87 }
88 /* Py_SIZE(o) refers to the ob_size member of
89 * the struct. Yes, assigning to what looks
90 * like a function seems wrong. */
91 Py_SIZE(key) = pos;
92 ((PyBytesObject *)key)->ob_sval[pos] = '\0';
82 }
93 }
83 /* Py_SIZE(o) refers to the ob_size member of the struct. Yes,
84 * assigning to what looks like a function seems wrong. */
85 Py_SIZE(key) = pos;
86 ((PyBytesObject *)key)->ob_sval[pos] = '\0';
87
94
88 val = PyDict_GetItem(dirs, key);
95 val = PyDict_GetItem(dirs, key);
89 if (val != NULL) {
96 if (val != NULL) {
90 PYLONG_VALUE(val) += 1;
97 PYLONG_VALUE(val) += 1;
98 if (pos < 2) {
99 /* This was a short string, so we
100 * probably got a small shared string
101 * we can't mutate on the next loop
102 * iteration. Clear it.
103 */
104 Py_CLEAR(key);
105 }
91 break;
106 break;
92 }
107 }
93
108
94 /* Force Python to not reuse a small shared int. */
109 /* Force Python to not reuse a small shared int. */
95 #ifdef IS_PY3K
110 #ifdef IS_PY3K
96 val = PyLong_FromLong(0x1eadbeef);
111 val = PyLong_FromLong(0x1eadbeef);
97 #else
112 #else
98 val = PyInt_FromLong(0x1eadbeef);
113 val = PyInt_FromLong(0x1eadbeef);
99 #endif
114 #endif
100
115
101 if (val == NULL)
116 if (val == NULL)
102 goto bail;
117 goto bail;
103
118
104 PYLONG_VALUE(val) = 1;
119 PYLONG_VALUE(val) = 1;
105 ret = PyDict_SetItem(dirs, key, val);
120 ret = PyDict_SetItem(dirs, key, val);
106 Py_DECREF(val);
121 Py_DECREF(val);
107 if (ret == -1)
122 if (ret == -1)
108 goto bail;
123 goto bail;
109 Py_CLEAR(key);
124 Py_CLEAR(key);
110 }
125 }
111 ret = 0;
126 ret = 0;
112
127
113 bail:
128 bail:
114 Py_XDECREF(key);
129 Py_XDECREF(key);
115
130
116 return ret;
131 return ret;
117 }
132 }
118
133
119 static int _delpath(PyObject *dirs, PyObject *path)
134 static int _delpath(PyObject *dirs, PyObject *path)
120 {
135 {
121 char *cpath = PyBytes_AS_STRING(path);
136 char *cpath = PyBytes_AS_STRING(path);
122 Py_ssize_t pos = PyBytes_GET_SIZE(path);
137 Py_ssize_t pos = PyBytes_GET_SIZE(path);
123 PyObject *key = NULL;
138 PyObject *key = NULL;
124 int ret = -1;
139 int ret = -1;
125
140
126 while ((pos = _finddir(cpath, pos - 1)) != -1) {
141 while ((pos = _finddir(cpath, pos - 1)) != -1) {
127 PyObject *val;
142 PyObject *val;
128
143
129 key = PyBytes_FromStringAndSize(cpath, pos);
144 key = PyBytes_FromStringAndSize(cpath, pos);
130
145
131 if (key == NULL)
146 if (key == NULL)
132 goto bail;
147 goto bail;
133
148
134 val = PyDict_GetItem(dirs, key);
149 val = PyDict_GetItem(dirs, key);
135 if (val == NULL) {
150 if (val == NULL) {
136 PyErr_SetString(PyExc_ValueError,
151 PyErr_SetString(PyExc_ValueError,
137 "expected a value, found none");
152 "expected a value, found none");
138 goto bail;
153 goto bail;
139 }
154 }
140
155
141 if (--PYLONG_VALUE(val) <= 0) {
156 if (--PYLONG_VALUE(val) <= 0) {
142 if (PyDict_DelItem(dirs, key) == -1)
157 if (PyDict_DelItem(dirs, key) == -1)
143 goto bail;
158 goto bail;
144 } else
159 } else
145 break;
160 break;
146 Py_CLEAR(key);
161 Py_CLEAR(key);
147 }
162 }
148 ret = 0;
163 ret = 0;
149
164
150 bail:
165 bail:
151 Py_XDECREF(key);
166 Py_XDECREF(key);
152
167
153 return ret;
168 return ret;
154 }
169 }
155
170
156 static int dirs_fromdict(PyObject *dirs, PyObject *source, char skipchar)
171 static int dirs_fromdict(PyObject *dirs, PyObject *source, char skipchar)
157 {
172 {
158 PyObject *key, *value;
173 PyObject *key, *value;
159 Py_ssize_t pos = 0;
174 Py_ssize_t pos = 0;
160
175
161 while (PyDict_Next(source, &pos, &key, &value)) {
176 while (PyDict_Next(source, &pos, &key, &value)) {
162 if (!PyBytes_Check(key)) {
177 if (!PyBytes_Check(key)) {
163 PyErr_SetString(PyExc_TypeError, "expected string key");
178 PyErr_SetString(PyExc_TypeError, "expected string key");
164 return -1;
179 return -1;
165 }
180 }
166 if (skipchar) {
181 if (skipchar) {
167 if (!dirstate_tuple_check(value)) {
182 if (!dirstate_tuple_check(value)) {
168 PyErr_SetString(PyExc_TypeError,
183 PyErr_SetString(PyExc_TypeError,
169 "expected a dirstate tuple");
184 "expected a dirstate tuple");
170 return -1;
185 return -1;
171 }
186 }
172 if (((dirstateTupleObject *)value)->state == skipchar)
187 if (((dirstateTupleObject *)value)->state == skipchar)
173 continue;
188 continue;
174 }
189 }
175
190
176 if (_addpath(dirs, key) == -1)
191 if (_addpath(dirs, key) == -1)
177 return -1;
192 return -1;
178 }
193 }
179
194
180 return 0;
195 return 0;
181 }
196 }
182
197
183 static int dirs_fromiter(PyObject *dirs, PyObject *source)
198 static int dirs_fromiter(PyObject *dirs, PyObject *source)
184 {
199 {
185 PyObject *iter, *item = NULL;
200 PyObject *iter, *item = NULL;
186 int ret;
201 int ret;
187
202
188 iter = PyObject_GetIter(source);
203 iter = PyObject_GetIter(source);
189 if (iter == NULL)
204 if (iter == NULL)
190 return -1;
205 return -1;
191
206
192 while ((item = PyIter_Next(iter)) != NULL) {
207 while ((item = PyIter_Next(iter)) != NULL) {
193 if (!PyBytes_Check(item)) {
208 if (!PyBytes_Check(item)) {
194 PyErr_SetString(PyExc_TypeError, "expected string");
209 PyErr_SetString(PyExc_TypeError, "expected string");
195 break;
210 break;
196 }
211 }
197
212
198 if (_addpath(dirs, item) == -1)
213 if (_addpath(dirs, item) == -1)
199 break;
214 break;
200 Py_CLEAR(item);
215 Py_CLEAR(item);
201 }
216 }
202
217
203 ret = PyErr_Occurred() ? -1 : 0;
218 ret = PyErr_Occurred() ? -1 : 0;
204 Py_DECREF(iter);
219 Py_DECREF(iter);
205 Py_XDECREF(item);
220 Py_XDECREF(item);
206 return ret;
221 return ret;
207 }
222 }
208
223
209 /*
224 /*
210 * Calculate a refcounted set of directory names for the files in a
225 * Calculate a refcounted set of directory names for the files in a
211 * dirstate.
226 * dirstate.
212 */
227 */
213 static int dirs_init(dirsObject *self, PyObject *args)
228 static int dirs_init(dirsObject *self, PyObject *args)
214 {
229 {
215 PyObject *dirs = NULL, *source = NULL;
230 PyObject *dirs = NULL, *source = NULL;
216 char skipchar = 0;
231 char skipchar = 0;
217 int ret = -1;
232 int ret = -1;
218
233
219 self->dict = NULL;
234 self->dict = NULL;
220
235
221 if (!PyArg_ParseTuple(args, "|Oc:__init__", &source, &skipchar))
236 if (!PyArg_ParseTuple(args, "|Oc:__init__", &source, &skipchar))
222 return -1;
237 return -1;
223
238
224 dirs = PyDict_New();
239 dirs = PyDict_New();
225
240
226 if (dirs == NULL)
241 if (dirs == NULL)
227 return -1;
242 return -1;
228
243
229 if (source == NULL)
244 if (source == NULL)
230 ret = 0;
245 ret = 0;
231 else if (PyDict_Check(source))
246 else if (PyDict_Check(source))
232 ret = dirs_fromdict(dirs, source, skipchar);
247 ret = dirs_fromdict(dirs, source, skipchar);
233 else if (skipchar)
248 else if (skipchar)
234 PyErr_SetString(PyExc_ValueError,
249 PyErr_SetString(PyExc_ValueError,
235 "skip character is only supported "
250 "skip character is only supported "
236 "with a dict source");
251 "with a dict source");
237 else
252 else
238 ret = dirs_fromiter(dirs, source);
253 ret = dirs_fromiter(dirs, source);
239
254
240 if (ret == -1)
255 if (ret == -1)
241 Py_XDECREF(dirs);
256 Py_XDECREF(dirs);
242 else
257 else
243 self->dict = dirs;
258 self->dict = dirs;
244
259
245 return ret;
260 return ret;
246 }
261 }
247
262
248 PyObject *dirs_addpath(dirsObject *self, PyObject *args)
263 PyObject *dirs_addpath(dirsObject *self, PyObject *args)
249 {
264 {
250 PyObject *path;
265 PyObject *path;
251
266
252 if (!PyArg_ParseTuple(args, "O!:addpath", &PyBytes_Type, &path))
267 if (!PyArg_ParseTuple(args, "O!:addpath", &PyBytes_Type, &path))
253 return NULL;
268 return NULL;
254
269
255 if (_addpath(self->dict, path) == -1)
270 if (_addpath(self->dict, path) == -1)
256 return NULL;
271 return NULL;
257
272
258 Py_RETURN_NONE;
273 Py_RETURN_NONE;
259 }
274 }
260
275
261 static PyObject *dirs_delpath(dirsObject *self, PyObject *args)
276 static PyObject *dirs_delpath(dirsObject *self, PyObject *args)
262 {
277 {
263 PyObject *path;
278 PyObject *path;
264
279
265 if (!PyArg_ParseTuple(args, "O!:delpath", &PyBytes_Type, &path))
280 if (!PyArg_ParseTuple(args, "O!:delpath", &PyBytes_Type, &path))
266 return NULL;
281 return NULL;
267
282
268 if (_delpath(self->dict, path) == -1)
283 if (_delpath(self->dict, path) == -1)
269 return NULL;
284 return NULL;
270
285
271 Py_RETURN_NONE;
286 Py_RETURN_NONE;
272 }
287 }
273
288
274 static int dirs_contains(dirsObject *self, PyObject *value)
289 static int dirs_contains(dirsObject *self, PyObject *value)
275 {
290 {
276 return PyBytes_Check(value) ? PyDict_Contains(self->dict, value) : 0;
291 return PyBytes_Check(value) ? PyDict_Contains(self->dict, value) : 0;
277 }
292 }
278
293
279 static void dirs_dealloc(dirsObject *self)
294 static void dirs_dealloc(dirsObject *self)
280 {
295 {
281 Py_XDECREF(self->dict);
296 Py_XDECREF(self->dict);
282 PyObject_Del(self);
297 PyObject_Del(self);
283 }
298 }
284
299
285 static PyObject *dirs_iter(dirsObject *self)
300 static PyObject *dirs_iter(dirsObject *self)
286 {
301 {
287 return PyObject_GetIter(self->dict);
302 return PyObject_GetIter(self->dict);
288 }
303 }
289
304
290 static PySequenceMethods dirs_sequence_methods;
305 static PySequenceMethods dirs_sequence_methods;
291
306
292 static PyMethodDef dirs_methods[] = {
307 static PyMethodDef dirs_methods[] = {
293 {"addpath", (PyCFunction)dirs_addpath, METH_VARARGS, "add a path"},
308 {"addpath", (PyCFunction)dirs_addpath, METH_VARARGS, "add a path"},
294 {"delpath", (PyCFunction)dirs_delpath, METH_VARARGS, "remove a path"},
309 {"delpath", (PyCFunction)dirs_delpath, METH_VARARGS, "remove a path"},
295 {NULL} /* Sentinel */
310 {NULL} /* Sentinel */
296 };
311 };
297
312
298 static PyTypeObject dirsType = { PyVarObject_HEAD_INIT(NULL, 0) };
313 static PyTypeObject dirsType = { PyVarObject_HEAD_INIT(NULL, 0) };
299
314
300 void dirs_module_init(PyObject *mod)
315 void dirs_module_init(PyObject *mod)
301 {
316 {
302 dirs_sequence_methods.sq_contains = (objobjproc)dirs_contains;
317 dirs_sequence_methods.sq_contains = (objobjproc)dirs_contains;
303 dirsType.tp_name = "parsers.dirs";
318 dirsType.tp_name = "parsers.dirs";
304 dirsType.tp_new = PyType_GenericNew;
319 dirsType.tp_new = PyType_GenericNew;
305 dirsType.tp_basicsize = sizeof(dirsObject);
320 dirsType.tp_basicsize = sizeof(dirsObject);
306 dirsType.tp_dealloc = (destructor)dirs_dealloc;
321 dirsType.tp_dealloc = (destructor)dirs_dealloc;
307 dirsType.tp_as_sequence = &dirs_sequence_methods;
322 dirsType.tp_as_sequence = &dirs_sequence_methods;
308 dirsType.tp_flags = Py_TPFLAGS_DEFAULT;
323 dirsType.tp_flags = Py_TPFLAGS_DEFAULT;
309 dirsType.tp_doc = "dirs";
324 dirsType.tp_doc = "dirs";
310 dirsType.tp_iter = (getiterfunc)dirs_iter;
325 dirsType.tp_iter = (getiterfunc)dirs_iter;
311 dirsType.tp_methods = dirs_methods;
326 dirsType.tp_methods = dirs_methods;
312 dirsType.tp_init = (initproc)dirs_init;
327 dirsType.tp_init = (initproc)dirs_init;
313
328
314 if (PyType_Ready(&dirsType) < 0)
329 if (PyType_Ready(&dirsType) < 0)
315 return;
330 return;
316 Py_INCREF(&dirsType);
331 Py_INCREF(&dirsType);
317
332
318 PyModule_AddObject(mod, "dirs", (PyObject *)&dirsType);
333 PyModule_AddObject(mod, "dirs", (PyObject *)&dirsType);
319 }
334 }
General Comments 0
You need to be logged in to leave comments. Login now