##// END OF EJS Templates
dirs: resolve fuzzer OOM situation by disallowing deep directory hierarchies...
Augie Fackler -
r44057:0796e266 default
parent child Browse files
Show More
@@ -1,313 +1,327
1 /*
1 /*
2 dirs.c - dynamic directory diddling for dirstates
2 dirs.c - dynamic directory diddling for dirstates
3
3
4 Copyright 2013 Facebook
4 Copyright 2013 Facebook
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8 */
8 */
9
9
10 #define PY_SSIZE_T_CLEAN
10 #define PY_SSIZE_T_CLEAN
11 #include <Python.h>
11 #include <Python.h>
12 #include <string.h>
12
13
13 #include "util.h"
14 #include "util.h"
14
15
15 #ifdef IS_PY3K
16 #ifdef IS_PY3K
16 #define PYLONG_VALUE(o) ((PyLongObject *)o)->ob_digit[1]
17 #define PYLONG_VALUE(o) ((PyLongObject *)o)->ob_digit[1]
17 #else
18 #else
18 #define PYLONG_VALUE(o) PyInt_AS_LONG(o)
19 #define PYLONG_VALUE(o) PyInt_AS_LONG(o)
19 #endif
20 #endif
20
21
21 /*
22 /*
22 * This is a multiset of directory names, built from the files that
23 * This is a multiset of directory names, built from the files that
23 * appear in a dirstate or manifest.
24 * appear in a dirstate or manifest.
24 *
25 *
25 * A few implementation notes:
26 * A few implementation notes:
26 *
27 *
27 * We modify Python integers for refcounting, but those integers are
28 * We modify Python integers for refcounting, but those integers are
28 * never visible to Python code.
29 * never visible to Python code.
29 */
30 */
30 /* clang-format off */
31 /* clang-format off */
31 typedef struct {
32 typedef struct {
32 PyObject_HEAD
33 PyObject_HEAD
33 PyObject *dict;
34 PyObject *dict;
34 } dirsObject;
35 } dirsObject;
35 /* clang-format on */
36 /* clang-format on */
36
37
37 static inline Py_ssize_t _finddir(const char *path, Py_ssize_t pos)
38 static inline Py_ssize_t _finddir(const char *path, Py_ssize_t pos)
38 {
39 {
39 while (pos != -1) {
40 while (pos != -1) {
40 if (path[pos] == '/')
41 if (path[pos] == '/')
41 break;
42 break;
42 pos -= 1;
43 pos -= 1;
43 }
44 }
44 if (pos == -1) {
45 if (pos == -1) {
45 return 0;
46 return 0;
46 }
47 }
47
48
48 return pos;
49 return pos;
49 }
50 }
50
51
52 /* Paths with more directory levels than this constant are rejected so
53 * that pathological inputs cannot exhaust memory. Mercurial will fail to
54 * run on deeper hierarchies, so keep this constant as big as possible.
55 */
56 #define MAX_DIRS_DEPTH 2048
57
51 static int _addpath(PyObject *dirs, PyObject *path)
58 static int _addpath(PyObject *dirs, PyObject *path)
52 {
59 {
53 const char *cpath = PyBytes_AS_STRING(path);
60 const char *cpath = PyBytes_AS_STRING(path);
54 Py_ssize_t pos = PyBytes_GET_SIZE(path);
61 Py_ssize_t pos = PyBytes_GET_SIZE(path);
55 PyObject *key = NULL;
62 PyObject *key = NULL;
56 int ret = -1;
63 int ret = -1;
64 size_t num_slashes = 0;
57
65
58 /* This loop is super critical for performance. That's why we inline
66 /* This loop is super critical for performance. That's why we inline
59 * access to Python structs instead of going through a supported API.
67 * access to Python structs instead of going through a supported API.
60 * The implementation, therefore, is heavily dependent on CPython
68 * The implementation, therefore, is heavily dependent on CPython
61 * implementation details. We also commit violations of the Python
69 * implementation details. We also commit violations of the Python
62 * "protocol" such as mutating immutable objects. But since we only
70 * "protocol" such as mutating immutable objects. But since we only
63 * mutate objects created in this function or in other well-defined
71 * mutate objects created in this function or in other well-defined
64 * locations, the references are known so these violations should go
72 * locations, the references are known so these violations should go
65 * unnoticed. */
73 * unnoticed. */
66 while ((pos = _finddir(cpath, pos - 1)) != -1) {
74 while ((pos = _finddir(cpath, pos - 1)) != -1) {
67 PyObject *val;
75 PyObject *val;
76 ++num_slashes;
77 if (num_slashes > MAX_DIRS_DEPTH) {
78 PyErr_SetString(PyExc_ValueError,
79 "Directory hierarchy too deep.");
80 goto bail;
81 }
68
82
69 /* Sniff for consecutive slashes, a marker of an invalid input. */
83 /* Sniff for consecutive slashes, a marker of an invalid input. */
70 if (pos > 0 && cpath[pos - 1] == '/') {
84 if (pos > 0 && cpath[pos - 1] == '/') {
71 PyErr_SetString(
85 PyErr_SetString(
72 PyExc_ValueError,
86 PyExc_ValueError,
73 "found invalid consecutive slashes in path");
87 "found invalid consecutive slashes in path");
74 goto bail;
88 goto bail;
75 }
89 }
76
90
77 key = PyBytes_FromStringAndSize(cpath, pos);
91 key = PyBytes_FromStringAndSize(cpath, pos);
78 if (key == NULL)
92 if (key == NULL)
79 goto bail;
93 goto bail;
80
94
81 val = PyDict_GetItem(dirs, key);
95 val = PyDict_GetItem(dirs, key);
82 if (val != NULL) {
96 if (val != NULL) {
83 PYLONG_VALUE(val) += 1;
97 PYLONG_VALUE(val) += 1;
84 Py_CLEAR(key);
98 Py_CLEAR(key);
85 break;
99 break;
86 }
100 }
87
101
88 /* Force Python to not reuse a small shared int. */
102 /* Force Python to not reuse a small shared int. */
89 #ifdef IS_PY3K
103 #ifdef IS_PY3K
90 val = PyLong_FromLong(0x1eadbeef);
104 val = PyLong_FromLong(0x1eadbeef);
91 #else
105 #else
92 val = PyInt_FromLong(0x1eadbeef);
106 val = PyInt_FromLong(0x1eadbeef);
93 #endif
107 #endif
94
108
95 if (val == NULL)
109 if (val == NULL)
96 goto bail;
110 goto bail;
97
111
98 PYLONG_VALUE(val) = 1;
112 PYLONG_VALUE(val) = 1;
99 ret = PyDict_SetItem(dirs, key, val);
113 ret = PyDict_SetItem(dirs, key, val);
100 Py_DECREF(val);
114 Py_DECREF(val);
101 if (ret == -1)
115 if (ret == -1)
102 goto bail;
116 goto bail;
103 Py_CLEAR(key);
117 Py_CLEAR(key);
104 }
118 }
105 ret = 0;
119 ret = 0;
106
120
107 bail:
121 bail:
108 Py_XDECREF(key);
122 Py_XDECREF(key);
109
123
110 return ret;
124 return ret;
111 }
125 }
112
126
113 static int _delpath(PyObject *dirs, PyObject *path)
127 static int _delpath(PyObject *dirs, PyObject *path)
114 {
128 {
115 char *cpath = PyBytes_AS_STRING(path);
129 char *cpath = PyBytes_AS_STRING(path);
116 Py_ssize_t pos = PyBytes_GET_SIZE(path);
130 Py_ssize_t pos = PyBytes_GET_SIZE(path);
117 PyObject *key = NULL;
131 PyObject *key = NULL;
118 int ret = -1;
132 int ret = -1;
119
133
120 while ((pos = _finddir(cpath, pos - 1)) != -1) {
134 while ((pos = _finddir(cpath, pos - 1)) != -1) {
121 PyObject *val;
135 PyObject *val;
122
136
123 key = PyBytes_FromStringAndSize(cpath, pos);
137 key = PyBytes_FromStringAndSize(cpath, pos);
124
138
125 if (key == NULL)
139 if (key == NULL)
126 goto bail;
140 goto bail;
127
141
128 val = PyDict_GetItem(dirs, key);
142 val = PyDict_GetItem(dirs, key);
129 if (val == NULL) {
143 if (val == NULL) {
130 PyErr_SetString(PyExc_ValueError,
144 PyErr_SetString(PyExc_ValueError,
131 "expected a value, found none");
145 "expected a value, found none");
132 goto bail;
146 goto bail;
133 }
147 }
134
148
135 if (--PYLONG_VALUE(val) <= 0) {
149 if (--PYLONG_VALUE(val) <= 0) {
136 if (PyDict_DelItem(dirs, key) == -1)
150 if (PyDict_DelItem(dirs, key) == -1)
137 goto bail;
151 goto bail;
138 } else
152 } else
139 break;
153 break;
140 Py_CLEAR(key);
154 Py_CLEAR(key);
141 }
155 }
142 ret = 0;
156 ret = 0;
143
157
144 bail:
158 bail:
145 Py_XDECREF(key);
159 Py_XDECREF(key);
146
160
147 return ret;
161 return ret;
148 }
162 }
149
163
150 static int dirs_fromdict(PyObject *dirs, PyObject *source, char skipchar)
164 static int dirs_fromdict(PyObject *dirs, PyObject *source, char skipchar)
151 {
165 {
152 PyObject *key, *value;
166 PyObject *key, *value;
153 Py_ssize_t pos = 0;
167 Py_ssize_t pos = 0;
154
168
155 while (PyDict_Next(source, &pos, &key, &value)) {
169 while (PyDict_Next(source, &pos, &key, &value)) {
156 if (!PyBytes_Check(key)) {
170 if (!PyBytes_Check(key)) {
157 PyErr_SetString(PyExc_TypeError, "expected string key");
171 PyErr_SetString(PyExc_TypeError, "expected string key");
158 return -1;
172 return -1;
159 }
173 }
160 if (skipchar) {
174 if (skipchar) {
161 if (!dirstate_tuple_check(value)) {
175 if (!dirstate_tuple_check(value)) {
162 PyErr_SetString(PyExc_TypeError,
176 PyErr_SetString(PyExc_TypeError,
163 "expected a dirstate tuple");
177 "expected a dirstate tuple");
164 return -1;
178 return -1;
165 }
179 }
166 if (((dirstateTupleObject *)value)->state == skipchar)
180 if (((dirstateTupleObject *)value)->state == skipchar)
167 continue;
181 continue;
168 }
182 }
169
183
170 if (_addpath(dirs, key) == -1)
184 if (_addpath(dirs, key) == -1)
171 return -1;
185 return -1;
172 }
186 }
173
187
174 return 0;
188 return 0;
175 }
189 }
176
190
177 static int dirs_fromiter(PyObject *dirs, PyObject *source)
191 static int dirs_fromiter(PyObject *dirs, PyObject *source)
178 {
192 {
179 PyObject *iter, *item = NULL;
193 PyObject *iter, *item = NULL;
180 int ret;
194 int ret;
181
195
182 iter = PyObject_GetIter(source);
196 iter = PyObject_GetIter(source);
183 if (iter == NULL)
197 if (iter == NULL)
184 return -1;
198 return -1;
185
199
186 while ((item = PyIter_Next(iter)) != NULL) {
200 while ((item = PyIter_Next(iter)) != NULL) {
187 if (!PyBytes_Check(item)) {
201 if (!PyBytes_Check(item)) {
188 PyErr_SetString(PyExc_TypeError, "expected string");
202 PyErr_SetString(PyExc_TypeError, "expected string");
189 break;
203 break;
190 }
204 }
191
205
192 if (_addpath(dirs, item) == -1)
206 if (_addpath(dirs, item) == -1)
193 break;
207 break;
194 Py_CLEAR(item);
208 Py_CLEAR(item);
195 }
209 }
196
210
197 ret = PyErr_Occurred() ? -1 : 0;
211 ret = PyErr_Occurred() ? -1 : 0;
198 Py_DECREF(iter);
212 Py_DECREF(iter);
199 Py_XDECREF(item);
213 Py_XDECREF(item);
200 return ret;
214 return ret;
201 }
215 }
202
216
203 /*
217 /*
204 * Calculate a refcounted set of directory names for the files in a
218 * Calculate a refcounted set of directory names for the files in a
205 * dirstate.
219 * dirstate.
206 */
220 */
207 static int dirs_init(dirsObject *self, PyObject *args)
221 static int dirs_init(dirsObject *self, PyObject *args)
208 {
222 {
209 PyObject *dirs = NULL, *source = NULL;
223 PyObject *dirs = NULL, *source = NULL;
210 char skipchar = 0;
224 char skipchar = 0;
211 int ret = -1;
225 int ret = -1;
212
226
213 self->dict = NULL;
227 self->dict = NULL;
214
228
215 if (!PyArg_ParseTuple(args, "|Oc:__init__", &source, &skipchar))
229 if (!PyArg_ParseTuple(args, "|Oc:__init__", &source, &skipchar))
216 return -1;
230 return -1;
217
231
218 dirs = PyDict_New();
232 dirs = PyDict_New();
219
233
220 if (dirs == NULL)
234 if (dirs == NULL)
221 return -1;
235 return -1;
222
236
223 if (source == NULL)
237 if (source == NULL)
224 ret = 0;
238 ret = 0;
225 else if (PyDict_Check(source))
239 else if (PyDict_Check(source))
226 ret = dirs_fromdict(dirs, source, skipchar);
240 ret = dirs_fromdict(dirs, source, skipchar);
227 else if (skipchar)
241 else if (skipchar)
228 PyErr_SetString(PyExc_ValueError,
242 PyErr_SetString(PyExc_ValueError,
229 "skip character is only supported "
243 "skip character is only supported "
230 "with a dict source");
244 "with a dict source");
231 else
245 else
232 ret = dirs_fromiter(dirs, source);
246 ret = dirs_fromiter(dirs, source);
233
247
234 if (ret == -1)
248 if (ret == -1)
235 Py_XDECREF(dirs);
249 Py_XDECREF(dirs);
236 else
250 else
237 self->dict = dirs;
251 self->dict = dirs;
238
252
239 return ret;
253 return ret;
240 }
254 }
241
255
242 PyObject *dirs_addpath(dirsObject *self, PyObject *args)
256 PyObject *dirs_addpath(dirsObject *self, PyObject *args)
243 {
257 {
244 PyObject *path;
258 PyObject *path;
245
259
246 if (!PyArg_ParseTuple(args, "O!:addpath", &PyBytes_Type, &path))
260 if (!PyArg_ParseTuple(args, "O!:addpath", &PyBytes_Type, &path))
247 return NULL;
261 return NULL;
248
262
249 if (_addpath(self->dict, path) == -1)
263 if (_addpath(self->dict, path) == -1)
250 return NULL;
264 return NULL;
251
265
252 Py_RETURN_NONE;
266 Py_RETURN_NONE;
253 }
267 }
254
268
255 static PyObject *dirs_delpath(dirsObject *self, PyObject *args)
269 static PyObject *dirs_delpath(dirsObject *self, PyObject *args)
256 {
270 {
257 PyObject *path;
271 PyObject *path;
258
272
259 if (!PyArg_ParseTuple(args, "O!:delpath", &PyBytes_Type, &path))
273 if (!PyArg_ParseTuple(args, "O!:delpath", &PyBytes_Type, &path))
260 return NULL;
274 return NULL;
261
275
262 if (_delpath(self->dict, path) == -1)
276 if (_delpath(self->dict, path) == -1)
263 return NULL;
277 return NULL;
264
278
265 Py_RETURN_NONE;
279 Py_RETURN_NONE;
266 }
280 }
267
281
268 static int dirs_contains(dirsObject *self, PyObject *value)
282 static int dirs_contains(dirsObject *self, PyObject *value)
269 {
283 {
270 return PyBytes_Check(value) ? PyDict_Contains(self->dict, value) : 0;
284 return PyBytes_Check(value) ? PyDict_Contains(self->dict, value) : 0;
271 }
285 }
272
286
273 static void dirs_dealloc(dirsObject *self)
287 static void dirs_dealloc(dirsObject *self)
274 {
288 {
275 Py_XDECREF(self->dict);
289 Py_XDECREF(self->dict);
276 PyObject_Del(self);
290 PyObject_Del(self);
277 }
291 }
278
292
279 static PyObject *dirs_iter(dirsObject *self)
293 static PyObject *dirs_iter(dirsObject *self)
280 {
294 {
281 return PyObject_GetIter(self->dict);
295 return PyObject_GetIter(self->dict);
282 }
296 }
283
297
284 static PySequenceMethods dirs_sequence_methods;
298 static PySequenceMethods dirs_sequence_methods;
285
299
286 static PyMethodDef dirs_methods[] = {
300 static PyMethodDef dirs_methods[] = {
287 {"addpath", (PyCFunction)dirs_addpath, METH_VARARGS, "add a path"},
301 {"addpath", (PyCFunction)dirs_addpath, METH_VARARGS, "add a path"},
288 {"delpath", (PyCFunction)dirs_delpath, METH_VARARGS, "remove a path"},
302 {"delpath", (PyCFunction)dirs_delpath, METH_VARARGS, "remove a path"},
289 {NULL} /* Sentinel */
303 {NULL} /* Sentinel */
290 };
304 };
291
305
292 static PyTypeObject dirsType = {PyVarObject_HEAD_INIT(NULL, 0)};
306 static PyTypeObject dirsType = {PyVarObject_HEAD_INIT(NULL, 0)};
293
307
294 void dirs_module_init(PyObject *mod)
308 void dirs_module_init(PyObject *mod)
295 {
309 {
296 dirs_sequence_methods.sq_contains = (objobjproc)dirs_contains;
310 dirs_sequence_methods.sq_contains = (objobjproc)dirs_contains;
297 dirsType.tp_name = "parsers.dirs";
311 dirsType.tp_name = "parsers.dirs";
298 dirsType.tp_new = PyType_GenericNew;
312 dirsType.tp_new = PyType_GenericNew;
299 dirsType.tp_basicsize = sizeof(dirsObject);
313 dirsType.tp_basicsize = sizeof(dirsObject);
300 dirsType.tp_dealloc = (destructor)dirs_dealloc;
314 dirsType.tp_dealloc = (destructor)dirs_dealloc;
301 dirsType.tp_as_sequence = &dirs_sequence_methods;
315 dirsType.tp_as_sequence = &dirs_sequence_methods;
302 dirsType.tp_flags = Py_TPFLAGS_DEFAULT;
316 dirsType.tp_flags = Py_TPFLAGS_DEFAULT;
303 dirsType.tp_doc = "dirs";
317 dirsType.tp_doc = "dirs";
304 dirsType.tp_iter = (getiterfunc)dirs_iter;
318 dirsType.tp_iter = (getiterfunc)dirs_iter;
305 dirsType.tp_methods = dirs_methods;
319 dirsType.tp_methods = dirs_methods;
306 dirsType.tp_init = (initproc)dirs_init;
320 dirsType.tp_init = (initproc)dirs_init;
307
321
308 if (PyType_Ready(&dirsType) < 0)
322 if (PyType_Ready(&dirsType) < 0)
309 return;
323 return;
310 Py_INCREF(&dirsType);
324 Py_INCREF(&dirsType);
311
325
312 PyModule_AddObject(mod, "dirs", (PyObject *)&dirsType);
326 PyModule_AddObject(mod, "dirs", (PyObject *)&dirsType);
313 }
327 }
General Comments 0
You need to be logged in to leave comments. Login now