##// END OF EJS Templates
dirs: fix out-of-bounds access in Py3...
Martin von Zweigbergk -
r44290:a47ccdcc default
parent child Browse files
Show More
@@ -1,327 +1,327
1 /*
1 /*
2 dirs.c - dynamic directory diddling for dirstates
2 dirs.c - dynamic directory diddling for dirstates
3
3
4 Copyright 2013 Facebook
4 Copyright 2013 Facebook
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8 */
8 */
9
9
10 #define PY_SSIZE_T_CLEAN
10 #define PY_SSIZE_T_CLEAN
11 #include <Python.h>
11 #include <Python.h>
12 #include <string.h>
12 #include <string.h>
13
13
14 #include "util.h"
14 #include "util.h"
15
15
/*
 * PYLONG_VALUE(o) expands to an lvalue naming the integer payload stored
 * inside the Python integer object `o`, so the payload can be read and
 * modified in place.
 *
 * On Python 3 this is the first digit (index 0) of the PyLongObject.
 * Index 1 must not be used: it reads and writes past the digit storage of
 * an integer that only has a single digit. On Python 2 the payload is the
 * value exposed by PyInt_AS_LONG().
 */
#ifdef IS_PY3K
#define PYLONG_VALUE(o) (((PyLongObject *)(o))->ob_digit[0])
#else
#define PYLONG_VALUE(o) PyInt_AS_LONG(o)
#endif
21
21
22 /*
22 /*
23 * This is a multiset of directory names, built from the files that
23 * This is a multiset of directory names, built from the files that
24 * appear in a dirstate or manifest.
24 * appear in a dirstate or manifest.
25 *
25 *
26 * A few implementation notes:
26 * A few implementation notes:
27 *
27 *
28 * We modify Python integers for refcounting, but those integers are
28 * We modify Python integers for refcounting, but those integers are
29 * never visible to Python code.
29 * never visible to Python code.
30 */
30 */
31 /* clang-format off */
31 /* clang-format off */
32 typedef struct {
32 typedef struct {
33 PyObject_HEAD
33 PyObject_HEAD
34 PyObject *dict;
34 PyObject *dict;
35 } dirsObject;
35 } dirsObject;
36 /* clang-format on */
36 /* clang-format on */
37
37
38 static inline Py_ssize_t _finddir(const char *path, Py_ssize_t pos)
38 static inline Py_ssize_t _finddir(const char *path, Py_ssize_t pos)
39 {
39 {
40 while (pos != -1) {
40 while (pos != -1) {
41 if (path[pos] == '/')
41 if (path[pos] == '/')
42 break;
42 break;
43 pos -= 1;
43 pos -= 1;
44 }
44 }
45 if (pos == -1) {
45 if (pos == -1) {
46 return 0;
46 return 0;
47 }
47 }
48
48
49 return pos;
49 return pos;
50 }
50 }
51
51
/*
 * Largest number of directory levels accepted for a single path.
 * Mercurial will fail to run on directory hierarchies deeper than this
 * constant, so it should be kept as big as possible.
 */
#define MAX_DIRS_DEPTH 2048
57
57
58 static int _addpath(PyObject *dirs, PyObject *path)
58 static int _addpath(PyObject *dirs, PyObject *path)
59 {
59 {
60 const char *cpath = PyBytes_AS_STRING(path);
60 const char *cpath = PyBytes_AS_STRING(path);
61 Py_ssize_t pos = PyBytes_GET_SIZE(path);
61 Py_ssize_t pos = PyBytes_GET_SIZE(path);
62 PyObject *key = NULL;
62 PyObject *key = NULL;
63 int ret = -1;
63 int ret = -1;
64 size_t num_slashes = 0;
64 size_t num_slashes = 0;
65
65
66 /* This loop is super critical for performance. That's why we inline
66 /* This loop is super critical for performance. That's why we inline
67 * access to Python structs instead of going through a supported API.
67 * access to Python structs instead of going through a supported API.
68 * The implementation, therefore, is heavily dependent on CPython
68 * The implementation, therefore, is heavily dependent on CPython
69 * implementation details. We also commit violations of the Python
69 * implementation details. We also commit violations of the Python
70 * "protocol" such as mutating immutable objects. But since we only
70 * "protocol" such as mutating immutable objects. But since we only
71 * mutate objects created in this function or in other well-defined
71 * mutate objects created in this function or in other well-defined
72 * locations, the references are known so these violations should go
72 * locations, the references are known so these violations should go
73 * unnoticed. */
73 * unnoticed. */
74 while ((pos = _finddir(cpath, pos - 1)) != -1) {
74 while ((pos = _finddir(cpath, pos - 1)) != -1) {
75 PyObject *val;
75 PyObject *val;
76 ++num_slashes;
76 ++num_slashes;
77 if (num_slashes > MAX_DIRS_DEPTH) {
77 if (num_slashes > MAX_DIRS_DEPTH) {
78 PyErr_SetString(PyExc_ValueError,
78 PyErr_SetString(PyExc_ValueError,
79 "Directory hierarchy too deep.");
79 "Directory hierarchy too deep.");
80 goto bail;
80 goto bail;
81 }
81 }
82
82
83 /* Sniff for trailing slashes, a marker of an invalid input. */
83 /* Sniff for trailing slashes, a marker of an invalid input. */
84 if (pos > 0 && cpath[pos - 1] == '/') {
84 if (pos > 0 && cpath[pos - 1] == '/') {
85 PyErr_SetString(
85 PyErr_SetString(
86 PyExc_ValueError,
86 PyExc_ValueError,
87 "found invalid consecutive slashes in path");
87 "found invalid consecutive slashes in path");
88 goto bail;
88 goto bail;
89 }
89 }
90
90
91 key = PyBytes_FromStringAndSize(cpath, pos);
91 key = PyBytes_FromStringAndSize(cpath, pos);
92 if (key == NULL)
92 if (key == NULL)
93 goto bail;
93 goto bail;
94
94
95 val = PyDict_GetItem(dirs, key);
95 val = PyDict_GetItem(dirs, key);
96 if (val != NULL) {
96 if (val != NULL) {
97 PYLONG_VALUE(val) += 1;
97 PYLONG_VALUE(val) += 1;
98 Py_CLEAR(key);
98 Py_CLEAR(key);
99 break;
99 break;
100 }
100 }
101
101
102 /* Force Python to not reuse a small shared int. */
102 /* Force Python to not reuse a small shared int. */
103 #ifdef IS_PY3K
103 #ifdef IS_PY3K
104 val = PyLong_FromLong(0x1eadbeef);
104 val = PyLong_FromLong(0x1eadbeef);
105 #else
105 #else
106 val = PyInt_FromLong(0x1eadbeef);
106 val = PyInt_FromLong(0x1eadbeef);
107 #endif
107 #endif
108
108
109 if (val == NULL)
109 if (val == NULL)
110 goto bail;
110 goto bail;
111
111
112 PYLONG_VALUE(val) = 1;
112 PYLONG_VALUE(val) = 1;
113 ret = PyDict_SetItem(dirs, key, val);
113 ret = PyDict_SetItem(dirs, key, val);
114 Py_DECREF(val);
114 Py_DECREF(val);
115 if (ret == -1)
115 if (ret == -1)
116 goto bail;
116 goto bail;
117 Py_CLEAR(key);
117 Py_CLEAR(key);
118 }
118 }
119 ret = 0;
119 ret = 0;
120
120
121 bail:
121 bail:
122 Py_XDECREF(key);
122 Py_XDECREF(key);
123
123
124 return ret;
124 return ret;
125 }
125 }
126
126
127 static int _delpath(PyObject *dirs, PyObject *path)
127 static int _delpath(PyObject *dirs, PyObject *path)
128 {
128 {
129 char *cpath = PyBytes_AS_STRING(path);
129 char *cpath = PyBytes_AS_STRING(path);
130 Py_ssize_t pos = PyBytes_GET_SIZE(path);
130 Py_ssize_t pos = PyBytes_GET_SIZE(path);
131 PyObject *key = NULL;
131 PyObject *key = NULL;
132 int ret = -1;
132 int ret = -1;
133
133
134 while ((pos = _finddir(cpath, pos - 1)) != -1) {
134 while ((pos = _finddir(cpath, pos - 1)) != -1) {
135 PyObject *val;
135 PyObject *val;
136
136
137 key = PyBytes_FromStringAndSize(cpath, pos);
137 key = PyBytes_FromStringAndSize(cpath, pos);
138
138
139 if (key == NULL)
139 if (key == NULL)
140 goto bail;
140 goto bail;
141
141
142 val = PyDict_GetItem(dirs, key);
142 val = PyDict_GetItem(dirs, key);
143 if (val == NULL) {
143 if (val == NULL) {
144 PyErr_SetString(PyExc_ValueError,
144 PyErr_SetString(PyExc_ValueError,
145 "expected a value, found none");
145 "expected a value, found none");
146 goto bail;
146 goto bail;
147 }
147 }
148
148
149 if (--PYLONG_VALUE(val) <= 0) {
149 if (--PYLONG_VALUE(val) <= 0) {
150 if (PyDict_DelItem(dirs, key) == -1)
150 if (PyDict_DelItem(dirs, key) == -1)
151 goto bail;
151 goto bail;
152 } else
152 } else
153 break;
153 break;
154 Py_CLEAR(key);
154 Py_CLEAR(key);
155 }
155 }
156 ret = 0;
156 ret = 0;
157
157
158 bail:
158 bail:
159 Py_XDECREF(key);
159 Py_XDECREF(key);
160
160
161 return ret;
161 return ret;
162 }
162 }
163
163
164 static int dirs_fromdict(PyObject *dirs, PyObject *source, char skipchar)
164 static int dirs_fromdict(PyObject *dirs, PyObject *source, char skipchar)
165 {
165 {
166 PyObject *key, *value;
166 PyObject *key, *value;
167 Py_ssize_t pos = 0;
167 Py_ssize_t pos = 0;
168
168
169 while (PyDict_Next(source, &pos, &key, &value)) {
169 while (PyDict_Next(source, &pos, &key, &value)) {
170 if (!PyBytes_Check(key)) {
170 if (!PyBytes_Check(key)) {
171 PyErr_SetString(PyExc_TypeError, "expected string key");
171 PyErr_SetString(PyExc_TypeError, "expected string key");
172 return -1;
172 return -1;
173 }
173 }
174 if (skipchar) {
174 if (skipchar) {
175 if (!dirstate_tuple_check(value)) {
175 if (!dirstate_tuple_check(value)) {
176 PyErr_SetString(PyExc_TypeError,
176 PyErr_SetString(PyExc_TypeError,
177 "expected a dirstate tuple");
177 "expected a dirstate tuple");
178 return -1;
178 return -1;
179 }
179 }
180 if (((dirstateTupleObject *)value)->state == skipchar)
180 if (((dirstateTupleObject *)value)->state == skipchar)
181 continue;
181 continue;
182 }
182 }
183
183
184 if (_addpath(dirs, key) == -1)
184 if (_addpath(dirs, key) == -1)
185 return -1;
185 return -1;
186 }
186 }
187
187
188 return 0;
188 return 0;
189 }
189 }
190
190
191 static int dirs_fromiter(PyObject *dirs, PyObject *source)
191 static int dirs_fromiter(PyObject *dirs, PyObject *source)
192 {
192 {
193 PyObject *iter, *item = NULL;
193 PyObject *iter, *item = NULL;
194 int ret;
194 int ret;
195
195
196 iter = PyObject_GetIter(source);
196 iter = PyObject_GetIter(source);
197 if (iter == NULL)
197 if (iter == NULL)
198 return -1;
198 return -1;
199
199
200 while ((item = PyIter_Next(iter)) != NULL) {
200 while ((item = PyIter_Next(iter)) != NULL) {
201 if (!PyBytes_Check(item)) {
201 if (!PyBytes_Check(item)) {
202 PyErr_SetString(PyExc_TypeError, "expected string");
202 PyErr_SetString(PyExc_TypeError, "expected string");
203 break;
203 break;
204 }
204 }
205
205
206 if (_addpath(dirs, item) == -1)
206 if (_addpath(dirs, item) == -1)
207 break;
207 break;
208 Py_CLEAR(item);
208 Py_CLEAR(item);
209 }
209 }
210
210
211 ret = PyErr_Occurred() ? -1 : 0;
211 ret = PyErr_Occurred() ? -1 : 0;
212 Py_DECREF(iter);
212 Py_DECREF(iter);
213 Py_XDECREF(item);
213 Py_XDECREF(item);
214 return ret;
214 return ret;
215 }
215 }
216
216
217 /*
217 /*
218 * Calculate a refcounted set of directory names for the files in a
218 * Calculate a refcounted set of directory names for the files in a
219 * dirstate.
219 * dirstate.
220 */
220 */
221 static int dirs_init(dirsObject *self, PyObject *args)
221 static int dirs_init(dirsObject *self, PyObject *args)
222 {
222 {
223 PyObject *dirs = NULL, *source = NULL;
223 PyObject *dirs = NULL, *source = NULL;
224 char skipchar = 0;
224 char skipchar = 0;
225 int ret = -1;
225 int ret = -1;
226
226
227 self->dict = NULL;
227 self->dict = NULL;
228
228
229 if (!PyArg_ParseTuple(args, "|Oc:__init__", &source, &skipchar))
229 if (!PyArg_ParseTuple(args, "|Oc:__init__", &source, &skipchar))
230 return -1;
230 return -1;
231
231
232 dirs = PyDict_New();
232 dirs = PyDict_New();
233
233
234 if (dirs == NULL)
234 if (dirs == NULL)
235 return -1;
235 return -1;
236
236
237 if (source == NULL)
237 if (source == NULL)
238 ret = 0;
238 ret = 0;
239 else if (PyDict_Check(source))
239 else if (PyDict_Check(source))
240 ret = dirs_fromdict(dirs, source, skipchar);
240 ret = dirs_fromdict(dirs, source, skipchar);
241 else if (skipchar)
241 else if (skipchar)
242 PyErr_SetString(PyExc_ValueError,
242 PyErr_SetString(PyExc_ValueError,
243 "skip character is only supported "
243 "skip character is only supported "
244 "with a dict source");
244 "with a dict source");
245 else
245 else
246 ret = dirs_fromiter(dirs, source);
246 ret = dirs_fromiter(dirs, source);
247
247
248 if (ret == -1)
248 if (ret == -1)
249 Py_XDECREF(dirs);
249 Py_XDECREF(dirs);
250 else
250 else
251 self->dict = dirs;
251 self->dict = dirs;
252
252
253 return ret;
253 return ret;
254 }
254 }
255
255
256 PyObject *dirs_addpath(dirsObject *self, PyObject *args)
256 PyObject *dirs_addpath(dirsObject *self, PyObject *args)
257 {
257 {
258 PyObject *path;
258 PyObject *path;
259
259
260 if (!PyArg_ParseTuple(args, "O!:addpath", &PyBytes_Type, &path))
260 if (!PyArg_ParseTuple(args, "O!:addpath", &PyBytes_Type, &path))
261 return NULL;
261 return NULL;
262
262
263 if (_addpath(self->dict, path) == -1)
263 if (_addpath(self->dict, path) == -1)
264 return NULL;
264 return NULL;
265
265
266 Py_RETURN_NONE;
266 Py_RETURN_NONE;
267 }
267 }
268
268
269 static PyObject *dirs_delpath(dirsObject *self, PyObject *args)
269 static PyObject *dirs_delpath(dirsObject *self, PyObject *args)
270 {
270 {
271 PyObject *path;
271 PyObject *path;
272
272
273 if (!PyArg_ParseTuple(args, "O!:delpath", &PyBytes_Type, &path))
273 if (!PyArg_ParseTuple(args, "O!:delpath", &PyBytes_Type, &path))
274 return NULL;
274 return NULL;
275
275
276 if (_delpath(self->dict, path) == -1)
276 if (_delpath(self->dict, path) == -1)
277 return NULL;
277 return NULL;
278
278
279 Py_RETURN_NONE;
279 Py_RETURN_NONE;
280 }
280 }
281
281
282 static int dirs_contains(dirsObject *self, PyObject *value)
282 static int dirs_contains(dirsObject *self, PyObject *value)
283 {
283 {
284 return PyBytes_Check(value) ? PyDict_Contains(self->dict, value) : 0;
284 return PyBytes_Check(value) ? PyDict_Contains(self->dict, value) : 0;
285 }
285 }
286
286
287 static void dirs_dealloc(dirsObject *self)
287 static void dirs_dealloc(dirsObject *self)
288 {
288 {
289 Py_XDECREF(self->dict);
289 Py_XDECREF(self->dict);
290 PyObject_Del(self);
290 PyObject_Del(self);
291 }
291 }
292
292
293 static PyObject *dirs_iter(dirsObject *self)
293 static PyObject *dirs_iter(dirsObject *self)
294 {
294 {
295 return PyObject_GetIter(self->dict);
295 return PyObject_GetIter(self->dict);
296 }
296 }
297
297
298 static PySequenceMethods dirs_sequence_methods;
298 static PySequenceMethods dirs_sequence_methods;
299
299
300 static PyMethodDef dirs_methods[] = {
300 static PyMethodDef dirs_methods[] = {
301 {"addpath", (PyCFunction)dirs_addpath, METH_VARARGS, "add a path"},
301 {"addpath", (PyCFunction)dirs_addpath, METH_VARARGS, "add a path"},
302 {"delpath", (PyCFunction)dirs_delpath, METH_VARARGS, "remove a path"},
302 {"delpath", (PyCFunction)dirs_delpath, METH_VARARGS, "remove a path"},
303 {NULL} /* Sentinel */
303 {NULL} /* Sentinel */
304 };
304 };
305
305
306 static PyTypeObject dirsType = {PyVarObject_HEAD_INIT(NULL, 0)};
306 static PyTypeObject dirsType = {PyVarObject_HEAD_INIT(NULL, 0)};
307
307
308 void dirs_module_init(PyObject *mod)
308 void dirs_module_init(PyObject *mod)
309 {
309 {
310 dirs_sequence_methods.sq_contains = (objobjproc)dirs_contains;
310 dirs_sequence_methods.sq_contains = (objobjproc)dirs_contains;
311 dirsType.tp_name = "parsers.dirs";
311 dirsType.tp_name = "parsers.dirs";
312 dirsType.tp_new = PyType_GenericNew;
312 dirsType.tp_new = PyType_GenericNew;
313 dirsType.tp_basicsize = sizeof(dirsObject);
313 dirsType.tp_basicsize = sizeof(dirsObject);
314 dirsType.tp_dealloc = (destructor)dirs_dealloc;
314 dirsType.tp_dealloc = (destructor)dirs_dealloc;
315 dirsType.tp_as_sequence = &dirs_sequence_methods;
315 dirsType.tp_as_sequence = &dirs_sequence_methods;
316 dirsType.tp_flags = Py_TPFLAGS_DEFAULT;
316 dirsType.tp_flags = Py_TPFLAGS_DEFAULT;
317 dirsType.tp_doc = "dirs";
317 dirsType.tp_doc = "dirs";
318 dirsType.tp_iter = (getiterfunc)dirs_iter;
318 dirsType.tp_iter = (getiterfunc)dirs_iter;
319 dirsType.tp_methods = dirs_methods;
319 dirsType.tp_methods = dirs_methods;
320 dirsType.tp_init = (initproc)dirs_init;
320 dirsType.tp_init = (initproc)dirs_init;
321
321
322 if (PyType_Ready(&dirsType) < 0)
322 if (PyType_Ready(&dirsType) < 0)
323 return;
323 return;
324 Py_INCREF(&dirsType);
324 Py_INCREF(&dirsType);
325
325
326 PyModule_AddObject(mod, "dirs", (PyObject *)&dirsType);
326 PyModule_AddObject(mod, "dirs", (PyObject *)&dirsType);
327 }
327 }
General Comments 0
You need to be logged in to leave comments. Login now