##// END OF EJS Templates
dirs: fix trivial over-read of input data...
Augie Fackler -
r43419:2a0774e9 default
parent child Browse files
Show More
@@ -1,319 +1,334
1 1 /*
2 2 dirs.c - dynamic directory diddling for dirstates
3 3
4 4 Copyright 2013 Facebook
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12
13 13 #include "util.h"
14 14
/*
 * PYLONG_VALUE(o) names the raw integer storage of a Python int object
 * as an lvalue, so the counters kept in the dirs dict can be read and
 * modified in place (see the `+= 1`, `= 1` and `--` uses in this file).
 *
 * On Python 3 the value is kept in the ob_digit array of PyLongObject,
 * least-significant digit first, so the counter lives in digit 0.
 * NOTE(review): this assumes every counter fits in a single digit.
 */
#ifdef IS_PY3K
#define PYLONG_VALUE(o) (((PyLongObject *)(o))->ob_digit[0])
#else
#define PYLONG_VALUE(o) PyInt_AS_LONG(o)
#endif
20 20
21 21 /*
22 22 * This is a multiset of directory names, built from the files that
23 23 * appear in a dirstate or manifest.
24 24 *
25 25 * A few implementation notes:
26 26 *
27 27 * We modify Python integers for refcounting, but those integers are
28 28 * never visible to Python code.
29 29 *
30 30 * We mutate strings in-place, but leave them immutable once they can
31 31 * be seen by Python code.
32 32 */
/* Instance layout of the dirs type. */
typedef struct {
	PyObject_HEAD
	/* Dict created in dirs_init; _addpath and _delpath store one bytes
	 * key per directory prefix, with an int value used as a count. */
	PyObject *dict;
} dirsObject;
37 37
38 38 static inline Py_ssize_t _finddir(const char *path, Py_ssize_t pos)
39 39 {
40 40 while (pos != -1) {
41 41 if (path[pos] == '/')
42 42 break;
43 43 pos -= 1;
44 44 }
45 45 if (pos == -1) {
46 46 return 0;
47 47 }
48 48
49 49 return pos;
50 50 }
51 51
52 52 static int _addpath(PyObject *dirs, PyObject *path)
53 53 {
54 54 const char *cpath = PyBytes_AS_STRING(path);
55 55 Py_ssize_t pos = PyBytes_GET_SIZE(path);
56 56 PyObject *key = NULL;
57 57 int ret = -1;
58 58
59 59 /* This loop is super critical for performance. That's why we inline
60 60 * access to Python structs instead of going through a supported API.
61 61 * The implementation, therefore, is heavily dependent on CPython
62 62 * implementation details. We also commit violations of the Python
63 63 * "protocol" such as mutating immutable objects. But since we only
64 64 * mutate objects created in this function or in other well-defined
65 65 * locations, the references are known so these violations should go
66 66 * unnoticed. The code for adjusting the length of a PyBytesObject is
67 67 * essentially a minimal version of _PyBytes_Resize. */
68 68 while ((pos = _finddir(cpath, pos - 1)) != -1) {
69 69 PyObject *val;
70 70
71 if (pos < 2) {
72 key = PyBytes_FromStringAndSize(cpath, pos);
73 if (key == NULL)
74 goto bail;
75 } else {
71 76 /* It's likely that every prefix already has an entry
72 77 in our dict. Try to avoid allocating and
73 78 deallocating a string for each prefix we check. */
74 79 if (key != NULL)
75 80 ((PyBytesObject *)key)->ob_shash = -1;
76 81 else {
77 /* Force Python to not reuse a small shared string. */
78 key = PyBytes_FromStringAndSize(cpath,
79 pos < 2 ? 2 : pos);
82 /* We know pos >= 2, so we won't get a small
83 * shared string. */
84 key = PyBytes_FromStringAndSize(cpath, pos);
80 85 if (key == NULL)
81 86 goto bail;
82 87 }
83 /* Py_SIZE(o) refers to the ob_size member of the struct. Yes,
84 * assigning to what looks like a function seems wrong. */
88 /* Py_SIZE(o) refers to the ob_size member of
89 * the struct. Yes, assigning to what looks
90 * like a function seems wrong. */
85 91 Py_SIZE(key) = pos;
86 92 ((PyBytesObject *)key)->ob_sval[pos] = '\0';
93 }
87 94
88 95 val = PyDict_GetItem(dirs, key);
89 96 if (val != NULL) {
90 97 PYLONG_VALUE(val) += 1;
98 if (pos < 2) {
99 /* This was a short string, so we
100 * probably got a small shared string
101 * we can't mutate on the next loop
102 * iteration. Clear it.
103 */
104 Py_CLEAR(key);
105 }
91 106 break;
92 107 }
93 108
94 109 /* Force Python to not reuse a small shared int. */
95 110 #ifdef IS_PY3K
96 111 val = PyLong_FromLong(0x1eadbeef);
97 112 #else
98 113 val = PyInt_FromLong(0x1eadbeef);
99 114 #endif
100 115
101 116 if (val == NULL)
102 117 goto bail;
103 118
104 119 PYLONG_VALUE(val) = 1;
105 120 ret = PyDict_SetItem(dirs, key, val);
106 121 Py_DECREF(val);
107 122 if (ret == -1)
108 123 goto bail;
109 124 Py_CLEAR(key);
110 125 }
111 126 ret = 0;
112 127
113 128 bail:
114 129 Py_XDECREF(key);
115 130
116 131 return ret;
117 132 }
118 133
119 134 static int _delpath(PyObject *dirs, PyObject *path)
120 135 {
121 136 char *cpath = PyBytes_AS_STRING(path);
122 137 Py_ssize_t pos = PyBytes_GET_SIZE(path);
123 138 PyObject *key = NULL;
124 139 int ret = -1;
125 140
126 141 while ((pos = _finddir(cpath, pos - 1)) != -1) {
127 142 PyObject *val;
128 143
129 144 key = PyBytes_FromStringAndSize(cpath, pos);
130 145
131 146 if (key == NULL)
132 147 goto bail;
133 148
134 149 val = PyDict_GetItem(dirs, key);
135 150 if (val == NULL) {
136 151 PyErr_SetString(PyExc_ValueError,
137 152 "expected a value, found none");
138 153 goto bail;
139 154 }
140 155
141 156 if (--PYLONG_VALUE(val) <= 0) {
142 157 if (PyDict_DelItem(dirs, key) == -1)
143 158 goto bail;
144 159 } else
145 160 break;
146 161 Py_CLEAR(key);
147 162 }
148 163 ret = 0;
149 164
150 165 bail:
151 166 Py_XDECREF(key);
152 167
153 168 return ret;
154 169 }
155 170
156 171 static int dirs_fromdict(PyObject *dirs, PyObject *source, char skipchar)
157 172 {
158 173 PyObject *key, *value;
159 174 Py_ssize_t pos = 0;
160 175
161 176 while (PyDict_Next(source, &pos, &key, &value)) {
162 177 if (!PyBytes_Check(key)) {
163 178 PyErr_SetString(PyExc_TypeError, "expected string key");
164 179 return -1;
165 180 }
166 181 if (skipchar) {
167 182 if (!dirstate_tuple_check(value)) {
168 183 PyErr_SetString(PyExc_TypeError,
169 184 "expected a dirstate tuple");
170 185 return -1;
171 186 }
172 187 if (((dirstateTupleObject *)value)->state == skipchar)
173 188 continue;
174 189 }
175 190
176 191 if (_addpath(dirs, key) == -1)
177 192 return -1;
178 193 }
179 194
180 195 return 0;
181 196 }
182 197
183 198 static int dirs_fromiter(PyObject *dirs, PyObject *source)
184 199 {
185 200 PyObject *iter, *item = NULL;
186 201 int ret;
187 202
188 203 iter = PyObject_GetIter(source);
189 204 if (iter == NULL)
190 205 return -1;
191 206
192 207 while ((item = PyIter_Next(iter)) != NULL) {
193 208 if (!PyBytes_Check(item)) {
194 209 PyErr_SetString(PyExc_TypeError, "expected string");
195 210 break;
196 211 }
197 212
198 213 if (_addpath(dirs, item) == -1)
199 214 break;
200 215 Py_CLEAR(item);
201 216 }
202 217
203 218 ret = PyErr_Occurred() ? -1 : 0;
204 219 Py_DECREF(iter);
205 220 Py_XDECREF(item);
206 221 return ret;
207 222 }
208 223
209 224 /*
210 225 * Calculate a refcounted set of directory names for the files in a
211 226 * dirstate.
212 227 */
213 228 static int dirs_init(dirsObject *self, PyObject *args)
214 229 {
215 230 PyObject *dirs = NULL, *source = NULL;
216 231 char skipchar = 0;
217 232 int ret = -1;
218 233
219 234 self->dict = NULL;
220 235
221 236 if (!PyArg_ParseTuple(args, "|Oc:__init__", &source, &skipchar))
222 237 return -1;
223 238
224 239 dirs = PyDict_New();
225 240
226 241 if (dirs == NULL)
227 242 return -1;
228 243
229 244 if (source == NULL)
230 245 ret = 0;
231 246 else if (PyDict_Check(source))
232 247 ret = dirs_fromdict(dirs, source, skipchar);
233 248 else if (skipchar)
234 249 PyErr_SetString(PyExc_ValueError,
235 250 "skip character is only supported "
236 251 "with a dict source");
237 252 else
238 253 ret = dirs_fromiter(dirs, source);
239 254
240 255 if (ret == -1)
241 256 Py_XDECREF(dirs);
242 257 else
243 258 self->dict = dirs;
244 259
245 260 return ret;
246 261 }
247 262
248 263 PyObject *dirs_addpath(dirsObject *self, PyObject *args)
249 264 {
250 265 PyObject *path;
251 266
252 267 if (!PyArg_ParseTuple(args, "O!:addpath", &PyBytes_Type, &path))
253 268 return NULL;
254 269
255 270 if (_addpath(self->dict, path) == -1)
256 271 return NULL;
257 272
258 273 Py_RETURN_NONE;
259 274 }
260 275
261 276 static PyObject *dirs_delpath(dirsObject *self, PyObject *args)
262 277 {
263 278 PyObject *path;
264 279
265 280 if (!PyArg_ParseTuple(args, "O!:delpath", &PyBytes_Type, &path))
266 281 return NULL;
267 282
268 283 if (_delpath(self->dict, path) == -1)
269 284 return NULL;
270 285
271 286 Py_RETURN_NONE;
272 287 }
273 288
274 289 static int dirs_contains(dirsObject *self, PyObject *value)
275 290 {
276 291 return PyBytes_Check(value) ? PyDict_Contains(self->dict, value) : 0;
277 292 }
278 293
279 294 static void dirs_dealloc(dirsObject *self)
280 295 {
281 296 Py_XDECREF(self->dict);
282 297 PyObject_Del(self);
283 298 }
284 299
285 300 static PyObject *dirs_iter(dirsObject *self)
286 301 {
287 302 return PyObject_GetIter(self->dict);
288 303 }
289 304
290 305 static PySequenceMethods dirs_sequence_methods;
291 306
292 307 static PyMethodDef dirs_methods[] = {
293 308 {"addpath", (PyCFunction)dirs_addpath, METH_VARARGS, "add a path"},
294 309 {"delpath", (PyCFunction)dirs_delpath, METH_VARARGS, "remove a path"},
295 310 {NULL} /* Sentinel */
296 311 };
297 312
298 313 static PyTypeObject dirsType = { PyVarObject_HEAD_INIT(NULL, 0) };
299 314
300 315 void dirs_module_init(PyObject *mod)
301 316 {
302 317 dirs_sequence_methods.sq_contains = (objobjproc)dirs_contains;
303 318 dirsType.tp_name = "parsers.dirs";
304 319 dirsType.tp_new = PyType_GenericNew;
305 320 dirsType.tp_basicsize = sizeof(dirsObject);
306 321 dirsType.tp_dealloc = (destructor)dirs_dealloc;
307 322 dirsType.tp_as_sequence = &dirs_sequence_methods;
308 323 dirsType.tp_flags = Py_TPFLAGS_DEFAULT;
309 324 dirsType.tp_doc = "dirs";
310 325 dirsType.tp_iter = (getiterfunc)dirs_iter;
311 326 dirsType.tp_methods = dirs_methods;
312 327 dirsType.tp_init = (initproc)dirs_init;
313 328
314 329 if (PyType_Ready(&dirsType) < 0)
315 330 return;
316 331 Py_INCREF(&dirsType);
317 332
318 333 PyModule_AddObject(mod, "dirs", (PyObject *)&dirsType);
319 334 }
General Comments 0
You need to be logged in to leave comments. Login now