##// END OF EJS Templates
dirs: remove mutable string optimization at all...
Yuya Nishihara -
r43503:9fa941fa default
parent child Browse files
Show More
@@ -1,334 +1,303 b''
1 /*
1 /*
2 dirs.c - dynamic directory diddling for dirstates
2 dirs.c - dynamic directory diddling for dirstates
3
3
4 Copyright 2013 Facebook
4 Copyright 2013 Facebook
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8 */
8 */
9
9
10 #define PY_SSIZE_T_CLEAN
10 #define PY_SSIZE_T_CLEAN
11 #include <Python.h>
11 #include <Python.h>
12
12
13 #include "util.h"
13 #include "util.h"
14
14
15 #ifdef IS_PY3K
15 #ifdef IS_PY3K
16 #define PYLONG_VALUE(o) ((PyLongObject *)o)->ob_digit[1]
16 #define PYLONG_VALUE(o) ((PyLongObject *)o)->ob_digit[1]
17 #else
17 #else
18 #define PYLONG_VALUE(o) PyInt_AS_LONG(o)
18 #define PYLONG_VALUE(o) PyInt_AS_LONG(o)
19 #endif
19 #endif
20
20
21 /*
21 /*
22 * This is a multiset of directory names, built from the files that
22 * This is a multiset of directory names, built from the files that
23 * appear in a dirstate or manifest.
23 * appear in a dirstate or manifest.
24 *
24 *
25 * A few implementation notes:
25 * A few implementation notes:
26 *
26 *
27 * We modify Python integers for refcounting, but those integers are
27 * We modify Python integers for refcounting, but those integers are
28 * never visible to Python code.
28 * never visible to Python code.
29 *
30 * We mutate strings in-place, but leave them immutable once they can
31 * be seen by Python code.
32 */
29 */
33 typedef struct {
30 typedef struct {
34 PyObject_HEAD
31 PyObject_HEAD
35 PyObject *dict;
32 PyObject *dict;
36 } dirsObject;
33 } dirsObject;
37
34
38 static inline Py_ssize_t _finddir(const char *path, Py_ssize_t pos)
35 static inline Py_ssize_t _finddir(const char *path, Py_ssize_t pos)
39 {
36 {
40 while (pos != -1) {
37 while (pos != -1) {
41 if (path[pos] == '/')
38 if (path[pos] == '/')
42 break;
39 break;
43 pos -= 1;
40 pos -= 1;
44 }
41 }
45 if (pos == -1) {
42 if (pos == -1) {
46 return 0;
43 return 0;
47 }
44 }
48
45
49 return pos;
46 return pos;
50 }
47 }
51
48
52 static int _addpath(PyObject *dirs, PyObject *path)
49 static int _addpath(PyObject *dirs, PyObject *path)
53 {
50 {
54 const char *cpath = PyBytes_AS_STRING(path);
51 const char *cpath = PyBytes_AS_STRING(path);
55 Py_ssize_t pos = PyBytes_GET_SIZE(path);
52 Py_ssize_t pos = PyBytes_GET_SIZE(path);
56 PyObject *key = NULL;
53 PyObject *key = NULL;
57 int ret = -1;
54 int ret = -1;
58
55
59 /* This loop is super critical for performance. That's why we inline
56 /* This loop is super critical for performance. That's why we inline
60 * access to Python structs instead of going through a supported API.
57 * access to Python structs instead of going through a supported API.
61 * The implementation, therefore, is heavily dependent on CPython
58 * The implementation, therefore, is heavily dependent on CPython
62 * implementation details. We also commit violations of the Python
59 * implementation details. We also commit violations of the Python
63 * "protocol" such as mutating immutable objects. But since we only
60 * "protocol" such as mutating immutable objects. But since we only
64 * mutate objects created in this function or in other well-defined
61 * mutate objects created in this function or in other well-defined
65 * locations, the references are known so these violations should go
62 * locations, the references are known so these violations should go
66 * unnoticed. The code for adjusting the length of a PyBytesObject is
63 * unnoticed. */
67 * essentially a minimal version of _PyBytes_Resize. */
68 while ((pos = _finddir(cpath, pos - 1)) != -1) {
64 while ((pos = _finddir(cpath, pos - 1)) != -1) {
69 PyObject *val;
65 PyObject *val;
70
66
71 if (pos < 2) {
67 key = PyBytes_FromStringAndSize(cpath, pos);
72 key = PyBytes_FromStringAndSize(cpath, pos);
68 if (key == NULL)
73 if (key == NULL)
69 goto bail;
74 goto bail;
75 } else {
76 /* It's likely that every prefix already has an entry
77 in our dict. Try to avoid allocating and
78 deallocating a string for each prefix we check. */
79 if (key != NULL)
80 ((PyBytesObject *)key)->ob_shash = -1;
81 else {
82 /* We know pos >= 2, so we won't get a small
83 * shared string. */
84 key = PyBytes_FromStringAndSize(cpath, pos);
85 if (key == NULL)
86 goto bail;
87 }
88 /* Py_SIZE(o) refers to the ob_size member of
89 * the struct. Yes, assigning to what looks
90 * like a function seems wrong. */
91 Py_SIZE(key) = pos;
92 ((PyBytesObject *)key)->ob_sval[pos] = '\0';
93 }
94
70
95 val = PyDict_GetItem(dirs, key);
71 val = PyDict_GetItem(dirs, key);
96 if (val != NULL) {
72 if (val != NULL) {
97 PYLONG_VALUE(val) += 1;
73 PYLONG_VALUE(val) += 1;
98 if (pos < 2) {
74 Py_CLEAR(key);
99 /* This was a short string, so we
100 * probably got a small shared string
101 * we can't mutate on the next loop
102 * iteration. Clear it.
103 */
104 Py_CLEAR(key);
105 }
106 break;
75 break;
107 }
76 }
108
77
109 /* Force Python to not reuse a small shared int. */
78 /* Force Python to not reuse a small shared int. */
110 #ifdef IS_PY3K
79 #ifdef IS_PY3K
111 val = PyLong_FromLong(0x1eadbeef);
80 val = PyLong_FromLong(0x1eadbeef);
112 #else
81 #else
113 val = PyInt_FromLong(0x1eadbeef);
82 val = PyInt_FromLong(0x1eadbeef);
114 #endif
83 #endif
115
84
116 if (val == NULL)
85 if (val == NULL)
117 goto bail;
86 goto bail;
118
87
119 PYLONG_VALUE(val) = 1;
88 PYLONG_VALUE(val) = 1;
120 ret = PyDict_SetItem(dirs, key, val);
89 ret = PyDict_SetItem(dirs, key, val);
121 Py_DECREF(val);
90 Py_DECREF(val);
122 if (ret == -1)
91 if (ret == -1)
123 goto bail;
92 goto bail;
124 Py_CLEAR(key);
93 Py_CLEAR(key);
125 }
94 }
126 ret = 0;
95 ret = 0;
127
96
128 bail:
97 bail:
129 Py_XDECREF(key);
98 Py_XDECREF(key);
130
99
131 return ret;
100 return ret;
132 }
101 }
133
102
134 static int _delpath(PyObject *dirs, PyObject *path)
103 static int _delpath(PyObject *dirs, PyObject *path)
135 {
104 {
136 char *cpath = PyBytes_AS_STRING(path);
105 char *cpath = PyBytes_AS_STRING(path);
137 Py_ssize_t pos = PyBytes_GET_SIZE(path);
106 Py_ssize_t pos = PyBytes_GET_SIZE(path);
138 PyObject *key = NULL;
107 PyObject *key = NULL;
139 int ret = -1;
108 int ret = -1;
140
109
141 while ((pos = _finddir(cpath, pos - 1)) != -1) {
110 while ((pos = _finddir(cpath, pos - 1)) != -1) {
142 PyObject *val;
111 PyObject *val;
143
112
144 key = PyBytes_FromStringAndSize(cpath, pos);
113 key = PyBytes_FromStringAndSize(cpath, pos);
145
114
146 if (key == NULL)
115 if (key == NULL)
147 goto bail;
116 goto bail;
148
117
149 val = PyDict_GetItem(dirs, key);
118 val = PyDict_GetItem(dirs, key);
150 if (val == NULL) {
119 if (val == NULL) {
151 PyErr_SetString(PyExc_ValueError,
120 PyErr_SetString(PyExc_ValueError,
152 "expected a value, found none");
121 "expected a value, found none");
153 goto bail;
122 goto bail;
154 }
123 }
155
124
156 if (--PYLONG_VALUE(val) <= 0) {
125 if (--PYLONG_VALUE(val) <= 0) {
157 if (PyDict_DelItem(dirs, key) == -1)
126 if (PyDict_DelItem(dirs, key) == -1)
158 goto bail;
127 goto bail;
159 } else
128 } else
160 break;
129 break;
161 Py_CLEAR(key);
130 Py_CLEAR(key);
162 }
131 }
163 ret = 0;
132 ret = 0;
164
133
165 bail:
134 bail:
166 Py_XDECREF(key);
135 Py_XDECREF(key);
167
136
168 return ret;
137 return ret;
169 }
138 }
170
139
171 static int dirs_fromdict(PyObject *dirs, PyObject *source, char skipchar)
140 static int dirs_fromdict(PyObject *dirs, PyObject *source, char skipchar)
172 {
141 {
173 PyObject *key, *value;
142 PyObject *key, *value;
174 Py_ssize_t pos = 0;
143 Py_ssize_t pos = 0;
175
144
176 while (PyDict_Next(source, &pos, &key, &value)) {
145 while (PyDict_Next(source, &pos, &key, &value)) {
177 if (!PyBytes_Check(key)) {
146 if (!PyBytes_Check(key)) {
178 PyErr_SetString(PyExc_TypeError, "expected string key");
147 PyErr_SetString(PyExc_TypeError, "expected string key");
179 return -1;
148 return -1;
180 }
149 }
181 if (skipchar) {
150 if (skipchar) {
182 if (!dirstate_tuple_check(value)) {
151 if (!dirstate_tuple_check(value)) {
183 PyErr_SetString(PyExc_TypeError,
152 PyErr_SetString(PyExc_TypeError,
184 "expected a dirstate tuple");
153 "expected a dirstate tuple");
185 return -1;
154 return -1;
186 }
155 }
187 if (((dirstateTupleObject *)value)->state == skipchar)
156 if (((dirstateTupleObject *)value)->state == skipchar)
188 continue;
157 continue;
189 }
158 }
190
159
191 if (_addpath(dirs, key) == -1)
160 if (_addpath(dirs, key) == -1)
192 return -1;
161 return -1;
193 }
162 }
194
163
195 return 0;
164 return 0;
196 }
165 }
197
166
198 static int dirs_fromiter(PyObject *dirs, PyObject *source)
167 static int dirs_fromiter(PyObject *dirs, PyObject *source)
199 {
168 {
200 PyObject *iter, *item = NULL;
169 PyObject *iter, *item = NULL;
201 int ret;
170 int ret;
202
171
203 iter = PyObject_GetIter(source);
172 iter = PyObject_GetIter(source);
204 if (iter == NULL)
173 if (iter == NULL)
205 return -1;
174 return -1;
206
175
207 while ((item = PyIter_Next(iter)) != NULL) {
176 while ((item = PyIter_Next(iter)) != NULL) {
208 if (!PyBytes_Check(item)) {
177 if (!PyBytes_Check(item)) {
209 PyErr_SetString(PyExc_TypeError, "expected string");
178 PyErr_SetString(PyExc_TypeError, "expected string");
210 break;
179 break;
211 }
180 }
212
181
213 if (_addpath(dirs, item) == -1)
182 if (_addpath(dirs, item) == -1)
214 break;
183 break;
215 Py_CLEAR(item);
184 Py_CLEAR(item);
216 }
185 }
217
186
218 ret = PyErr_Occurred() ? -1 : 0;
187 ret = PyErr_Occurred() ? -1 : 0;
219 Py_DECREF(iter);
188 Py_DECREF(iter);
220 Py_XDECREF(item);
189 Py_XDECREF(item);
221 return ret;
190 return ret;
222 }
191 }
223
192
224 /*
193 /*
225 * Calculate a refcounted set of directory names for the files in a
194 * Calculate a refcounted set of directory names for the files in a
226 * dirstate.
195 * dirstate.
227 */
196 */
228 static int dirs_init(dirsObject *self, PyObject *args)
197 static int dirs_init(dirsObject *self, PyObject *args)
229 {
198 {
230 PyObject *dirs = NULL, *source = NULL;
199 PyObject *dirs = NULL, *source = NULL;
231 char skipchar = 0;
200 char skipchar = 0;
232 int ret = -1;
201 int ret = -1;
233
202
234 self->dict = NULL;
203 self->dict = NULL;
235
204
236 if (!PyArg_ParseTuple(args, "|Oc:__init__", &source, &skipchar))
205 if (!PyArg_ParseTuple(args, "|Oc:__init__", &source, &skipchar))
237 return -1;
206 return -1;
238
207
239 dirs = PyDict_New();
208 dirs = PyDict_New();
240
209
241 if (dirs == NULL)
210 if (dirs == NULL)
242 return -1;
211 return -1;
243
212
244 if (source == NULL)
213 if (source == NULL)
245 ret = 0;
214 ret = 0;
246 else if (PyDict_Check(source))
215 else if (PyDict_Check(source))
247 ret = dirs_fromdict(dirs, source, skipchar);
216 ret = dirs_fromdict(dirs, source, skipchar);
248 else if (skipchar)
217 else if (skipchar)
249 PyErr_SetString(PyExc_ValueError,
218 PyErr_SetString(PyExc_ValueError,
250 "skip character is only supported "
219 "skip character is only supported "
251 "with a dict source");
220 "with a dict source");
252 else
221 else
253 ret = dirs_fromiter(dirs, source);
222 ret = dirs_fromiter(dirs, source);
254
223
255 if (ret == -1)
224 if (ret == -1)
256 Py_XDECREF(dirs);
225 Py_XDECREF(dirs);
257 else
226 else
258 self->dict = dirs;
227 self->dict = dirs;
259
228
260 return ret;
229 return ret;
261 }
230 }
262
231
263 PyObject *dirs_addpath(dirsObject *self, PyObject *args)
232 PyObject *dirs_addpath(dirsObject *self, PyObject *args)
264 {
233 {
265 PyObject *path;
234 PyObject *path;
266
235
267 if (!PyArg_ParseTuple(args, "O!:addpath", &PyBytes_Type, &path))
236 if (!PyArg_ParseTuple(args, "O!:addpath", &PyBytes_Type, &path))
268 return NULL;
237 return NULL;
269
238
270 if (_addpath(self->dict, path) == -1)
239 if (_addpath(self->dict, path) == -1)
271 return NULL;
240 return NULL;
272
241
273 Py_RETURN_NONE;
242 Py_RETURN_NONE;
274 }
243 }
275
244
276 static PyObject *dirs_delpath(dirsObject *self, PyObject *args)
245 static PyObject *dirs_delpath(dirsObject *self, PyObject *args)
277 {
246 {
278 PyObject *path;
247 PyObject *path;
279
248
280 if (!PyArg_ParseTuple(args, "O!:delpath", &PyBytes_Type, &path))
249 if (!PyArg_ParseTuple(args, "O!:delpath", &PyBytes_Type, &path))
281 return NULL;
250 return NULL;
282
251
283 if (_delpath(self->dict, path) == -1)
252 if (_delpath(self->dict, path) == -1)
284 return NULL;
253 return NULL;
285
254
286 Py_RETURN_NONE;
255 Py_RETURN_NONE;
287 }
256 }
288
257
289 static int dirs_contains(dirsObject *self, PyObject *value)
258 static int dirs_contains(dirsObject *self, PyObject *value)
290 {
259 {
291 return PyBytes_Check(value) ? PyDict_Contains(self->dict, value) : 0;
260 return PyBytes_Check(value) ? PyDict_Contains(self->dict, value) : 0;
292 }
261 }
293
262
294 static void dirs_dealloc(dirsObject *self)
263 static void dirs_dealloc(dirsObject *self)
295 {
264 {
296 Py_XDECREF(self->dict);
265 Py_XDECREF(self->dict);
297 PyObject_Del(self);
266 PyObject_Del(self);
298 }
267 }
299
268
300 static PyObject *dirs_iter(dirsObject *self)
269 static PyObject *dirs_iter(dirsObject *self)
301 {
270 {
302 return PyObject_GetIter(self->dict);
271 return PyObject_GetIter(self->dict);
303 }
272 }
304
273
305 static PySequenceMethods dirs_sequence_methods;
274 static PySequenceMethods dirs_sequence_methods;
306
275
307 static PyMethodDef dirs_methods[] = {
276 static PyMethodDef dirs_methods[] = {
308 {"addpath", (PyCFunction)dirs_addpath, METH_VARARGS, "add a path"},
277 {"addpath", (PyCFunction)dirs_addpath, METH_VARARGS, "add a path"},
309 {"delpath", (PyCFunction)dirs_delpath, METH_VARARGS, "remove a path"},
278 {"delpath", (PyCFunction)dirs_delpath, METH_VARARGS, "remove a path"},
310 {NULL} /* Sentinel */
279 {NULL} /* Sentinel */
311 };
280 };
312
281
313 static PyTypeObject dirsType = { PyVarObject_HEAD_INIT(NULL, 0) };
282 static PyTypeObject dirsType = { PyVarObject_HEAD_INIT(NULL, 0) };
314
283
315 void dirs_module_init(PyObject *mod)
284 void dirs_module_init(PyObject *mod)
316 {
285 {
317 dirs_sequence_methods.sq_contains = (objobjproc)dirs_contains;
286 dirs_sequence_methods.sq_contains = (objobjproc)dirs_contains;
318 dirsType.tp_name = "parsers.dirs";
287 dirsType.tp_name = "parsers.dirs";
319 dirsType.tp_new = PyType_GenericNew;
288 dirsType.tp_new = PyType_GenericNew;
320 dirsType.tp_basicsize = sizeof(dirsObject);
289 dirsType.tp_basicsize = sizeof(dirsObject);
321 dirsType.tp_dealloc = (destructor)dirs_dealloc;
290 dirsType.tp_dealloc = (destructor)dirs_dealloc;
322 dirsType.tp_as_sequence = &dirs_sequence_methods;
291 dirsType.tp_as_sequence = &dirs_sequence_methods;
323 dirsType.tp_flags = Py_TPFLAGS_DEFAULT;
292 dirsType.tp_flags = Py_TPFLAGS_DEFAULT;
324 dirsType.tp_doc = "dirs";
293 dirsType.tp_doc = "dirs";
325 dirsType.tp_iter = (getiterfunc)dirs_iter;
294 dirsType.tp_iter = (getiterfunc)dirs_iter;
326 dirsType.tp_methods = dirs_methods;
295 dirsType.tp_methods = dirs_methods;
327 dirsType.tp_init = (initproc)dirs_init;
296 dirsType.tp_init = (initproc)dirs_init;
328
297
329 if (PyType_Ready(&dirsType) < 0)
298 if (PyType_Ready(&dirsType) < 0)
330 return;
299 return;
331 Py_INCREF(&dirsType);
300 Py_INCREF(&dirsType);
332
301
333 PyModule_AddObject(mod, "dirs", (PyObject *)&dirsType);
302 PyModule_AddObject(mod, "dirs", (PyObject *)&dirsType);
334 }
303 }
General Comments 0
You need to be logged in to leave comments. Login now